github-actions[bot] committed on
Commit
ee28bd3
·
0 Parent(s):

Deploy 1.15.12

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +2 -0
  2. Dockerfile +34 -0
  3. README.md +12 -0
  4. app/__init__.py +1 -0
  5. app/main.py +79 -0
  6. app/models/__init__.py +1 -0
  7. app/models/explain_schemas.py +185 -0
  8. app/models/ml_schemas.py +194 -0
  9. app/models/schemas.py +73 -0
  10. app/routers/__init__.py +1 -0
  11. app/routers/data_router.py +184 -0
  12. app/routers/explain_router.py +454 -0
  13. app/routers/ml_router.py +92 -0
  14. app/services/__init__.py +1 -0
  15. app/services/certificate_service.py +690 -0
  16. app/services/data_service.py +1272 -0
  17. app/services/ethics_service.py +500 -0
  18. app/services/explain_service.py +665 -0
  19. app/services/insight_service.py +607 -0
  20. app/services/ml_service.py +855 -0
  21. app/services/specialty_registry.py +559 -0
  22. app/utils/__init__.py +1 -0
  23. arena/__init__.py +0 -0
  24. arena/router.py +72 -0
  25. arena/schemas.py +64 -0
  26. arena/service.py +199 -0
  27. data_cache/cardiology_arrhythmia.csv +0 -0
  28. data_cache/cardiology_hf.csv +300 -0
  29. data_cache/depression_data.csv +0 -0
  30. data_cache/dermatology.csv +0 -0
  31. data_cache/endocrinology_diabetes.csv +768 -0
  32. data_cache/hepatology_liver.csv +583 -0
  33. data_cache/icu_sepsis.csv +0 -0
  34. data_cache/nephrology_ckd.csv +363 -0
  35. data_cache/neurology_parkinsons.csv +196 -0
  36. data_cache/obstetrics_fetal.csv +0 -0
  37. data_cache/oncology_cervical.csv +0 -0
  38. data_cache/ophthalmology.arff +0 -0
  39. data_cache/orthopaedics.arff +322 -0
  40. data_cache/pharmacy_readmission.csv +0 -0
  41. data_cache/pulmonology_copd.csv +102 -0
  42. data_cache/radiology_pneumonia.csv +0 -0
  43. data_cache/thyroid.csv +215 -0
  44. datasets/.gitkeep +0 -0
  45. main_hf.py +72 -0
  46. requirements.txt +19 -0
  47. static/.gitkeep +0 -0
  48. static/apple-touch-icon.png +0 -0
  49. static/assets/ArenaPage-C8SsT3v3.js +0 -0
  50. static/assets/ArenaPage-C8SsT3v3.js.map +0 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.pyc
2
+ __pycache__/
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
## Stage 1 — install dependencies
FROM python:3.12-slim AS builder

WORKDIR /build

COPY requirements.txt .
# Each best-effort cleanup sits in its own { ...; } group so that its "|| true"
# only forgives that find. Without the grouping, `pip && find || true && find || true`
# evaluates left to right and a failed `pip install` is swallowed, letting the
# build continue with missing dependencies.
RUN pip install --no-cache-dir --no-compile --target=/build/deps -r requirements.txt \
    && { find /build/deps -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true; } \
    && { find /build/deps -type d -name "tests" -exec rm -rf {} + 2>/dev/null || true; }

## Stage 2 — slim runtime
FROM python:3.12-slim

# Native libs required by scikit-learn, xgboost, lightgbm, scipy, shap
RUN apt-get update && apt-get install -y --no-install-recommends \
        libgomp1 \
        libopenblas0 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Dependencies installed with --target in the builder stage are dropped straight
# into the runtime interpreter's site-packages.
COPY --from=builder /build/deps /usr/local/lib/python3.12/site-packages

COPY app ./app
COPY datasets ./datasets
COPY data_cache ./data_cache
COPY arena ./arena
COPY static ./static
COPY main_hf.py .

EXPOSE 7860

CMD ["python", "-m", "uvicorn", "main_hf:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: HealthWithSevgi
3
+ emoji: 🏥
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: docker
7
+ pinned: false
8
+ ---
9
+
10
+ # HealthWithSevgi — ML Learning Tool for Healthcare Professionals
11
+
12
+ A 7-step ML visualization tool for healthcare professionals to explore clinical datasets, prepare data, train models, and interpret predictions.
app/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """HealthWithSevgi FastAPI backend package."""
app/main.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """HealthWithSevgi — FastAPI Backend Entry Point"""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ from pathlib import Path
6
+
7
+ from dotenv import load_dotenv
8
+ load_dotenv(Path(__file__).resolve().parent.parent / ".env")
9
+
10
+ from fastapi import FastAPI
11
+ from fastapi.middleware.cors import CORSMiddleware
12
+
13
+ from app.services.certificate_service import CertificateService
14
+ from app.services.data_service import DataService
15
+ from app.services.ethics_service import EthicsService
16
+ from app.services.explain_service import ExplainService
17
+ from app.services.insight_service import InsightService
18
+ from app.services.ml_service import MLService
19
+
20
# Root logger: INFO and above, formatted as "LEVEL | logger name | message".
logging.basicConfig(level=logging.INFO, format="%(levelname)s | %(name)s | %(message)s")

# NOTE(review): the commit shipping this file is labelled "Deploy 1.15.12" while the
# API version below is 1.3.1 — confirm the two are meant to be versioned independently.
app = FastAPI(
    title="HealthWithSevgi API",
    description="ML Visualization Tool for Healthcare — REST API",
    version="1.3.1",
)

# CORS — allow frontend dev server
# Only the two port-5173 localhost origins are allowed; for those origins
# credentials, all methods and all headers are accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["http://localhost:5173", "http://127.0.0.1:5173"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Singleton service instances
# One instance of each service is created at import time and stored on app.state.
app.state.data_service = DataService()
app.state.ml_service = MLService()
app.state.explain_service = ExplainService()
app.state.ethics_service = EthicsService()
app.state.insight_service = InsightService()
app.state.certificate_service = CertificateService()

# Routers
# Imported here, after the service instances exist, rather than at the top of the
# module — hence the E402 suppressions.
from app.routers.data_router import router as data_router  # noqa: E402
from app.routers.explain_router import router as explain_router  # noqa: E402
from app.routers.ml_router import router as ml_router  # noqa: E402

app.include_router(data_router)
app.include_router(ml_router)
app.include_router(explain_router)
53
+
54
# Model Arena extension
# Prepend <three directories above this file>/local/model-arena to sys.path (if it is
# not already there) before importing the `arena` package.
import sys

# `Path` is already imported at the top of this module, so it is not imported again here.
_arena_path = str(Path(__file__).resolve().parent.parent.parent / "local" / "model-arena")
if _arena_path not in sys.path:
    sys.path.insert(0, _arena_path)
from arena.router import router as arena_router  # noqa: E402
from arena.service import ArenaService  # noqa: E402

# The arena service is constructed around the shared MLService instance.
app.state.arena_service = ArenaService(app.state.ml_service)
app.include_router(arena_router)
65
+
66
+
67
@app.get("/")
async def root() -> dict:
    """Lightweight liveness endpoint.

    Returns a small JSON object (status, project name, API version) so that
    `docker-compose healthcheck` has a cheap 200 target.
    """
    # Read the version from the FastAPI instance so this payload cannot drift from
    # the value passed to FastAPI(version=...).
    return {"status": "ok", "project": "HealthWithSevgi", "version": app.version}
71
+
72
+
73
@app.get("/health")
async def health_check() -> dict:
    """
    Deep health probe — verifies the heavy native libs (sklearn, xgboost, lightgbm, shap,
    scipy) import cleanly.

    Returns `{"status": "healthy"}` when every import succeeds. Otherwise responds with
    HTTP 503 whose detail names each module that failed and the error it raised.
    """
    import importlib

    from fastapi import HTTPException

    failures: dict[str, str] = {}
    for module_name in ("sklearn", "xgboost", "lightgbm", "shap", "scipy"):
        try:
            # Already-imported modules are served from sys.modules, so repeat probes are cheap.
            importlib.import_module(module_name)
        except Exception as exc:  # a broken native lib may raise OSError etc., not only ImportError
            failures[module_name] = f"{type(exc).__name__}: {exc}"

    if failures:
        raise HTTPException(
            status_code=503,
            detail={"status": "unhealthy", "failed_imports": failures},
        )
    return {"status": "healthy"}
app/models/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Pydantic request/response schemas used by the routers."""
app/models/explain_schemas.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pydantic schemas for explainability, ethics, and certificate endpoints."""
2
+ from __future__ import annotations
3
+
4
+ from typing import Literal
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+
9
class FeatureImportanceItem(BaseModel):
    """One row of global SHAP importance — feature name + mean |SHAP value|.

    Also carries a clinician-facing name, the direction of the effect and a
    free-text clinical note.
    """
    feature_name: str
    clinical_name: str
    importance: float
    direction: Literal["positive", "negative", "neutral"]
    clinical_note: str
16
+
17
+
18
class GlobalExplainabilityResponse(BaseModel):
    """
    Payload for `GET /api/explain/global/{model_id}` — the feature-importance list for
    one model, the name of the method that produced it, a clinical note for the top
    feature and an explained-variance percentage.
    """
    model_id: str
    method: str
    feature_importances: list[FeatureImportanceItem]
    top_feature_clinical_note: str
    explained_variance_pct: float
28
+
29
+
30
class SHAPWaterfallPoint(BaseModel):
    """
    Single bar in the per-patient SHAP waterfall: which feature pushed the probability in
    which direction and by how much, plus a plain-language explanation.
    """
    feature_name: str
    clinical_name: str
    feature_value: float | str  # numeric or textual value of the feature for this patient
    shap_value: float
    direction: Literal["increases_risk", "decreases_risk"]
    plain_language: str
41
+
42
+
43
class SinglePatientExplainResponse(BaseModel):
    """
    Payload for `GET /api/explain/patient/{model_id}/{patient_index}` — predicted class
    and probability, the base value, the list of waterfall points and a clinical summary.
    """
    model_id: str
    patient_index: int
    predicted_class: str
    predicted_probability: float
    base_value: float
    waterfall: list[SHAPWaterfallPoint]
    clinical_summary: str
55
+
56
+
57
class SubgroupMetrics(BaseModel):
    """
    Fairness metrics computed for one subgroup of a sensitive attribute: sample size,
    accuracy, sensitivity, specificity, precision and F1, plus a three-level review
    status with an optional reason.
    """
    group_name: str
    group_label: str
    sample_size: int
    accuracy: float
    sensitivity: float
    specificity: float
    precision: float
    f1_score: float
    status: Literal["acceptable", "review", "action_needed"]
    status_reason: str = ""  # empty when no reason is supplied
72
+
73
+
74
class BiasWarning(BaseModel):
    """
    Machine-readable flag emitted when a subgroup metric falls outside the configured
    tolerance relative to the overall cohort.

    Names the affected group and metric and reports the size of the gap.
    """
    detected: bool
    message: str
    affected_group: str
    metric: str
    gap: float
84
+
85
+
86
class CaseStudy(BaseModel):
    """
    One narrative case study shown with the ethics results — what happened, its impact
    and the lesson drawn, tagged with specialty, year and a severity category.
    """
    id: str
    title: str
    specialty: str
    year: int
    what_happened: str
    impact: str
    lesson: str
    severity: Literal["failure", "near_miss", "prevention"]
99
+
100
+
101
class RepresentationWarning(BaseModel):
    """Flags a demographic group whose training-data proportion differs
    from the population norm by more than the configured threshold."""

    group: str
    attribute: str
    dataset_pct: float
    population_pct: float
    gap_pp: float  # presumably the gap in percentage points — confirm against the ethics service
    message: str
111
+
112
+
113
class EthicsResponse(BaseModel):
    """
    Ethics/fairness payload for one model — per-subgroup metrics, bias warnings,
    training-data representation (with optional representation warnings), overall
    sensitivity, the EU AI Act checklist items and a list of case studies.

    NOTE(review): the route serving this model is not visible from this module.
    """
    model_id: str
    subgroup_metrics: list[SubgroupMetrics]
    bias_warnings: list[BiasWarning]
    training_representation: dict
    representation_warnings: list[RepresentationWarning] = Field(default_factory=list)
    overall_sensitivity: float
    eu_ai_act_items: list[dict]
    case_studies: list[CaseStudy]
    demographics_available: bool = True
    demographics_note: str = ""
128
+
129
+
130
class WhatIfRequest(BaseModel):
    """Request body for `/api/explain/what-if` — identifies a model and a patient by index, and names one feature with the new numeric value to probe."""
    model_id: str
    patient_index: int
    feature_name: str
    new_value: float
136
+
137
+
138
class WhatIfResponse(BaseModel):
    """
    Response for `/api/explain/what-if` — the original and edited feature value, the
    probability before and after the edit, the shift between them and its direction.
    """
    feature_name: str
    original_value: float
    new_value: float
    original_prob: float
    new_prob: float
    shift: float
    direction: Literal["increased_risk", "decreased_risk", "no_change"]
150
+
151
+
152
class ChecklistUpdate(BaseModel):
    """Toggle payload for a single EU AI Act checklist item: which model, which item, and its new checked state."""
    model_id: str
    item_id: str
    checked: bool
157
+
158
+
159
class SamplePatient(BaseModel):
    """
    A single patient row picked from the trained dataset for use in Step 6 explainability
    or Step 7 ethics demos: its index, a three-level risk bucket, the predicted
    probability and a short summary.
    """
    index: int
    risk_level: Literal["low", "medium", "high"]
    probability: float
    summary: str
168
+
169
+
170
class SamplePatientsResponse(BaseModel):
    """Wraps a list of `SamplePatient` rows for one model, used to seed the Step 6 "single patient" picker."""
    model_id: str
    patients: list[SamplePatient]
174
+
175
+
176
class CertificateRequest(BaseModel):
    """
    Request body for `/api/explain/certificate` — the model and session ids plus the
    user-selected checklist state to embed in the EU AI Act PDF, with optional
    clinician and institution names.
    """
    model_id: str
    session_id: str
    checklist_state: dict[str, bool] = Field(default_factory=dict)  # empty dict when omitted
    clinician_name: str = "Healthcare Professional"  # placeholder used when omitted
    institution: str = "Healthcare Institution"  # placeholder used when omitted
app/models/ml_schemas.py ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pydantic schemas for ML training and evaluation endpoints."""
2
+ from __future__ import annotations
3
+
4
+ from enum import Enum
5
+ from typing import Any, Literal
6
+
7
+ from pydantic import BaseModel, Field, model_validator
8
+
9
+
10
class ModelType(str, Enum):
    """Enum of the eight classifiers the backend can train.

    Members are also `str`, and their values are the keys of `PARAM_SCHEMAS`.
    """
    KNN = "knn"
    SVM = "svm"
    DECISION_TREE = "decision_tree"
    RANDOM_FOREST = "random_forest"
    LOGISTIC_REGRESSION = "logistic_regression"
    NAIVE_BAYES = "naive_bayes"
    XGBOOST = "xgboost"
    LIGHTGBM = "lightgbm"
20
+
21
+
22
class KNNParams(BaseModel):
    """Hyperparameters for K-Nearest-Neighbours (neighbour count, distance metric)."""
    n_neighbors: int = Field(5, ge=1, le=25)  # 1–25 inclusive, default 5
    metric: Literal["euclidean", "manhattan"] = "euclidean"
26
+
27
+
28
class SVMParams(BaseModel):
    """Hyperparameters for Support Vector Machine (kernel, C)."""
    kernel: Literal["linear", "rbf", "poly", "sigmoid"] = "rbf"
    C: float = Field(1.0, ge=0.01, le=100.0)  # 0.01–100 inclusive, default 1.0
32
+
33
+
34
class DecisionTreeParams(BaseModel):
    """Hyperparameters for a single Decision Tree (max depth, split criterion)."""
    max_depth: int = Field(5, ge=1, le=20)  # 1–20 inclusive, default 5
    criterion: Literal["gini", "entropy"] = "gini"
38
+
39
+
40
class RandomForestParams(BaseModel):
    """Hyperparameters for Random Forest ensemble (n_estimators, max depth)."""
    n_estimators: int = Field(100, ge=10, le=500)  # 10–500 inclusive, default 100
    max_depth: int = Field(5, ge=1, le=20)  # 1–20 inclusive, default 5
44
+
45
+
46
class LogisticRegressionParams(BaseModel):
    """Hyperparameters for Logistic Regression (C, maximum iterations)."""
    C: float = Field(1.0, ge=0.001, le=100.0)  # 0.001–100 inclusive, default 1.0
    max_iter: int = Field(200, ge=50, le=2000)  # 50–2000 inclusive, default 200
50
+
51
+
52
class NaiveBayesParams(BaseModel):
    """Hyperparameters for Gaussian Naive Bayes (variance smoothing)."""
    var_smoothing: float = Field(1e-9, ge=1e-12, le=1e-3)  # 1e-12–1e-3 inclusive, default 1e-9
55
+
56
+
57
class XGBoostParams(BaseModel):
    """Hyperparameters for XGBoost (n_estimators, max depth, learning rate)."""
    n_estimators: int = Field(100, ge=10, le=500)  # 10–500 inclusive, default 100
    max_depth: int = Field(5, ge=1, le=15)  # 1–15 inclusive, default 5
    learning_rate: float = Field(0.1, ge=0.01, le=0.5)  # 0.01–0.5 inclusive, default 0.1
62
+
63
+
64
class LightGBMParams(BaseModel):
    """Hyperparameters for LightGBM (n_estimators, max depth, learning rate)."""
    n_estimators: int = Field(100, ge=10, le=500)  # 10–500 inclusive, default 100
    # -1..15 inclusive, default -1.
    # NOTE(review): -1 presumably means "no depth limit" as in LightGBM itself; 0 is also accepted — confirm intended.
    max_depth: int = Field(-1, ge=-1, le=15)
    learning_rate: float = Field(0.1, ge=0.01, le=0.5)  # 0.01–0.5 inclusive, default 0.1
69
+
70
+
71
# Maps each ModelType value (a plain string) to the Pydantic class that validates
# that model's hyperparameters. Keys come from the enum members so the two cannot drift.
PARAM_SCHEMAS: dict[str, type[BaseModel]] = {
    member.value: schema
    for member, schema in (
        (ModelType.KNN, KNNParams),
        (ModelType.SVM, SVMParams),
        (ModelType.DECISION_TREE, DecisionTreeParams),
        (ModelType.RANDOM_FOREST, RandomForestParams),
        (ModelType.LOGISTIC_REGRESSION, LogisticRegressionParams),
        (ModelType.NAIVE_BAYES, NaiveBayesParams),
        (ModelType.XGBOOST, XGBoostParams),
        (ModelType.LIGHTGBM, LightGBMParams),
    )
}
81
+
82
+
83
class TrainRequest(BaseModel):
    """Request body for `/api/ml/train` — session id + model type + its hyperparameter bundle."""
    session_id: str
    model_type: ModelType
    params: dict[str, Any] = Field(default_factory=dict)
    tune: bool = False
    use_feature_selection: bool = False

    @model_validator(mode='after')
    def validate_params(self) -> 'TrainRequest':
        """Best-effort check of `params` against the schema for the chosen `model_type`.

        When the params validate, they are replaced by the schema's dumped form (so
        schema defaults are filled in). When they do not validate, the raw params are
        kept unchanged and a warning is logged instead of the failure being silently
        discarded. Requests with empty params, or a model type without a schema, are
        returned untouched.
        """
        import logging

        from pydantic import ValidationError

        schema = PARAM_SCHEMAS.get(self.model_type.value)
        if schema is None or not self.params:
            return self

        try:
            validated = schema(**self.params)
        except (ValidationError, TypeError, ValueError) as exc:
            # Deliberately non-fatal: the request is let through with the raw params
            # (the original comment notes build_model has its own defaults).
            logging.getLogger(__name__).warning(
                "Hyperparameters for '%s' failed validation; passing raw params through: %s",
                self.model_type.value,
                exc,
            )
            return self

        self.params = validated.model_dump()
        return self
102
+
103
+
104
class ConfusionMatrixData(BaseModel):
    """Confusion matrix counts plus labels, ready for the Step-5 chart.

    The four scalar counts default to 0; `matrix` and `labels` are required.
    """
    tn: int = 0
    fp: int = 0
    fn: int = 0
    tp: int = 0
    matrix: list[list[int]]
    labels: list[str]
112
+
113
+
114
class ROCPoint(BaseModel):
    """One threshold sample of the ROC curve (FPR, TPR, threshold)."""
    fpr: float  # false-positive rate
    tpr: float  # true-positive rate
    threshold: float
119
+
120
+
121
class MetricsResponse(BaseModel):
    """
    Bundle of evaluation metrics returned after a training run: accuracy, sensitivity,
    specificity, precision, F1, AUC-ROC, confusion matrix, ROC/PR points, training
    accuracy and cross-validation scores, plus warning flags, MCC and an optimal threshold.
    """
    accuracy: float
    sensitivity: float
    specificity: float
    precision: float
    f1_score: float
    auc_roc: float
    confusion_matrix: ConfusionMatrixData
    roc_curve: list[ROCPoint]
    pr_curve: list[dict[str, float]]
    train_accuracy: float
    cross_val_scores: list[float]
    low_sensitivity_warning: bool
    mcc: float = 0.0
    overfitting_warning: bool = False
    optimal_threshold: float = 0.5
141
+
142
+
143
class ScatterPoint(BaseModel):
    """Single 2-D point used by the KNN scatter visualisation in Step 4.

    Carries the class label (index and name), which split the point belongs to and,
    for test points, the predicted class.
    """
    x: float
    y: float
    label: int
    label_name: str
    split: str  # "train" or "test"
    predicted: int | None = None  # only for test points
151
+
152
+
153
class DecisionMesh(BaseModel):
    """Grid of predictions used to shade the KNN decision boundary in Step 4."""
    x_values: list[float]  # unique x coordinates of the grid
    y_values: list[float]  # unique y coordinates of the grid
    predictions: list[list[int]]  # 2D array [y][x] of predicted class indices
158
+
159
+
160
class KNNScatterData(BaseModel):
    """Bundle of scatter points + decision mesh shipped to the KNN visualisation.

    Also reports the PCA explained-variance values, the class names and the KNN
    settings (k, metric) in effect.
    """
    scatter_points: list[ScatterPoint]
    decision_mesh: DecisionMesh
    pca_explained_variance: list[float]
    classes: list[str]
    k: int
    metric: str
168
+
169
+
170
class TrainResponse(BaseModel):
    """Complete payload returned by `/api/ml/train` — model and session ids, model type, params, metrics, training time, feature names and optional KNN scatter data."""
    model_id: str
    session_id: str
    model_type: ModelType
    params: dict[str, Any]
    metrics: MetricsResponse
    training_time_ms: float
    feature_names: list[str]
    knn_scatter: KNNScatterData | None = None  # None unless scatter data is attached
180
+
181
+
182
class CompareEntry(BaseModel):
    """A single model entry in the cross-model comparison list (Step 4 "Add to comparison").

    Holds the model's id, type, params, metrics and training time.
    """
    model_id: str
    model_type: ModelType
    params: dict[str, Any]
    metrics: MetricsResponse
    training_time_ms: float
189
+
190
+
191
class CompareResponse(BaseModel):
    """Response for `/api/ml/comparison` — the current list of compared models for the session, plus the id of the best one."""
    entries: list[CompareEntry]
    best_model_id: str
app/models/schemas.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pydantic schemas for data exploration and preparation endpoints."""
2
+ from __future__ import annotations
3
+
4
+ from typing import Any, Literal
5
+
6
+ from pydantic import BaseModel, Field
7
+
8
+
9
class SpecialtyInfo(BaseModel):
    """Descriptor for one medical specialty — id, name, description, target variable and type, feature names, clinical context, data source and licensing details."""
    id: str
    name: str
    description: str
    target_variable: str
    target_type: Literal["binary", "multiclass"]
    feature_names: list[str]
    clinical_context: str
    data_source: str
    what_ai_predicts: str
    license_type: str = ""  # empty when not specified
    license_url: str = ""  # empty when not specified
    requires_attribution: bool = False
23
+
24
+
25
class ColumnStat(BaseModel):
    """
    Per-column summary computed during exploration: name, dtype, missing count and
    percentage, unique-value count and a few sample values.
    """
    name: str
    dtype: str
    missing_count: int
    missing_pct: float
    unique_count: int
    sample_values: list[Any]
36
+
37
+
38
class DataExplorationResponse(BaseModel):
    """
    Response for `POST /api/explore` — column stats, row count, class distribution,
    class-imbalance warning and ratio, and the target column that was used.
    """
    columns: list[ColumnStat]
    row_count: int
    class_distribution: dict[str, int]
    imbalance_warning: bool
    imbalance_ratio: float
    target_col: str
49
+
50
+
51
class PrepSettings(BaseModel):
    """
    Step-3 preparation settings (test split, normalisation, missing-value handling, SMOTE
    flag, outlier treatment).
    """
    test_size: float = Field(0.2, ge=0.1, le=0.4)  # 0.1–0.4 inclusive, default 0.2
    missing_strategy: Literal["median", "mode", "drop"] = "median"
    normalization: Literal["zscore", "minmax", "none"] = "zscore"
    use_smote: bool = False
    outlier_handling: Literal["none", "iqr", "zscore_clip"] = "none"
61
+
62
+
63
class PrepResponse(BaseModel):
    """Response for `POST /api/prepare` — session id, train/test sizes, feature count, class distribution before and after, and which transformations were applied."""
    session_id: str
    train_size: int
    test_size: int  # an integer here, unlike the fractional PrepSettings.test_size — presumably a row count
    features_count: int
    class_distribution_before: dict[str, int]
    class_distribution_after: dict[str, int]
    smote_applied: bool
    normalization_applied: str
    norm_samples: list[dict[str, object]] = Field(default_factory=list)  # [{feature, before, after}, ...]
app/routers/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """FastAPI routers split by wizard concern (data, ml, explain)."""
app/routers/data_router.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Data exploration and preparation REST endpoints."""
2
+ from __future__ import annotations
3
+
4
+ import io
5
+ import logging
6
+ import uuid
7
+
8
+ import pandas as pd
9
+ from fastapi import APIRouter, File, Form, HTTPException, Request, UploadFile, status
10
+ from fastapi.responses import JSONResponse
11
+
12
+ from app.models.schemas import (
13
+ DataExplorationResponse,
14
+ PrepResponse,
15
+ PrepSettings,
16
+ SpecialtyInfo,
17
+ )
18
+ from app.services.data_service import DatasetUnavailableError
19
+ from app.services.specialty_registry import get_specialty, list_specialties
20
+
21
+ logger = logging.getLogger(__name__)
22
+ router = APIRouter(prefix="/api", tags=["data"])
23
+
24
+ _MAX_UPLOAD_BYTES = 50 * 1024 * 1024 # 50 MB
25
+
26
+
27
+ def _get_data_service(request: Request):
28
+ """FastAPI dependency — resolves the shared `DataService` off `app.state`."""
29
+ return request.app.state.data_service
30
+
31
+
32
+ def _get_ml_service(request: Request):
33
+ """FastAPI dependency — resolves the shared `MLService` off `app.state`."""
34
+ return request.app.state.ml_service
35
+
36
+
37
def _load_df(file: UploadFile | None, specialty_id: str, data_service) -> pd.DataFrame:
    """
    Load a pandas DataFrame either from an uploaded CSV or from the specialty's
    bundled dataset.

    When `file` is absent or has no filename, the example dataset for `specialty_id`
    is fetched from `data_service`. Otherwise the upload is validated and parsed.

    Raises:
        HTTPException 422: the upload is not a `.csv`, cannot be parsed, has fewer than
            10 rows or fewer than 2 columns, or the example dataset is unavailable.
        HTTPException 413: the upload is larger than `_MAX_UPLOAD_BYTES`.
    """
    if file is None or not file.filename:
        try:
            return data_service.get_example_dataset(specialty_id)
        except DatasetUnavailableError as exc:
            raise HTTPException(
                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
                detail=str(exc),
            ) from exc

    # Bug #6: Validate file extension
    if not file.filename.lower().endswith(".csv"):
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"Only .csv files are accepted (got: {file.filename})",
        )

    # Enforce the upload limit while reading: one byte past the limit is enough to
    # detect an oversized file without pulling an arbitrarily large body into memory.
    content = file.file.read(_MAX_UPLOAD_BYTES + 1)
    if len(content) > _MAX_UPLOAD_BYTES:
        raise HTTPException(
            status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
            detail=f"File exceeds {_MAX_UPLOAD_BYTES // (1024 * 1024)} MB limit",
        )

    try:
        df = pd.read_csv(io.BytesIO(content))
    except Exception as exc:
        # Any parser failure is reported to the client as an unprocessable upload.
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"Could not parse CSV file: {exc}",
        ) from exc

    # Bug #7: Minimum dataset size validation
    if len(df) < 10:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"Dataset must have at least 10 rows (got {len(df)})",
        )
    if len(df.columns) < 2:
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=f"Dataset must have at least 2 columns (got {len(df.columns)})",
        )
    return df
82
+
83
+
84
+ # ------------------------------------------------------------------
85
+ # Specialties
86
+ # ------------------------------------------------------------------
87
+
88
@router.get("/specialties", response_model=list[SpecialtyInfo])
def get_specialties() -> list[SpecialtyInfo]:
    """Return every entry of the specialty registry (used by the Step 1 picker)."""
    registry_entries = list_specialties()
    return registry_entries
92
+
93
+
94
@router.get("/specialties/{specialty_id}", response_model=SpecialtyInfo)
def get_specialty_by_id(specialty_id: str) -> SpecialtyInfo:
    """Look up one specialty in the registry; respond with 404 when the id is unknown."""
    match = get_specialty(specialty_id)
    if match is not None:
        return match
    raise HTTPException(status_code=404, detail=f"Specialty '{specialty_id}' not found")
101
+
102
+
103
+ # ------------------------------------------------------------------
104
+ # Exploration
105
+ # ------------------------------------------------------------------
106
+
107
@router.post("/explore", response_model=DataExplorationResponse)
def explore_data(
    request: Request,
    specialty_id: str = Form(...),
    target_col: str = Form(...),
    file: UploadFile | None = File(None),
) -> DataExplorationResponse:
    """Step-2 exploration endpoint.

    Loads the uploaded CSV (or the specialty's example dataset) and returns the
    exploration summary for it. If `target_col` is not a column of the data, the
    specialty's registered target variable is used instead when present; otherwise
    the request is rejected with 422.
    """
    data_service = _get_data_service(request)
    frame = _load_df(file, specialty_id, data_service)

    # Fast path: the requested target exists in the data.
    if target_col in frame.columns:
        return data_service.explore_dataframe(frame, target_col)

    # Fall back to the target registered for the specialty, if the data has it.
    registry_entry = get_specialty(specialty_id)
    if not registry_entry or registry_entry.target_variable not in frame.columns:
        raise HTTPException(
            status_code=422,
            detail=f"Target column '{target_col}' not found. Available: {list(frame.columns)}",
        )
    return data_service.explore_dataframe(frame, registry_entry.target_variable)
130
+
131
+
132
+ # ------------------------------------------------------------------
133
+ # Preparation
134
+ # ------------------------------------------------------------------
135
+
136
@router.post("/prepare", response_model=PrepResponse)
def prepare_data(
    request: Request,
    specialty_id: str = Form(...),
    target_col: str = Form(...),
    test_size: float = Form(0.2),
    missing_strategy: str = Form("median"),
    normalization: str = Form("zscore"),
    use_smote: bool = Form(False),
    outlier_handling: str = Form("none"),
    session_id: str | None = Form(None),
    file: UploadFile | None = File(None),
) -> PrepResponse:
    """Step-3 preparation endpoint.

    Loads the dataset, resolves the target column (falling back to the specialty's
    registered target variable), validates the form values through `PrepSettings` and
    delegates to `DataService.prepare_data`. The resulting session data is then tagged
    with `specialty_id` and handed to the ML service.

    A fresh UUID is used as the session id when the caller does not supply one.

    Raises:
        HTTPException 422: the target column cannot be resolved, or settings
            validation / data preparation fails for any reason.
    """
    ds = _get_data_service(request)
    ml_service = _get_ml_service(request)
    df = _load_df(file, specialty_id, ds)

    if target_col not in df.columns:
        spec = get_specialty(specialty_id)
        if spec and spec.target_variable in df.columns:
            target_col = spec.target_variable
        else:
            raise HTTPException(status_code=422, detail=f"Target column '{target_col}' not found")

    new_session_id = session_id or str(uuid.uuid4())

    try:
        settings = PrepSettings(
            test_size=test_size,
            missing_strategy=missing_strategy,  # type: ignore[arg-type]
            normalization=normalization,  # type: ignore[arg-type]
            use_smote=use_smote,
            outlier_handling=outlier_handling,  # type: ignore[arg-type]
        )
        # Only the response object is needed here; the splits and feature names are
        # read back from the stored session below.
        _, _, _, _, response, _ = ds.prepare_data(
            df, target_col, settings, new_session_id
        )
    except Exception as exc:
        logger.exception("Data preparation failed")
        raise HTTPException(status_code=422, detail=str(exc)) from exc

    # Share prepared data with ML service, including specialty_id for certificate generation
    session_data = ds.get_session(new_session_id)
    if session_data:
        session_data["specialty_id"] = specialty_id  # Fix: store for certificate generation
        ml_service.store_session_data(new_session_id, session_data)

    return response
app/routers/explain_router.py ADDED
@@ -0,0 +1,454 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Explainability, ethics, and certificate REST endpoints."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+
6
+ from fastapi import APIRouter, HTTPException, Request
7
+ from fastapi.responses import StreamingResponse
8
+
9
+ from app.models.explain_schemas import (
10
+ CertificateRequest,
11
+ ChecklistUpdate,
12
+ EthicsResponse,
13
+ GlobalExplainabilityResponse,
14
+ SamplePatientsResponse,
15
+ SinglePatientExplainResponse,
16
+ WhatIfRequest,
17
+ WhatIfResponse,
18
+ )
19
+
20
+ logger = logging.getLogger(__name__)
21
+ router = APIRouter(prefix="/api", tags=["explain"])
22
+
23
+
24
def _get_services(request: Request) -> tuple:
    """Resolve the shared service singletons stored on `request.app.state`.

    Returns:
        A 5-tuple in this fixed order:
        `(ml_service, explain_service, ethics_service, certificate_service, insight_service)`.
        Callers unpack it positionally, so the order must not change.
    """
    state = request.app.state
    return (
        state.ml_service,
        state.explain_service,
        state.ethics_service,
        state.certificate_service,
        state.insight_service,
    )
33
+
34
+
35
def _get_model_data(ml_service, model_id: str) -> dict:
    """Return whatever `ml_service.get_model(model_id)` holds, or raise HTTP 404 when it is None."""
    stored = ml_service.get_model(model_id)
    if stored is not None:
        return stored
    raise HTTPException(status_code=404, detail=f"Model '{model_id}' not found. Train a model first.")
41
+
42
+
43
@router.get("/explain/global/{model_id}", response_model=GlobalExplainabilityResponse)
def global_importance(request: Request, model_id: str) -> GlobalExplainabilityResponse:
    """Step-6 endpoint — computes global SHAP feature importance for the active model.

    Raises:
        HTTPException: 404 when `model_id` is unknown (from `_get_model_data`),
            500 when the explain service raises.
    """
    ml, explain, *_ = _get_services(request)
    data = _get_model_data(ml, model_id)
    try:
        return explain.global_importance(
            model_id=model_id,
            model=data["model"],
            X_test=data["X_test"],
            y_test=data["y_test"],
            feature_names=data["feature_names"],
            X_train=data["X_train"],
            model_type=str(data["model_type"]),
            classes=data["classes"],
        )
    except Exception as exc:
        logger.exception("Global explainability failed")
        # Chain the cause so the original traceback survives the translation to HTTP 500.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
62
+
63
+
64
@router.get("/explain/patient/{model_id}/{patient_index}", response_model=SinglePatientExplainResponse)
def single_patient_explain(
    request: Request, model_id: str, patient_index: int
) -> SinglePatientExplainResponse:
    """Step-6 endpoint — returns a per-patient SHAP waterfall plus base/final probability.

    `patient_index` is a zero-based position into the stored test split.

    Raises:
        HTTPException: 404 when `model_id` is unknown, 422 when `patient_index`
            falls outside the test split, 500 when the explain service raises.
    """
    ml, explain, *_ = _get_services(request)
    data = _get_model_data(ml, model_id)
    n_test = len(data["X_test"])
    if not 0 <= patient_index < n_test:
        raise HTTPException(status_code=422, detail=f"Patient index {patient_index} out of range [0, {n_test-1}]")
    try:
        return explain.single_patient(
            model_id=model_id,
            model=data["model"],
            patient_idx=patient_index,
            X_test=data["X_test"],
            feature_names=data["feature_names"],
            X_train=data["X_train"],
            model_type=str(data["model_type"]),
            classes=data["classes"],
            y_test=data["y_test"],
            scaler=data.get("scaler"),
        )
    except Exception as exc:
        logger.exception("Single-patient explanation failed")
        # Chain the cause so the original traceback survives the translation to HTTP 500.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
90
+
91
+
92
@router.post("/explain/what-if", response_model=WhatIfResponse)
def what_if(request: Request, body: WhatIfRequest) -> WhatIfResponse:
    """Step-6 endpoint — probes probability changes when specific feature values are altered.

    Validates the patient index and feature name against the stored model data
    before delegating to the explain service.

    Raises:
        HTTPException: 404 when the model is unknown, 400 when the patient index or
            feature name is invalid, 500 when the explain service raises.
    """
    ml, explain, *_ = _get_services(request)
    data = _get_model_data(ml, body.model_id)

    n_test = len(data["X_test"])
    if not 0 <= body.patient_index < n_test:
        raise HTTPException(
            status_code=400,
            detail=f"Patient index {body.patient_index} out of range [0, {n_test - 1}]",
        )
    if body.feature_name not in data["feature_names"]:
        raise HTTPException(
            status_code=400,
            detail=f"Feature '{body.feature_name}' not found. Available: {data['feature_names']}",
        )

    try:
        return explain.what_if(
            model_id=body.model_id,
            model=data["model"],
            patient_index=body.patient_index,
            feature_name=body.feature_name,
            new_value=body.new_value,
            X_test=data["X_test"],
            feature_names=data["feature_names"],
            scaler=data.get("scaler"),
        )
    except Exception as exc:
        logger.exception("What-if analysis failed")
        # Chain the cause so the original traceback survives the translation to HTTP 500.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
124
+
125
+
126
@router.get("/explain/sample-patients/{model_id}", response_model=SamplePatientsResponse)
def sample_patients(request: Request, model_id: str) -> SamplePatientsResponse:
    """Step-6 helper — returns a handful of sample rows from the test split for quick picking.

    Raises:
        HTTPException: 404 when `model_id` is unknown, 500 when the explain service raises.
    """
    ml, explain, *_ = _get_services(request)
    data = _get_model_data(ml, model_id)
    try:
        return explain.sample_patients(
            model_id=model_id,
            model=data["model"],
            X_test=data["X_test"],
        )
    except Exception as exc:
        logger.exception("Sample patients retrieval failed")
        # Chain the cause so the original traceback survives the translation to HTTP 500.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
140
+
141
+
142
@router.get("/ethics/{model_id}", response_model=EthicsResponse)
def get_ethics(request: Request, model_id: str) -> EthicsResponse:
    """Step-7 endpoint — runs the bias audit and produces fairness deltas + warnings.

    Raises:
        HTTPException: 404 when `model_id` is unknown, 500 when the ethics service raises.
    """
    ml, _, ethics, *_ = _get_services(request)
    data = _get_model_data(ml, model_id)
    try:
        return ethics.analyze_bias(
            model_id=model_id,
            model=data["model"],
            X_test=data["X_test"],
            y_test=data["y_test"],
            feature_names=data["feature_names"],
            classes=data["classes"],
            X_train=data["X_train"],
            scaler=data.get("scaler"),
        )
    except Exception as exc:
        logger.exception("Ethics analysis failed")
        # Chain the cause so the original traceback survives the translation to HTTP 500.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
161
+
162
+
163
@router.post("/ethics/checklist")
def update_checklist(request: Request, body: ChecklistUpdate) -> dict:
    """Step-7 endpoint — forwards one checklist toggle (model id, item id, checked flag) to the ethics service."""
    # The ethics service is the third entry of the tuple returned by `_get_services`.
    ethics_service = _get_services(request)[2]
    result = ethics_service.update_checklist(body.model_id, body.item_id, body.checked)
    return result
168
+
169
+
170
@router.get("/insights/{model_id}")
async def get_insights(request: Request, model_id: str) -> dict:
    """Generate LLM-powered clinical insights for a trained model.

    Collects model metrics, the bias audit, global feature importance (best-effort),
    specialty metadata, comparison entries, per-feature statistics and a few test-set
    rows into one `context` dict, then awaits the three insight generators concurrently.

    Returns:
        Dict with the keys `ethics_insight`, `case_studies` and `eu_ai_act_insights`.

    Raises:
        HTTPException: 404 when the model is unknown, 422 when no metrics are stored,
            500 when the bias audit or the insight generation fails.
    """
    import asyncio
    import numpy as np

    ml, explain, ethics, _, insight_svc = _get_services(request)
    data = _get_model_data(ml, model_id)

    metrics = data.get("metrics")
    if metrics is None:
        raise HTTPException(status_code=422, detail="Model metrics not available.")

    # --- Gather all data sources ---
    # NOTE(review): the audit and feature-importance calls below are plain synchronous
    # calls inside an `async def`, so they run on the event loop — consider offloading.
    try:
        ethics_data = ethics.analyze_bias(
            model_id=model_id,
            model=data["model"],
            X_test=data["X_test"],
            y_test=data["y_test"],
            feature_names=data["feature_names"],
            classes=data["classes"],
            X_train=data["X_train"],
            scaler=data.get("scaler"),
        )
    except Exception as exc:
        # Same handling as the dedicated /ethics endpoint: log, then report a 500.
        logger.exception("Ethics analysis failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    # SHAP / feature importance is best-effort: a failure must not abort the request.
    shap_data = None
    try:
        shap_data = explain.global_importance(
            model_id=model_id,
            model=data["model"],
            X_test=data["X_test"],
            y_test=data["y_test"],
            feature_names=data["feature_names"],
            X_train=data["X_train"],
            model_type=str(data["model_type"]),
            classes=data["classes"],
        )
    except Exception as exc:
        logger.warning("SHAP for insights failed: %s", exc)

    # Specialty metadata
    session_id = data.get("session_id", "")
    ml_session = ml.get_session(session_id)
    specialty_info = None
    if ml_session:
        from app.services.specialty_registry import SPECIALTIES
        specialty_info = SPECIALTIES.get(ml_session.get("specialty_id", ""))

    def _m(attr: str):
        """Read one metric by name: attribute access first, then mapping-style `.get`."""
        if hasattr(metrics, attr):
            return getattr(metrics, attr)
        getter = getattr(metrics, "get", None)
        # An object with neither the attribute nor `.get` yields None instead of raising.
        return getter(attr) if callable(getter) else None

    # Confusion matrix: TN/FP/FN/TP for a 2x2 matrix, otherwise only its size and the classes.
    cm_summary = {}
    cm_data = _m("confusion_matrix")
    if cm_data and hasattr(cm_data, "matrix"):
        matrix = cm_data.matrix
        if len(matrix) == 2:
            cm_summary = {"TN": matrix[0][0], "FP": matrix[0][1], "FN": matrix[1][0], "TP": matrix[1][1]}
        else:
            cm_summary = {"matrix_size": f"{len(matrix)}x{len(matrix)}", "classes": data["classes"]}

    # Class distribution of the training labels, keyed by class name where the label indexes `classes`.
    class_dist = {}
    if ml_session:
        y_train = ml_session.get("y_train")
        if y_train is not None:
            unique, counts = np.unique(y_train, return_counts=True)
            classes_list = data["classes"]
            class_dist = {
                classes_list[int(u)] if int(u) < len(classes_list) else str(u): int(c)
                for u, c in zip(unique, counts)
            }

    # Feature importance from SHAP (top 10)
    feature_importance_data = []
    if shap_data:
        feature_importance_data = [
            {
                "feature": fi.feature_name,
                "clinical_name": fi.clinical_name,
                "importance": round(fi.importance, 4),
                "direction": fi.direction,
                "clinical_note": fi.clinical_note,
            }
            for fi in shap_data.feature_importances[:10]
        ]

    cv_scores = _m("cross_val_scores") or []

    model_type = data.get("model_type")
    if hasattr(model_type, "value"):
        model_type_label = data["model_type"].value.replace("_", " ").title()
    else:
        model_type_label = str(data.get("model_type", "unknown"))

    context = {
        # Specialty & clinical domain
        "specialty_name": specialty_info.name if specialty_info else "Unknown",
        "what_ai_predicts": specialty_info.what_ai_predicts if specialty_info else "clinical outcome",
        "clinical_context": specialty_info.clinical_context if specialty_info else "",
        "target_variable": specialty_info.target_variable if specialty_info else "target",
        "data_source": specialty_info.data_source if specialty_info else "unknown",
        # Model info
        "model_type": model_type_label,
        "model_params": data.get("params", {}),
        "training_time_ms": data.get("training_time_ms"),
        # Dataset info
        "feature_names": data["feature_names"],
        "classes": data["classes"],
        "train_size": len(data["X_train"]),
        "test_size": len(data["X_test"]),
        "class_distribution_train": class_dist,
        "use_smote": ml_session.get("smote_applied", False) if ml_session else False,
        "normalization": ml_session.get("normalization", "N/A") if ml_session else "N/A",
        "raw_column_meta": ml_session.get("raw_column_meta", []) if ml_session else [],
        "row_count_original": ml_session.get("row_count", 0) if ml_session else 0,
        # Performance metrics
        "accuracy": _m("accuracy"),
        "sensitivity": _m("sensitivity"),
        "specificity": _m("specificity"),
        "precision": _m("precision"),
        "f1_score": _m("f1_score"),
        "auc_roc": _m("auc_roc"),
        "mcc": _m("mcc"),
        "train_accuracy": _m("train_accuracy"),
        "cv_scores": cv_scores,
        # max(..., 1) avoids a division by zero when no CV scores were stored.
        "cv_mean": float(sum(cv_scores) / max(len(cv_scores), 1)),
        "cv_std": float(np.std(cv_scores)) if cv_scores else 0.0,
        "overfitting_warning": _m("overfitting_warning"),
        "optimal_threshold": _m("optimal_threshold"),
        "low_sensitivity_warning": _m("low_sensitivity_warning"),
        "confusion_matrix": cm_summary,
        # Explainability / SHAP
        "shap_method": shap_data.method if shap_data else "unavailable",
        "feature_importances": feature_importance_data,
        "top_feature_clinical_note": shap_data.top_feature_clinical_note if shap_data else "",
        "explained_variance_top5_pct": shap_data.explained_variance_pct if shap_data else 0,
        # Fairness data
        "overall_sensitivity": ethics_data.overall_sensitivity,
        "bias_warnings": [
            {"group": w.affected_group, "metric": w.metric, "gap": w.gap}
            for w in ethics_data.bias_warnings
        ],
        "subgroup_details": [
            {
                "group": sm.group_label,
                "sensitivity": sm.sensitivity,
                "accuracy": sm.accuracy,
                "specificity": sm.specificity,
                "precision": sm.precision,
                "f1_score": sm.f1_score,
                "sample_size": sm.sample_size,
                "status": sm.status,
                "status_reason": sm.status_reason,
            }
            for sm in ethics_data.subgroup_metrics
        ],
    }

    # Compared models (if user trained multiple models)
    compared_models = []
    if session_id:
        try:
            compare_data = ml.get_comparison(session_id)
            for entry in compare_data.entries:
                compared_models.append({
                    "model_type": entry.model_type.value.replace("_", " ").title(),
                    "model_id": entry.model_id,
                    "accuracy": entry.metrics.accuracy,
                    "sensitivity": entry.metrics.sensitivity,
                    "specificity": entry.metrics.specificity,
                    "auc_roc": entry.metrics.auc_roc,
                    "f1_score": entry.metrics.f1_score,
                    "mcc": entry.metrics.mcc,
                    "training_time_ms": entry.training_time_ms,
                })
        except Exception as exc:
            logger.warning("Comparison data unavailable: %s", exc)
    logger.info("Insights context: %d compared models", len(compared_models))
    context["compared_models"] = compared_models

    # Feature column statistics (distributions for clinical grounding)
    column_stats = []
    X_train = data["X_train"]
    for i, fname in enumerate(data["feature_names"]):
        col_info: dict = {"name": fname}
        try:
            # DataFrames also expose `.shape`, so probe for `.iloc` to choose positional indexing.
            col = X_train.iloc[:, i] if hasattr(X_train, "iloc") else X_train[:, i]
            col_info["mean"] = round(float(np.mean(col)), 3)
            col_info["std"] = round(float(np.std(col)), 3)
            col_info["min"] = round(float(np.min(col)), 3)
            col_info["max"] = round(float(np.max(col)), 3)
        except Exception as exc:
            # Best-effort: keep the entry (possibly partial) but leave a trace of the failure.
            logger.debug("Column statistics unavailable for %r: %s", fname, exc)
        column_stats.append(col_info)
    context["column_statistics"] = column_stats

    # Sample rows from test set (real patient data for LLM grounding)
    feature_names = data["feature_names"]
    classes = data["classes"]
    X_test = data["X_test"]
    y_test = data["y_test"]
    sample_rows = []
    n_samples = min(5, len(X_test))
    # Pick diverse samples: some positive, some negative
    try:
        # Convert once so indexing is positional even when y_test is a pandas Series.
        y_arr = np.asarray(y_test)
        labels = [int(v) for v in y_arr]
        pos_idx = [i for i, label in enumerate(labels) if label == 1]
        neg_idx = [i for i, label in enumerate(labels) if label == 0]
        pick = (pos_idx[:3] + neg_idx[:2])[:n_samples] if pos_idx and neg_idx else list(range(n_samples))
        for idx in pick:
            row = {}
            for j, fname in enumerate(feature_names):
                val = X_test.iloc[idx, j] if hasattr(X_test, "iloc") else X_test[idx, j]
                row[fname] = round(float(val), 3)
            label = labels[idx]
            row["_actual_outcome"] = classes[label] if label < len(classes) else str(y_arr[idx])
            sample_rows.append(row)
    except Exception as exc:
        # Best-effort: whatever rows were built before the failure are still sent.
        logger.warning("Sample patient rows unavailable: %s", exc)
    context["sample_patients"] = sample_rows

    # EU AI Act static items for enrichment
    from app.services.ethics_service import EU_AI_ACT_ITEMS
    context["eu_ai_act_items"] = EU_AI_ACT_ITEMS

    try:
        ethics_task = insight_svc.generate_ethics_insight(context)
        cases_task = insight_svc.generate_case_studies(context)
        eu_act_task = insight_svc.generate_eu_ai_act_insights(context)
        ethics_result, cases_result, eu_act_result = await asyncio.gather(
            ethics_task, cases_task, eu_act_task
        )

        return {
            "ethics_insight": ethics_result,
            "case_studies": cases_result,
            "eu_ai_act_insights": eu_act_result,
        }
    except Exception as exc:
        logger.exception("Insight generation failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc
404
+
405
+
406
@router.post("/generate-certificate")
def generate_certificate(request: Request, body: CertificateRequest) -> StreamingResponse:
    """Step-7 endpoint — renders the EU AI Act compliance PDF via `CertificateService`.

    Returns:
        A `StreamingResponse` carrying the PDF bytes as an attachment whose filename
        embeds the first 8 characters of the model id.

    Raises:
        HTTPException: 404 when the model is unknown, 422 when no metrics are stored,
            500 when the bias audit or the PDF rendering fails.
    """
    ml, _, ethics, cert_svc, _ = _get_services(request)
    data = _get_model_data(ml, body.model_id)

    # Metrics are read from the stored model entry; nothing is recomputed here.
    metrics = data.get("metrics")
    if metrics is None:
        raise HTTPException(status_code=422, detail="Model metrics not available. Train the model first.")

    try:
        ethics_data = ethics.analyze_bias(
            model_id=body.model_id,
            model=data["model"],
            X_test=data["X_test"],
            y_test=data["y_test"],
            feature_names=data["feature_names"],
            classes=data["classes"],
            X_train=data["X_train"],
            scaler=data.get("scaler"),
        )
    except Exception as exc:
        # Same handling as the dedicated /ethics endpoint: log, then report a 500.
        logger.exception("Ethics analysis failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    # Fall back to a generic label when the session or its specialty cannot be resolved.
    session_id = data.get("session_id", "")
    specialty_name = "Healthcare ML"
    ml_session = ml.get_session(session_id)
    if ml_session:
        from app.services.specialty_registry import SPECIALTIES
        spec = SPECIALTIES.get(ml_session.get("specialty_id", ""))
        if spec:
            specialty_name = spec.name

    try:
        pdf_bytes = cert_svc.generate_pdf(
            cert_request=body,
            metrics=metrics,
            ethics=ethics_data,
            specialty_name=specialty_name,
            model_type=data["model_type"],
            # `generate_pdf` accepts this optional value; None (its default) when not stored.
            training_time_ms=data.get("training_time_ms"),
        )
    except Exception as exc:
        logger.exception("Certificate generation failed")
        raise HTTPException(status_code=500, detail=str(exc)) from exc

    return StreamingResponse(
        iter([pdf_bytes]),
        media_type="application/pdf",
        headers={"Content-Disposition": f'attachment; filename="ml_certificate_{body.model_id[:8]}.pdf"'},
    )
app/routers/ml_router.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ML model training and evaluation REST endpoints."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+
6
+ from fastapi import APIRouter, HTTPException, Request, status
7
+ from fastapi.responses import Response
8
+
9
+ from app.models.ml_schemas import (
10
+ CompareResponse,
11
+ ModelType,
12
+ TrainRequest,
13
+ TrainResponse,
14
+ )
15
+
16
+ logger = logging.getLogger(__name__)
17
+ router = APIRouter(prefix="/api", tags=["ml"])
18
+
19
+
20
def _get_ml_service(request: Request):
    """Return the `ml_service` object stored on `request.app.state`."""
    app_state = request.app.state
    return app_state.ml_service
23
+
24
+
25
@router.post("/train", response_model=TrainResponse)
def train_model(request: Request, body: TrainRequest) -> TrainResponse:
    """Step-4 endpoint — trains the chosen classifier on the prepared session data and returns metrics.

    Raises:
        HTTPException: 404 when the session is unknown, 422 when training raises.
    """
    ml = _get_ml_service(request)
    session = ml.get_session(body.session_id)
    if session is None:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail=f"Session '{body.session_id}' not found. Run /api/prepare first.",
        )
    try:
        response = ml.train_and_evaluate(
            body.session_id, body.model_type, body.params,
            tune=body.tune,
            use_feature_selection=body.use_feature_selection,
        )
    except Exception as exc:
        logger.exception("Model training failed")
        # Chain the cause so the original traceback survives the translation to HTTP 422.
        raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(exc)) from exc

    # Cache metrics for comparison
    ml.store_train_response_in_model(response.model_id, response)
    return response
48
+
49
+
50
@router.post("/compare/{model_id}", response_model=CompareResponse)
def add_to_comparison(request: Request, model_id: str) -> CompareResponse:
    """Step-4 endpoint — adds the given trained model to its session's comparison list.

    The session id is taken from the stored model entry (empty string when absent).

    Raises:
        HTTPException: 404 when `model_id` is unknown, 422 when the service rejects the addition.
    """
    ml = _get_ml_service(request)
    model_data = ml.get_model(model_id)
    if model_data is None:
        raise HTTPException(status_code=404, detail=f"Model '{model_id}' not found")
    session_id = model_data.get("session_id", "")
    try:
        return ml.add_to_comparison(session_id, model_id)
    except Exception as exc:
        # Log like the other endpoints do, so the failure is not visible only to the client.
        logger.exception("Adding model to comparison failed")
        raise HTTPException(status_code=422, detail=str(exc)) from exc
62
+
63
+
64
@router.get("/compare/{session_id}", response_model=CompareResponse)
def get_comparison(request: Request, session_id: str) -> CompareResponse:
    """Step-4 endpoint — hands back what the ML service reports as the session's comparison list."""
    service = _get_ml_service(request)
    comparison = service.get_comparison(session_id)
    return comparison
69
+
70
+
71
@router.delete("/compare/{session_id}", status_code=204, response_model=None)
def clear_comparison(request: Request, session_id: str):
    """Step-4 endpoint — asks the ML service to clear the session's comparison list, then replies 204."""
    service = _get_ml_service(request)
    service.clear_comparison(session_id)
    return Response(status_code=204)
76
+
77
+
78
@router.get("/models/{model_id}")
def get_model_info(request: Request, model_id: str) -> dict:
    """Step-4 endpoint — returns stored metadata for a specific model id.

    The payload carries the model type, hyper-parameters, owning session id,
    feature names and classes. Metrics are not part of this response.

    Raises:
        HTTPException: 404 when `model_id` is unknown.
    """
    ml = _get_ml_service(request)
    data = ml.get_model(model_id)
    if data is None:
        raise HTTPException(status_code=404, detail=f"Model '{model_id}' not found")
    # Missing keys surface as None rather than raising.
    exposed_keys = ("model_type", "params", "session_id", "feature_names", "classes")
    return {"model_id": model_id, **{key: data.get(key) for key in exposed_keys}}
app/services/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Service layer — one singleton per concern, attached to `app.state` in `main.py`."""
app/services/certificate_service.py ADDED
@@ -0,0 +1,690 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """PDF certificate generation using ReportLab."""
2
+ from __future__ import annotations
3
+
4
+ import datetime
5
+ import math
6
+ from io import BytesIO
7
+ from typing import Optional
8
+
9
+ from reportlab.lib import colors
10
+ from reportlab.lib.pagesizes import A4
11
+ from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
12
+ from reportlab.lib.units import cm
13
+ from reportlab.platypus import (
14
+ HRFlowable,
15
+ Paragraph,
16
+ SimpleDocTemplate,
17
+ Spacer,
18
+ Table,
19
+ TableStyle,
20
+ )
21
+ from reportlab.platypus.flowables import Flowable
22
+
23
+ from app.models.explain_schemas import CertificateRequest, EthicsResponse
24
+ from app.models.ml_schemas import MetricsResponse, ModelType
25
+
26
# Colour palette — the app's green doubles as PRIMARY and as the SUCCESS tone.
_hex = colors.HexColor

# Brand greens
PRIMARY = _hex("#1A7A4C")
PRIMARY_DARK = _hex("#145E39")
PRIMARY_LIGHT = _hex("#E8F5EE")

# Status foreground / background pairs
SUCCESS = _hex("#1A7A4C")
SUCCESS_BG = _hex("#F0FDF4")
WARNING = _hex("#92400E")
WARNING_BG = _hex("#FFF7ED")
DANGER = _hex("#991B1B")
DANGER_BG = _hex("#FFF1F2")

# Neutrals and accent
LIGHT_GREY = _hex("#F4F7FB")
MID_GREY = _hex("#DDE3EC")
DARK_TEXT = _hex("#172B4D")
ACCENT = _hex("#0EA5E9")

# Human-readable name for each supported model type.
MODEL_LABELS = {
    ModelType.KNN: "K-Nearest Neighbours (KNN)",
    ModelType.SVM: "Support Vector Machine (SVM)",
    ModelType.DECISION_TREE: "Decision Tree",
    ModelType.RANDOM_FOREST: "Random Forest",
    ModelType.LOGISTIC_REGRESSION: "Logistic Regression",
    ModelType.NAIVE_BAYES: "Naïve Bayes",
    ModelType.XGBOOST: "XGBoost (Extreme Gradient Boosting)",
    ModelType.LIGHTGBM: "LightGBM (Light Gradient Boosting)",
}
51
+
52
+
53
+ # ---------------------------------------------------------------------------
54
+ # Custom flowable: full-width coloured banner block
55
+ # ---------------------------------------------------------------------------
56
+
57
class _BannerBlock(Flowable):
    """Flowable that paints a solid banner with a centred white title.

    The banner is a filled `width` x `height` rectangle in `bg_color` with a
    3-unit strip in `PRIMARY_DARK` along its bottom edge.
    """

    def __init__(self, width: float, height: float, bg_color: colors.Color,
                 title: str):
        """Record the banner geometry, fill colour and title text for `draw`."""
        super().__init__()
        self.width, self.height = width, height
        self.bg_color = bg_color
        self.title = title

    def draw(self):
        """Paint the background, the bottom strip and the title onto `self.canv`."""
        canvas = self.canv
        banner_w, banner_h = self.width, self.height

        # Background fill
        canvas.setFillColor(self.bg_color)
        canvas.rect(0, 0, banner_w, banner_h, fill=1, stroke=0)

        # Darker strip along the bottom edge
        canvas.setFillColor(PRIMARY_DARK)
        canvas.rect(0, 0, banner_w, 3, fill=1, stroke=0)

        # Title, centred horizontally and nudged 2 units above the vertical middle
        canvas.setFillColor(colors.white)
        canvas.setFont("Helvetica-Bold", 22)
        canvas.drawCentredString(banner_w / 2, banner_h / 2 + 2, self.title)
79
+
80
+
81
class _BorderFrame(Flowable):
    """Draws a decorative double-line border around the page."""

    def __init__(self, page_width: float, page_height: float,
                 margin: float, color: colors.Color):
        """Store the page dimensions, the margin and the border colour.

        `width` and `height` are set to 0, so the flowable reports no size of its own.
        """
        super().__init__()
        self.page_width = page_width
        self.page_height = page_height
        self.margin = margin
        self.color = color
        self.width = 0
        self.height = 0

    def draw(self):
        """Stroke two unfilled rectangles just outside the margin box.

        The outer line sits 8 units outside the margin (2.5 wide), the inner line
        4 units outside it (0.8 wide). Nothing else is rendered.
        """
        c = self.canv
        m = self.margin
        pw, ph = self.page_width, self.page_height
        c.setStrokeColor(self.color)
        # (distance outside the margin, line width): outer border first, then inner.
        for outset, line_width in ((8, 2.5), (4, 0.8)):
            edge = m - outset
            c.setLineWidth(line_width)
            c.rect(edge, edge, pw - 2 * edge, ph - 2 * edge, fill=0, stroke=1)
109
+
110
+
111
+ # ---------------------------------------------------------------------------
112
+ # Helpers
113
+ # ---------------------------------------------------------------------------
114
+
115
def _metric_colour(value: float, green: float, amber: float) -> colors.Color:
    """Map a metric to SUCCESS (>= green), WARNING (>= amber) or DANGER (anything else)."""
    return SUCCESS if value >= green else WARNING if value >= amber else DANGER
122
+
123
+
124
def _pct(value: float) -> str:
    """Render `value` multiplied by 100 with one decimal place and a trailing percent sign."""
    scaled = value * 100
    return "{:.1f}%".format(scaled)
127
+
128
+
129
def _row_bg(val: float, green: float, amber: float) -> colors.Color:
    """Pick a row background from the same thresholds used for metric colours.

    Returns SUCCESS_BG when `val >= green`, WARNING_BG when `val >= amber`,
    and DANGER_BG otherwise. The choice depends only on the value, not on row position.
    """
    for threshold, background in ((green, SUCCESS_BG), (amber, WARNING_BG)):
        if val >= threshold:
            return background
    return DANGER_BG
136
+
137
+
138
def _compute_mcc(tp: int, tn: int, fp: int, fn: int) -> Optional[float]:
    """Matthews Correlation Coefficient from the four confusion-matrix counts.

    Returns None when the denominator is zero, i.e. the coefficient is undefined.
    """
    marginals_product = (tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)
    denominator = math.sqrt(marginals_product)
    if denominator != 0:
        numerator = tp * tn - fp * fn
        return numerator / denominator
    return None
144
+
145
+
146
def _generate_takeaways(metrics: MetricsResponse, model_type: ModelType) -> list[str]:
    """Build the plain-language bullet list shown on the certificate.

    One bullet each is produced for sensitivity, AUC and the train/test gap.
    Specificity gets a bullet only at or above 0.85 or below 0.65. MCC and
    cross-validation bullets appear only when those values are present, and a
    closing note is added for ensemble, logistic-regression and decision-tree models.
    """
    bullets: list[str] = []
    add = bullets.append
    model_label = MODEL_LABELS.get(model_type, str(model_type))

    # Sensitivity (clinical priority)
    sensitivity = metrics.sensitivity
    if sensitivity >= 0.85:
        add(
            f"Excellent sensitivity ({_pct(sensitivity)}): the model correctly identifies the "
            "large majority of positive cases, making it well-suited for clinical screening."
        )
    elif sensitivity >= 0.70:
        add(
            f"Acceptable sensitivity ({_pct(sensitivity)}): most positive cases are detected, "
            "though some missed diagnoses remain possible."
        )
    else:
        add(
            f"Low sensitivity ({_pct(sensitivity)}): the model misses a substantial proportion "
            "of positive cases — not recommended for screening without further tuning."
        )

    # Specificity — the middle band (0.65 up to 0.85) intentionally yields no bullet
    specificity = metrics.specificity
    if specificity >= 0.85:
        add(
            f"High specificity ({_pct(specificity)}): very few healthy patients are incorrectly "
            "flagged, reducing unnecessary follow-up burden."
        )
    elif specificity < 0.65:
        add(
            f"Below-average specificity ({_pct(specificity)}): a notable false-positive rate "
            "could lead to unnecessary investigations in healthy patients."
        )

    # AUC
    auc = metrics.auc_roc
    if auc >= 0.90:
        add(
            f"Outstanding discrimination (AUC = {_pct(auc)}): the model reliably ranks "
            "positive cases above negative ones across all decision thresholds."
        )
    elif auc >= 0.75:
        add(
            f"Good discriminative ability (AUC = {_pct(auc)}): the model provides useful "
            "separation between classes across operating points."
        )
    else:
        add(
            f"Weak discrimination (AUC = {_pct(auc)}): the model struggles to separate "
            "positive from negative cases and should be improved before deployment."
        )

    # Overfitting warning
    if metrics.overfitting_warning:
        gap = metrics.train_accuracy - metrics.accuracy
        add(
            f"Overfitting detected: training accuracy ({_pct(metrics.train_accuracy)}) is considerably "
            f"higher than test accuracy ({_pct(metrics.accuracy)}, gap = {gap * 100:.1f} pp). "
            "Consider regularisation, pruning, or collecting more data."
        )
    else:
        add(
            f"Generalisation is healthy: the gap between training ({_pct(metrics.train_accuracy)}) "
            f"and test accuracy ({_pct(metrics.accuracy)}) is within acceptable bounds."
        )

    # MCC (skipped when the attribute is absent or None)
    mcc = getattr(metrics, "mcc", None)
    if mcc is not None:
        if mcc >= 0.6:
            add(
                f"Strong overall balance (MCC = {mcc:.3f}): the model performs well even if class "
                "sizes are imbalanced."
            )
        elif mcc >= 0.3:
            add(
                f"Moderate overall balance (MCC = {mcc:.3f}): the model shows some robustness to "
                "class imbalance, but there is room for improvement."
            )
        else:
            add(
                f"Poor balance score (MCC = {mcc:.3f}): the model may be biased toward the majority "
                "class. Consider resampling or adjusted class weights."
            )

    # Cross-val stability (population standard deviation of the fold scores)
    fold_scores = metrics.cross_val_scores
    if fold_scores:
        n_folds = len(fold_scores)
        cv_mean = sum(fold_scores) / n_folds
        cv_std = math.sqrt(
            sum((x - cv_mean) ** 2 for x in fold_scores)
            / n_folds
        )
        spread = f"(mean {_pct(cv_mean)} ± {cv_std * 100:.1f} pp)"
        if cv_std <= 0.03:
            add(
                f"{n_folds}-fold cross-validation shows very stable performance "
                f"{spread}, indicating the result is unlikely "
                "to be a lucky split."
            )
        elif cv_std <= 0.06:
            add(
                f"{n_folds}-fold cross-validation shows moderate variability "
                f"{spread}. "
                "The model is reasonably stable across data splits."
            )
        else:
            add(
                f"{n_folds}-fold cross-validation shows high variability "
                f"{spread}. "
                "Performance may depend heavily on how the data is split."
            )

    # Model-specific notes
    ensemble_types = (ModelType.RANDOM_FOREST, ModelType.XGBOOST, ModelType.LIGHTGBM)
    if model_type in ensemble_types:
        add(
            f"{model_label} is an ensemble method that aggregates many weak learners; "
            "feature-importance outputs are available for clinical interpretability."
        )
    elif model_type == ModelType.LOGISTIC_REGRESSION:
        add(
            "Logistic Regression produces calibrated probabilities and fully interpretable "
            "coefficients, making it a strong baseline for clinical audit."
        )
    elif model_type == ModelType.DECISION_TREE:
        add(
            "Decision Trees are highly interpretable but prone to overfitting on small datasets; "
            "examine the max-depth parameter if overfitting is observed."
        )

    return bullets
274
+
275
+
276
+ # ---------------------------------------------------------------------------
277
+ # Certificate service
278
+ # ---------------------------------------------------------------------------
279
+
280
class CertificateService:
    """
    Builds the HEALTH-AI completion certificate as a PDF via reportlab.

    The document contains, in order: a header banner, the "issued to" line,
    the specialty/model table, the performance summary (metrics table,
    confusion matrix, cross-validation line), bias & fairness findings,
    the EU AI Act checklist, auto-generated key takeaways, and a footer
    with a disclaimer.
    """

    def generate_pdf(
        self,
        cert_request: CertificateRequest,
        metrics: MetricsResponse,
        ethics: EthicsResponse,
        specialty_name: str,
        model_type: ModelType,
        training_time_ms: Optional[float] = None,
    ) -> bytes:
        """Main entrypoint — build the full PDF for a session and return it as bytes.

        Args:
            cert_request: Supplies ``clinician_name``, ``institution``,
                ``model_id`` and ``checklist_state``.
            metrics: Supplies accuracy, sensitivity, specificity, precision,
                f1_score, auc_roc, the confusion matrix, optional ``mcc`` and
                ``cross_val_scores``.
            ethics: Supplies ``bias_warnings``, ``subgroup_metrics`` and
                ``eu_ai_act_items``.
            specialty_name: Display name shown in the specialty/model table.
            model_type: Used for the model label and the key takeaways.
            training_time_ms: Optional training duration; shown in seconds
                when it is at least 1000 ms, otherwise in milliseconds.

        Returns:
            The rendered PDF document as raw bytes.
        """
        # Local import keeps this block self-contained; the escape helper is
        # only needed while assembling Paragraph markup.
        from xml.sax.saxutils import escape

        def _markup_safe(value) -> str:
            """Escape &, < and > so free text cannot break Paragraph markup."""
            return escape(str(value))

        buf = BytesIO()
        PAGE_W = A4[0]
        MARGIN = 2 * cm

        doc = SimpleDocTemplate(
            buf,
            pagesize=A4,
            leftMargin=MARGIN,
            rightMargin=MARGIN,
            topMargin=MARGIN,
            bottomMargin=2.2 * cm,
        )
        CONTENT_W = PAGE_W - 2 * MARGIN

        styles = getSampleStyleSheet()

        h2 = ParagraphStyle(
            "H2", parent=styles["Heading2"],
            fontSize=13, textColor=PRIMARY_DARK, spaceBefore=16, spaceAfter=5,
            borderPad=3,
        )
        body = ParagraphStyle(
            "Body", parent=styles["Normal"],
            fontSize=10, textColor=DARK_TEXT, leading=14,
        )
        body_center = ParagraphStyle(
            "BodyCenter", parent=body,
            alignment=1,
        )
        small = ParagraphStyle(
            "Small", parent=styles["Normal"],
            fontSize=8, textColor=colors.HexColor("#6B7280"), leading=11,
        )
        small_center = ParagraphStyle(
            "SmallCenter", parent=small,
            alignment=1,
        )
        disclaimer_style = ParagraphStyle(
            "Disclaimer", parent=small,
            textColor=DANGER, alignment=1, leading=11,
        )
        bullet_style = ParagraphStyle(
            "Bullet", parent=styles["Normal"],
            fontSize=9, textColor=DARK_TEXT, leading=13,
            leftIndent=14, firstLineIndent=-10,
        )
        cell8 = ParagraphStyle(
            "Cell8", parent=styles["Normal"],
            fontSize=8, textColor=DARK_TEXT, leading=10,
        )

        story = []

        # ---- GREEN HEADER BANNER ----
        banner = _BannerBlock(
            width=CONTENT_W,
            height=1.8 * cm,
            bg_color=PRIMARY,
            title="HEALTH-AI · ML Learning Tool",
        )
        story.append(banner)
        story.append(Spacer(1, 0.4 * cm))

        # Name and institution come from the request and are embedded in
        # Paragraph markup, so they must be escaped.
        issued_to = _markup_safe(cert_request.clinician_name or "Healthcare Professional")
        institution = _markup_safe(cert_request.institution or "Healthcare Institution")
        today = datetime.date.today().strftime("%d %B %Y")

        story.append(Paragraph(
            f"This certificate is issued to <b>{issued_to}</b> of <b>{institution}</b> "
            f"for completing the HEALTH-AI ML Learning Tool educational exercise on <b>{today}</b>.",
            body_center,
        ))
        story.append(Spacer(1, 0.4 * cm))

        # ---- SECTION 1: Specialty & Model ----
        story.append(Paragraph("1. Clinical Specialty &amp; AI Model", h2))

        # Plain strings in Table cells are drawn verbatim (no markup parsing),
        # so these values need no escaping.
        info_data = [
            ["Medical Specialty", specialty_name],
            ["AI Model Type", MODEL_LABELS.get(model_type, str(model_type))],
            ["Model ID", cert_request.model_id[:24] + ("…" if len(cert_request.model_id) > 24 else "")],
        ]
        if training_time_ms is not None:
            if training_time_ms >= 1000:
                time_str = f"{training_time_ms / 1000:.2f} s"
            else:
                time_str = f"{training_time_ms:.0f} ms"
            info_data.append(["Training Time", time_str])

        info_table = Table(info_data, colWidths=[5.5 * cm, 11.5 * cm])
        info_table.setStyle(TableStyle([
            ("BACKGROUND", (0, 0), (0, -1), PRIMARY_LIGHT),
            ("TEXTCOLOR", (0, 0), (-1, -1), DARK_TEXT),
            ("FONTNAME", (0, 0), (0, -1), "Helvetica-Bold"),
            ("FONTSIZE", (0, 0), (-1, -1), 9),
            ("GRID", (0, 0), (-1, -1), 0.5, MID_GREY),
            ("ROWBACKGROUNDS", (0, 0), (-1, -1), [colors.white, LIGHT_GREY]),
            ("LEFTPADDING", (0, 0), (-1, -1), 8),
            ("RIGHTPADDING", (0, 0), (-1, -1), 8),
            ("TOPPADDING", (0, 0), (-1, -1), 5),
            ("BOTTOMPADDING", (0, 0), (-1, -1), 5),
            ("LINEBELOW", (0, -1), (-1, -1), 1.5, PRIMARY),
        ]))
        story.append(info_table)
        story.append(Spacer(1, 0.4 * cm))

        # ---- SECTION 2: Performance Metrics ----
        story.append(Paragraph("2. Model Performance Summary", h2))
        story.append(Paragraph(
            "Performance measured on held-out test patients the model had never seen during training.",
            body,
        ))
        story.append(Spacer(1, 0.2 * cm))

        # Resolve MCC: prefer the field on MetricsResponse, fall back to computing from CM
        # (a stored value of exactly 0.0 is also recomputed).
        mcc_value: Optional[float] = getattr(metrics, "mcc", None)
        cm_data = metrics.confusion_matrix
        if mcc_value is None or mcc_value == 0.0:
            mcc_value = _compute_mcc(cm_data.tp, cm_data.tn, cm_data.fp, cm_data.fn)

        metric_rows = [
            ["Metric", "Value", "Threshold", "Status"],
            ["Accuracy", _pct(metrics.accuracy), "≥ 65 %",
             "✓ Acceptable" if metrics.accuracy >= 0.65 else "✗ Below threshold"],
            ["Sensitivity ★", _pct(metrics.sensitivity), "≥ 70 %",
             "✓ Acceptable" if metrics.sensitivity >= 0.70 else "✗ Below threshold"],
            ["Specificity", _pct(metrics.specificity), "≥ 65 %",
             "✓ Acceptable" if metrics.specificity >= 0.65 else "✗ Below threshold"],
            ["Precision (PPV)", _pct(metrics.precision), "≥ 60 %",
             "✓ Acceptable" if metrics.precision >= 0.60 else "✗ Below threshold"],
            ["F1 Score", _pct(metrics.f1_score), "≥ 65 %",
             "✓ Acceptable" if metrics.f1_score >= 0.65 else "✗ Below threshold"],
            ["AUC-ROC", _pct(metrics.auc_roc), "≥ 75 %",
             "✓ Acceptable" if metrics.auc_roc >= 0.75 else "✗ Below threshold"],
        ]

        if mcc_value is not None:
            metric_rows.append([
                "MCC †", f"{mcc_value:.3f}", "≥ 0.30",
                "✓ Acceptable" if mcc_value >= 0.30 else "✗ Below threshold",
            ])

        # (value, good-threshold, acceptable-threshold) per data row, in the
        # same order as metric_rows[1:]; drives row and text colours.
        perf_vals_thresholds = [
            (metrics.accuracy, 0.65, 0.55),
            (metrics.sensitivity, 0.70, 0.50),
            (metrics.specificity, 0.65, 0.55),
            (metrics.precision, 0.60, 0.50),
            (metrics.f1_score, 0.65, 0.55),
            (metrics.auc_roc, 0.75, 0.65),
        ]
        if mcc_value is not None:
            perf_vals_thresholds.append((mcc_value, 0.30, 0.10))

        row_bgs = [PRIMARY]  # header row
        for val, gt, at in perf_vals_thresholds:
            row_bgs.append(_row_bg(val, gt, at))

        perf_table = Table(metric_rows, colWidths=[5 * cm, 2.8 * cm, 3.2 * cm, 6 * cm])
        ts = [
            ("BACKGROUND", (0, 0), (-1, 0), PRIMARY),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
            ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
            ("FONTSIZE", (0, 0), (-1, -1), 9),
            ("GRID", (0, 0), (-1, -1), 0.5, MID_GREY),
            ("LEFTPADDING", (0, 0), (-1, -1), 8),
            ("TOPPADDING", (0, 0), (-1, -1), 5),
            ("BOTTOMPADDING", (0, 0), (-1, -1), 5),
            ("ALIGN", (1, 0), (2, -1), "CENTER"),
        ]
        for i, bg in enumerate(row_bgs):
            ts.append(("BACKGROUND", (0, i), (-1, i), bg))
        # Colour the Value and Status columns
        for i, (val, gt, at) in enumerate(perf_vals_thresholds, start=1):
            col = SUCCESS if val >= gt else (WARNING if val >= at else DANGER)
            ts.append(("TEXTCOLOR", (1, i), (1, i), col))
            ts.append(("FONTNAME", (1, i), (1, i), "Helvetica-Bold"))
            ts.append(("TEXTCOLOR", (3, i), (3, i), col))
            ts.append(("FONTNAME", (3, i), (3, i), "Helvetica-Bold"))
        perf_table.setStyle(TableStyle(ts))
        story.append(perf_table)
        story.append(Spacer(1, 0.2 * cm))
        story.append(Paragraph(
            "★ Sensitivity (recall) is the most critical metric for clinical screening tools. "
            "† MCC (Matthews Correlation Coefficient) accounts for class imbalance.",
            small,
        ))
        story.append(Spacer(1, 0.3 * cm))

        # ---- Confusion matrix summary ----
        story.append(Paragraph(
            "<b>Confusion Matrix Summary</b>",
            ParagraphStyle("CMHead", parent=body, textColor=PRIMARY_DARK, spaceAfter=4),
        ))
        cm_rows = [
            ["", "Predicted Positive", "Predicted Negative"],
            [
                "Actual Positive",
                f"TP = {cm_data.tp}",
                f"FN = {cm_data.fn}",
            ],
            [
                "Actual Negative",
                f"FP = {cm_data.fp}",
                f"TN = {cm_data.tn}",
            ],
        ]
        cm_table = Table(cm_rows, colWidths=[4.5 * cm, 4.5 * cm, 4.5 * cm])
        cm_ts = [
            ("BACKGROUND", (0, 0), (-1, 0), PRIMARY),
            ("BACKGROUND", (0, 0), (0, -1), PRIMARY_LIGHT),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
            ("TEXTCOLOR", (0, 1), (0, -1), PRIMARY_DARK),
            ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
            ("FONTNAME", (0, 1), (0, -1), "Helvetica-Bold"),
            ("FONTSIZE", (0, 0), (-1, -1), 9),
            ("ALIGN", (1, 0), (-1, -1), "CENTER"),
            ("ALIGN", (0, 0), (0, -1), "RIGHT"),
            ("GRID", (0, 0), (-1, -1), 0.5, MID_GREY),
            ("TOPPADDING", (0, 0), (-1, -1), 5),
            ("BOTTOMPADDING", (0, 0), (-1, -1), 5),
            ("LEFTPADDING", (0, 0), (-1, -1), 8),
            # TP cell — green
            ("BACKGROUND", (1, 1), (1, 1), SUCCESS_BG),
            ("TEXTCOLOR", (1, 1), (1, 1), SUCCESS),
            ("FONTNAME", (1, 1), (1, 1), "Helvetica-Bold"),
            # TN cell — green
            ("BACKGROUND", (2, 2), (2, 2), SUCCESS_BG),
            ("TEXTCOLOR", (2, 2), (2, 2), SUCCESS),
            ("FONTNAME", (2, 2), (2, 2), "Helvetica-Bold"),
            # FP cell — amber
            ("BACKGROUND", (1, 2), (1, 2), WARNING_BG),
            ("TEXTCOLOR", (1, 2), (1, 2), WARNING),
            ("FONTNAME", (1, 2), (1, 2), "Helvetica-Bold"),
            # FN cell — red
            ("BACKGROUND", (2, 1), (2, 1), DANGER_BG),
            ("TEXTCOLOR", (2, 1), (2, 1), DANGER),
            ("FONTNAME", (2, 1), (2, 1), "Helvetica-Bold"),
        ]
        cm_table.setStyle(TableStyle(cm_ts))
        story.append(cm_table)
        story.append(Spacer(1, 0.2 * cm))

        # Cross-val summary (population standard deviation, divisor = len(cv))
        if metrics.cross_val_scores:
            cv = metrics.cross_val_scores
            cv_mean = sum(cv) / len(cv)
            cv_std = math.sqrt(sum((x - cv_mean) ** 2 for x in cv) / len(cv))
            cv_min = min(cv)
            cv_max = max(cv)
            story.append(Paragraph(
                f"<b>{len(cv)}-Fold Cross-Validation:</b> "
                f"mean accuracy = <b>{_pct(cv_mean)}</b> | "
                f"std = {cv_std * 100:.1f} pp | "
                f"range [{_pct(cv_min)} – {_pct(cv_max)}]",
                ParagraphStyle("CVLine", parent=small,
                               textColor=DARK_TEXT, leading=12),
            ))
            story.append(Spacer(1, 0.1 * cm))

        story.append(Spacer(1, 0.4 * cm))

        # ---- SECTION 3: Bias Findings ----
        story.append(Paragraph("3. Bias &amp; Fairness Findings", h2))
        if ethics.bias_warnings:
            warn_style = ParagraphStyle(
                "Warn", parent=body, textColor=DANGER, spaceAfter=3,
            )
            for w in ethics.bias_warnings:
                # Escaped: warning text may contain "<" or "&" (e.g. from
                # subgroup labels), which would otherwise be read as markup.
                story.append(Paragraph(f"⚠ {_markup_safe(w.message)}", warn_style))
        else:
            story.append(Paragraph(
                "✓ No significant bias detected across patient subgroups.",
                ParagraphStyle("OK", parent=body, textColor=SUCCESS),
            ))
        story.append(Spacer(1, 0.2 * cm))

        subgroup_data = [["Subgroup", "n", "Accuracy", "Sens.", "Spec.", "F1", "Status"]]
        for sm in ethics.subgroup_metrics:
            status_sym = {"acceptable": "✓", "review": "⚠", "action_needed": "✗"}.get(sm.status, "?")
            subgroup_data.append([
                # Wrapped in a Paragraph so long labels wrap; escaped because
                # Paragraph parses its text as markup.
                Paragraph(_markup_safe(sm.group_label), cell8),
                str(sm.sample_size),
                _pct(sm.accuracy), _pct(sm.sensitivity), _pct(sm.specificity),
                _pct(sm.f1_score),
                f"{status_sym} {sm.status.replace('_', ' ').title()}",
            ])
        sg_table = Table(
            subgroup_data,
            colWidths=[3.2 * cm, 1.2 * cm, 2.1 * cm, 2.1 * cm, 2.1 * cm, 2.1 * cm, 4.2 * cm],
        )
        sg_ts = [
            ("BACKGROUND", (0, 0), (-1, 0), PRIMARY),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
            ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
            ("FONTSIZE", (0, 0), (-1, -1), 8),
            ("GRID", (0, 0), (-1, -1), 0.4, MID_GREY),
            ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, LIGHT_GREY]),
            ("LEFTPADDING", (0, 0), (-1, -1), 6),
            ("TOPPADDING", (0, 0), (-1, -1), 4),
            ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
            ("ALIGN", (1, 0), (-1, -1), "CENTER"),
        ]
        for i, sm in enumerate(ethics.subgroup_metrics, 1):
            col = (SUCCESS if sm.status == "acceptable"
                   else WARNING if sm.status == "review" else DANGER)
            sg_ts.append(("TEXTCOLOR", (6, i), (6, i), col))
            sg_ts.append(("FONTNAME", (6, i), (6, i), "Helvetica-Bold"))
        sg_table.setStyle(TableStyle(sg_ts))
        story.append(sg_table)
        story.append(Spacer(1, 0.4 * cm))

        # ---- SECTION 4: EU AI Act Checklist ----
        story.append(Paragraph("4. EU AI Act Compliance Checklist", h2))
        checklist_state = cert_request.checklist_state or {}
        # An item counts as complete when it is pre-checked or ticked in the
        # request's checklist state; computed once and reused for styling.
        checked_flags = [
            bool(item.get("pre_checked") or checklist_state.get(item["id"], False))
            for item in ethics.eu_ai_act_items
        ]
        checklist_data = [["#", "Requirement", "Status"]]
        for i, (item, is_checked) in enumerate(zip(ethics.eu_ai_act_items, checked_flags), 1):
            checklist_data.append([
                str(i),
                # NOTE(review): item text is passed through as markup, as in
                # the original; confirm it never contains raw "<" or "&".
                Paragraph(item["text"], cell8),
                "✓ Complete" if is_checked else "○ Pending",
            ])
        cl_table = Table(checklist_data, colWidths=[1 * cm, 14 * cm, 2 * cm])
        cl_ts = [
            ("BACKGROUND", (0, 0), (-1, 0), PRIMARY),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
            ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
            ("FONTSIZE", (0, 0), (-1, -1), 8),
            ("GRID", (0, 0), (-1, -1), 0.4, MID_GREY),
            ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, LIGHT_GREY]),
            ("LEFTPADDING", (0, 0), (-1, -1), 6),
            ("TOPPADDING", (0, 0), (-1, -1), 4),
            ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
        ]
        for i, is_checked in enumerate(checked_flags, 1):
            if is_checked:
                cl_ts.append(("TEXTCOLOR", (2, i), (2, i), SUCCESS))
                cl_ts.append(("FONTNAME", (2, i), (2, i), "Helvetica-Bold"))
            else:
                cl_ts.append(("TEXTCOLOR", (2, i), (2, i), colors.HexColor("#9CA3AF")))
        cl_table.setStyle(TableStyle(cl_ts))
        story.append(cl_table)
        story.append(Spacer(1, 0.4 * cm))

        # ---- SECTION 5: Key Takeaways ----
        story.append(Paragraph("5. Key Takeaways", h2))
        story.append(Paragraph(
            "Auto-generated insights based on this model's performance metrics:",
            ParagraphStyle("TkIntro", parent=body, textColor=colors.HexColor("#4B5563"),
                           spaceAfter=5),
        ))
        takeaways = _generate_takeaways(metrics, model_type)
        for idx, bullet in enumerate(takeaways, 1):
            story.append(Paragraph(f"<b>{idx}.</b> {bullet}", bullet_style))
            story.append(Spacer(1, 0.1 * cm))
        story.append(Spacer(1, 0.3 * cm))

        # ---- FOOTER ----
        story.append(HRFlowable(width="100%", thickness=1.5, color=PRIMARY,
                                spaceAfter=4))
        story.append(HRFlowable(width="100%", thickness=0.5, color=MID_GREY,
                                spaceAfter=5))

        story.append(Paragraph(
            f"Generated: <b>{today}</b> · HEALTH-AI ML Learning Tool v1.5 "
            "· Prepared by the HealthWithSevgi Team",
            small_center,
        ))
        story.append(Spacer(1, 0.15 * cm))
        story.append(Paragraph(
            "<b>IMPORTANT DISCLAIMER:</b> This certificate confirms completion of an educational "
            "exercise only. The AI model described herein is <b>NOT</b> validated for clinical use "
            "and must <b>NOT</b> be used to inform patient management decisions without appropriate "
            "prospective clinical validation and regulatory clearance.",
            disclaimer_style,
        ))

        def _add_page_number(canvas, doc_template):
            """Canvas callback that stamps `Page X` centred near the bottom of every page."""
            canvas.saveState()
            canvas.setFont("Helvetica", 7)
            canvas.setFillColor(colors.HexColor("#9CA3AF"))
            canvas.drawCentredString(
                PAGE_W / 2, 1.0 * cm,
                f"Page {canvas.getPageNumber()}"
            )
            canvas.restoreState()

        doc.build(story, onFirstPage=_add_page_number, onLaterPages=_add_page_number)
        return buf.getvalue()
app/services/data_service.py ADDED
@@ -0,0 +1,1272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Data exploration and preparation service."""
2
+ from __future__ import annotations
3
+
4
+ import io
5
+ import logging
6
+ import pathlib
7
+ import uuid
8
+ import zipfile
9
+ from typing import Any
10
+
11
+ import numpy as np
12
+ import pandas as pd
13
+ import requests
14
+ from imblearn.over_sampling import SMOTE
15
+ from sklearn.model_selection import train_test_split
16
+ from sklearn.preprocessing import MinMaxScaler, StandardScaler
17
+
18
+ from app.models.schemas import (
19
+ ColumnStat,
20
+ DataExplorationResponse,
21
+ PrepResponse,
22
+ PrepSettings,
23
+ )
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+ IMBALANCE_RATIO_THRESHOLD = 1.5
28
+ MIN_ROWS = 10
29
+ MAX_UPLOAD_MB = 50
30
+ MAX_TARGET_CLASSES = 20
31
+
32
+ _CACHE_DIR = pathlib.Path(__file__).parent.parent.parent / "data_cache"
33
+
34
+
35
class DatasetUnavailableError(Exception):
    """Raised when a real dataset cannot be loaded and no fallback is allowed."""

    def __init__(self, name: str, reason: str) -> None:
        """Store the dataset name and failure reason and build the error message.

        Args:
            name: Identifier of the dataset that could not be loaded; kept on
                the instance as ``dataset_name``.
            reason: Explanation of the failure; kept on the instance as
                ``reason`` and embedded in the exception message.
        """
        self.dataset_name = name
        self.reason = reason
        super().__init__(
            f"Dataset '{name}' is unavailable: {reason}. "
            "Please upload your own CSV file or ensure the dataset cache is populated."
        )
49
+
50
+
51
+ class DataService:
52
+ """
53
+ Owns CSV ingestion, column exploration, and per-specialty preparation
54
+ (split/normalise/impute/SMOTE).
55
+ """
56
+ def __init__(self) -> None:
57
+ """
58
+ Load and return the bundled dataset for the `_init__` specialty. Used internally
59
+ by `DataService._load_specialty_dataset`.
60
+ """
61
+ self._session_store: dict[str, dict[str, Any]] = {}
62
+
63
+ # ------------------------------------------------------------------
64
+ # Real-data download helper
65
+ # ------------------------------------------------------------------
66
def _fetch_cached(
    self,
    name: str,
    url: str,
    read_kwargs: dict | None = None,
) -> pd.DataFrame:
    """Download a dataset from URL, cache locally, return DataFrame.

    The cache file is ``<_CACHE_DIR>/<name>.csv``. If it already exists it is
    read and returned without any network access. Otherwise the URL is
    fetched, the payload is parsed, and only a payload that parsed
    successfully is written to the cache.

    Args:
        name: Dataset identifier; also the cache file stem.
        url: Location to download the CSV from on a cache miss.
        read_kwargs: Extra keyword arguments forwarded to ``pd.read_csv``.

    Returns:
        The dataset as a DataFrame.

    Raises:
        DatasetUnavailableError: If the cached file cannot be read, or the
            download or parsing of the downloaded payload fails.
    """
    _CACHE_DIR.mkdir(parents=True, exist_ok=True)
    cache_path = _CACHE_DIR / f"{name}.csv"
    rk = read_kwargs or {}

    # Try from cache first
    if cache_path.exists():
        try:
            return pd.read_csv(cache_path, **rk)
        except Exception as exc:
            raise DatasetUnavailableError(
                name, f"Cached file exists but failed to read: {exc}"
            ) from exc

    # Download and parse BEFORE caching: an unparseable payload (HTML error
    # page, truncated file) must never be persisted, otherwise every later
    # call would fail on the poisoned cache file.
    try:
        resp = requests.get(url, timeout=20, headers={"User-Agent": "HealthWithSevgi/1.0"})
        resp.raise_for_status()
        frame = pd.read_csv(io.BytesIO(resp.content), **rk)
    except Exception as exc:
        raise DatasetUnavailableError(
            name, f"Failed to download from {url}: {exc}"
        ) from exc

    # Caching is best-effort: the data is already in memory, so a failed
    # write is logged rather than reported as an unavailable dataset.
    # Writing to a temp file and renaming avoids leaving a partial cache file.
    tmp_path = cache_path.with_name(cache_path.name + ".tmp")
    try:
        tmp_path.write_bytes(resp.content)
        tmp_path.replace(cache_path)
    except OSError as exc:
        logger.warning("Could not cache dataset %s at %s: %s", name, cache_path, exc)
    else:
        logger.info("Downloaded real dataset: %s (%d bytes)", name, len(resp.content))
    return frame
100
+
101
+ # ------------------------------------------------------------------
102
+ # Exploration
103
+ # ------------------------------------------------------------------
104
def explore_dataframe(
    self, df: pd.DataFrame, target_col: str
) -> DataExplorationResponse:
    """Build per-column statistics for the Step-2 exploration panel.

    Args:
        df: Dataset to summarise.
        target_col: Name of the label column. If it is not present in
            ``df`` the class distribution stays empty and the imbalance
            ratio stays at 1.0 with no warning.

    Returns:
        A DataExplorationResponse with one ColumnStat per column, the row
        count, the target class distribution, and the imbalance ratio
        (largest class count divided by smallest) with its warning flag.
    """
    total_rows = len(df)
    columns: list[ColumnStat] = []
    for col in df.columns:
        series = df[col]
        missing = int(series.isna().sum())
        # A frame with columns but zero rows would otherwise divide by zero.
        missing_pct = round(missing / total_rows * 100, 2) if total_rows else 0.0
        columns.append(
            ColumnStat(
                name=col,
                dtype=str(series.dtype),
                missing_count=missing,
                missing_pct=missing_pct,
                unique_count=int(series.nunique()),
                sample_values=series.dropna().head(5).tolist(),
            )
        )

    class_counts: dict[str, int] = {}
    imbalance_ratio = 1.0
    imbalance_warning = False
    if target_col in df.columns:
        # value_counts() sorts descending, so the first entry is the
        # majority class and the last is the minority class.
        vc = df[target_col].value_counts()
        class_counts = {str(k): int(v) for k, v in vc.items()}
        if len(vc) >= 2:
            # Cast to builtin types so numpy scalars do not leak into the
            # response model.
            imbalance_ratio = float(round(vc.iloc[0] / vc.iloc[-1], 2))
            imbalance_warning = bool(imbalance_ratio >= IMBALANCE_RATIO_THRESHOLD)

    return DataExplorationResponse(
        columns=columns,
        row_count=total_rows,
        class_distribution=class_counts,
        imbalance_warning=imbalance_warning,
        imbalance_ratio=imbalance_ratio,
        target_col=target_col,
    )
141
+
142
+ # ------------------------------------------------------------------
143
+ # Preparation
144
+ # ------------------------------------------------------------------
145
+ def prepare_data(
146
+ self,
147
+ df: pd.DataFrame,
148
+ target_col: str,
149
+ settings: PrepSettings,
150
+ session_id: str | None = None,
151
+ ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, PrepResponse, list[str]]:
152
+ """
153
+ Step-3 preparation endpoint — splits, normalises, imputes missing values,
154
+ optionally applies SMOTE.
155
+ """
156
+ if session_id is None:
157
+ session_id = str(uuid.uuid4())
158
+
159
+ # Drop rows where target is NaN
160
+ df = df.dropna(subset=[target_col]).copy()
161
+
162
+ # Guard: reject continuous / high-cardinality target columns
163
+ n_unique = df[target_col].nunique()
164
+ if n_unique > MAX_TARGET_CLASSES:
165
+ raise ValueError(
166
+ f"Target column '{target_col}' has {n_unique} unique values, "
167
+ f"which looks like a continuous measurement rather than a "
168
+ f"classification label. Choose a column with at most "
169
+ f"{MAX_TARGET_CLASSES} distinct classes (e.g. a binary "
170
+ f"outcome like 0/1)."
171
+ )
172
+
173
+ # Encode target
174
+ y_raw = df[target_col]
175
+ classes = sorted(y_raw.unique().tolist(), key=str)
176
+ class_to_int = {c: i for i, c in enumerate(classes)}
177
+ y = y_raw.map(class_to_int).values.astype(int)
178
+
179
+ # Keep only numeric features (drop target + non-numeric)
180
+ feature_df = df.drop(columns=[target_col])
181
+ feature_df = feature_df.select_dtypes(include=[np.number])
182
+ feature_names = list(feature_df.columns)
183
+
184
+ dist_before = {str(k): int((y == v).sum()) for k, v in class_to_int.items()}
185
+
186
+ if settings.missing_strategy == "drop":
187
+ mask = ~feature_df.isna().any(axis=1)
188
+ feature_df = feature_df[mask]
189
+ y = y[mask]
190
+ elif settings.missing_strategy == "median":
191
+ feature_df = feature_df.fillna(feature_df.median(numeric_only=True))
192
+ else: # mode
193
+ _mode = feature_df.mode()
194
+ if not _mode.empty:
195
+ feature_df = feature_df.fillna(_mode.iloc[0])
196
+ else:
197
+ feature_df = feature_df.fillna(feature_df.median(numeric_only=True))
198
+ X = feature_df.values.astype(float)
199
+
200
+ # --- Train / test split (BEFORE imputation to avoid data leakage) ---
201
+ # Use stratified split only when every class has at least 2 samples;
202
+ # otherwise fall back to non-stratified to avoid ValueError.
203
+ from collections import Counter
204
+ class_counts_y = Counter(y)
205
+ min_class_size = min(class_counts_y.values()) if class_counts_y else 0
206
+ can_stratify = min_class_size >= 2
207
+ X_train, X_test, y_train, y_test = train_test_split(
208
+ X, y, test_size=settings.test_size, random_state=42,
209
+ stratify=y if can_stratify else None,
210
+ )
211
+
212
+ # --- Handle missing values AFTER split (train-only statistics) ---
213
+ if settings.missing_strategy == "drop":
214
+ train_mask = ~pd.DataFrame(X_train).isna().any(axis=1).values
215
+ test_mask = ~pd.DataFrame(X_test).isna().any(axis=1).values
216
+ X_train = X_train[train_mask]
217
+ y_train = y_train[train_mask]
218
+ X_test = X_test[test_mask]
219
+ y_test = y_test[test_mask]
220
+ elif settings.missing_strategy == "median":
221
+ train_df = pd.DataFrame(X_train, columns=feature_names)
222
+ medians = train_df.median()
223
+ X_train = train_df.fillna(medians).values
224
+ X_test = pd.DataFrame(X_test, columns=feature_names).fillna(medians).values
225
+ else: # mode
226
+ train_df = pd.DataFrame(X_train, columns=feature_names)
227
+ modes = train_df.mode().iloc[0]
228
+ X_train = train_df.fillna(modes).values
229
+ X_test = pd.DataFrame(X_test, columns=feature_names).fillna(modes).values
230
+
231
+ # --- Outlier handling (train statistics applied to test) ---
232
+ if settings.outlier_handling == "iqr":
233
+ train_df = pd.DataFrame(X_train, columns=feature_names)
234
+ Q1 = train_df.quantile(0.25)
235
+ Q3 = train_df.quantile(0.75)
236
+ IQR = Q3 - Q1
237
+ lower = Q1 - 1.5 * IQR
238
+ upper = Q3 + 1.5 * IQR
239
+ X_train = train_df.clip(lower=lower, upper=upper, axis=1).values
240
+ X_test = pd.DataFrame(X_test, columns=feature_names).clip(lower=lower, upper=upper, axis=1).values
241
+ elif settings.outlier_handling == "zscore_clip":
242
+ train_df = pd.DataFrame(X_train, columns=feature_names)
243
+ mean = train_df.mean()
244
+ std = train_df.std().replace(0, 1)
245
+ lower = mean - 3 * std
246
+ upper = mean + 3 * std
247
+ X_train = train_df.clip(lower=lower, upper=upper, axis=1).values
248
+ X_test = pd.DataFrame(X_test, columns=feature_names).clip(lower=lower, upper=upper, axis=1).values
249
+
250
+ # Capture raw (pre-scaling) arrays for session storage
251
+ X_train_raw = X_train.copy()
252
+ X_test_raw = X_test.copy()
253
+
254
+ # --- Normalisation ---
255
+ scaler = None
256
+ normalization_applied = settings.normalization
257
+ if settings.normalization == "zscore":
258
+ scaler = StandardScaler()
259
+ elif settings.normalization == "minmax":
260
+ scaler = MinMaxScaler()
261
+
262
+ if scaler is not None:
263
+ X_train = scaler.fit_transform(X_train)
264
+ X_test = scaler.transform(X_test)
265
+
266
+ # --- SMOTE (training only, supports multi-class) ---
267
+ smote_applied = False
268
+
269
+ # Filter out classes with fewer than 2 samples to prevent SMOTE ValueError
270
+ unique, counts = np.unique(y_train, return_counts=True)
271
+ valid_classes = unique[counts >= 2]
272
+ if len(valid_classes) < len(unique):
273
+ logger.warning(
274
+ "Dropped %d classes with only 1 sample before SMOTE/training.",
275
+ len(unique) - len(valid_classes)
276
+ )
277
+ train_mask = np.isin(y_train, valid_classes)
278
+ X_train = X_train[train_mask]
279
+ X_train_raw = X_train_raw[train_mask]
280
+ y_train = y_train[train_mask]
281
+ # Also filter test set to only contain classes present in training
282
+ test_mask = np.isin(y_test, valid_classes)
283
+ X_test = X_test[test_mask]
284
+ X_test_raw = X_test_raw[test_mask]
285
+ y_test = y_test[test_mask]
286
+
287
+ # Re-encode labels to be contiguous (0..n-1) after any class filtering.
288
+ # This prevents XGBoost/LightGBM "Invalid classes" errors when label
289
+ # values have gaps (e.g. [0, 2, 5] instead of [0, 1, 2]).
290
+ remaining_labels = np.unique(np.concatenate([y_train, y_test]))
291
+ if len(remaining_labels) > 0 and (
292
+ remaining_labels[-1] != len(remaining_labels) - 1
293
+ or len(remaining_labels) != int(remaining_labels[-1]) + 1
294
+ ):
295
+ label_map = {old: new for new, old in enumerate(sorted(remaining_labels))}
296
+ y_train = np.array([label_map[v] for v in y_train])
297
+ y_test = np.array([label_map[v] for v in y_test])
298
+ # Rebuild classes list and mapping with new contiguous labels
299
+ old_classes = classes
300
+ classes = [old_classes[old] for old in sorted(remaining_labels)]
301
+ class_to_int = {c: i for i, c in enumerate(classes)}
302
+ logger.info(
303
+ "Re-encoded %d classes to contiguous labels 0..%d",
304
+ len(remaining_labels), len(remaining_labels) - 1,
305
+ )
306
+
307
+ # Preserve pre-SMOTE labels for leak-free CV (after filtering and re-encoding)
308
+ y_train_original = y_train.copy()
309
+
310
+ unique_classes = np.unique(y_train)
311
+ if settings.use_smote and len(unique_classes) >= 2:
312
+ try:
313
+ min_class_count = min(np.bincount(y_train[y_train >= 0])) if len(y_train) > 0 else 0
314
+ k_neighbors = max(1, min(5, min_class_count - 1))
315
+ smote = SMOTE(k_neighbors=k_neighbors, random_state=42)
316
+ X_train, y_train = smote.fit_resample(X_train, y_train)
317
+ smote_applied = True
318
+ logger.info("SMOTE applied — training set resampled to %d samples", len(X_train))
319
+ except Exception as exc:
320
+ logger.warning("SMOTE failed: %s — proceeding without resampling", exc)
321
+
322
+ dist_after = {str(k): int((y_train == v).sum()) for k, v in class_to_int.items()}
323
+
324
+ # Bug #1: Build real normalization sample data (first row before vs after)
325
+ norm_samples: list[dict[str, object]] = []
326
+ sample_count = min(5, len(feature_names))
327
+ for i in range(sample_count):
328
+ before_val = float(X_train_raw[0, i]) if len(X_train_raw) > 0 else 0.0
329
+ after_val = float(X_train[0, i]) if len(X_train) > 0 else 0.0
330
+ norm_samples.append({
331
+ "feature": feature_names[i],
332
+ "before": round(before_val, 2),
333
+ "after": round(after_val, 3),
334
+ })
335
+
336
+ response = PrepResponse(
337
+ session_id=session_id,
338
+ train_size=int(len(X_train)),
339
+ test_size=int(len(X_test)),
340
+ features_count=len(feature_names),
341
+ class_distribution_before=dist_before,
342
+ class_distribution_after=dist_after,
343
+ smote_applied=smote_applied,
344
+ normalization_applied=normalization_applied,
345
+ norm_samples=norm_samples,
346
+ )
347
+
348
+ # Column metadata from raw DataFrame (before preprocessing)
349
+ raw_column_meta = []
350
+ for col in df.columns:
351
+ series = df[col]
352
+ raw_column_meta.append({
353
+ "name": col,
354
+ "dtype": str(series.dtype),
355
+ "missing_count": int(series.isna().sum()),
356
+ "missing_pct": round(series.isna().sum() / len(df) * 100, 2),
357
+ "unique_count": int(series.nunique()),
358
+ "sample_values": [str(v) for v in series.dropna().head(3).tolist()],
359
+ "is_target": col == target_col,
360
+ })
361
+
362
+ # Persist to session store
363
+ self._session_store[session_id] = {
364
+ "X_train": X_train,
365
+ "X_test": X_test,
366
+ "y_train": y_train,
367
+ "y_test": y_test,
368
+ "feature_names": feature_names,
369
+ "classes": [str(c) for c in classes],
370
+ "scaler": scaler,
371
+ "X_train_raw": X_train_raw,
372
+ "X_test_raw": X_test_raw,
373
+ "normalization": settings.normalization,
374
+ "y_train_original": y_train_original,
375
+ "smote_applied": smote_applied,
376
+ "raw_column_meta": raw_column_meta,
377
+ "row_count": len(df),
378
+ }
379
+ logger.info(
380
+ "Session %s prepared — train=%d, test=%d, features=%d",
381
+ session_id,
382
+ len(X_train),
383
+ len(X_test),
384
+ len(feature_names),
385
+ )
386
+
387
+ return X_train, X_test, y_train, y_test, response, feature_names
388
+
389
def get_session(self, session_id: str) -> dict[str, Any] | None:
    """Look up a prepared session bundle in the in-memory session store.

    Args:
        session_id: Key under which the bundle was stored.

    Returns:
        The stored bundle, or ``None`` when the store has no entry for
        ``session_id``.
    """
    bundle = self._session_store.get(session_id)
    return bundle
392
+
393
+ # ------------------------------------------------------------------
394
+ # Built-in example datasets
395
+ # ------------------------------------------------------------------
396
def get_example_dataset(self, specialty_id: str) -> pd.DataFrame:
    """Build the example dataframe registered for ``specialty_id``.

    The id is resolved through a fixed table of loader methods and the
    selected loader is invoked on every call; this method keeps no cache of
    its own.

    Args:
        specialty_id: Registry key such as ``"cardiology_hf"``.

    Returns:
        The dataframe produced by the selected loader.

    Raises:
        DatasetUnavailableError: If ``specialty_id`` has no entry in the
            table. Individual loaders may raise the same error themselves.
    """
    loaders: dict[str, Any] = dict(
        cardiology_hf=self._heart_failure,
        radiology_pneumonia=self._pneumonia,
        nephrology_ckd=self._ckd,
        oncology_breast=self._breast_cancer,
        neurology_parkinsons=self._parkinsons,
        endocrinology_diabetes=self._diabetes,
        hepatology_liver=self._liver,
        cardiology_stroke=self._stroke,
        mental_health=self._mental_health,
        pulmonology_copd=self._copd,
        haematology_anaemia=self._anaemia,
        dermatology=self._dermatology,
        ophthalmology=self._ophthalmology,
        orthopaedics=self._orthopaedics,
        icu_sepsis=self._sepsis,
        obstetrics_fetal=self._fetal_health,
        cardiology_arrhythmia=self._arrhythmia,
        oncology_cervical=self._cervical,
        thyroid=self._thyroid,
        pharmacy_readmission=self._readmission,
    )
    if specialty_id not in loaders:
        raise DatasetUnavailableError(specialty_id, f"Unknown specialty ID '{specialty_id}'")
    frame = loaders[specialty_id]()
    logger.info("Example dataset generated for '%s': %d rows", specialty_id, len(frame))
    return frame
426
+
427
+ # ------ Dataset generators ------
428
+
429
def _heart_failure(self) -> pd.DataFrame:
    """Fetch the heart-failure clinical records table (cache key ``cardiology_hf``).

    Returns:
        The dataframe handed back by ``_fetch_cached``, unmodified.

    Raises:
        DatasetUnavailableError: If the ``DEATH_EVENT`` target column is absent.
    """
    source_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00519/heart_failure_clinical_records_dataset.csv"
    frame = self._fetch_cached("cardiology_hf", source_url)
    if "DEATH_EVENT" in frame.columns:
        return frame
    raise DatasetUnavailableError("cardiology_hf", "Missing required column 'DEATH_EVENT'")
441
+
442
def _breast_cancer(self) -> pd.DataFrame:
    """Build the breast-cancer example table from scikit-learn's bundled dataset.

    The numeric ``target`` column is replaced by a ``diagnosis`` column
    (1 -> "B", 0 -> "M"), spaces in column names become underscores, and the
    result is narrowed to the listed mean/worst features plus ``diagnosis``.

    Returns:
        A dataframe holding whichever of the wanted columns exist, in the
        order listed below.
    """
    from sklearn.datasets import load_breast_cancer

    bunch = load_breast_cancer(as_frame=True)
    # assign() appends "diagnosis" as the last column of a new frame, leaving
    # the frame owned by the bunch untouched.
    frame = bunch.frame.assign(diagnosis=bunch.target.map({1: "B", 0: "M"}))
    frame = frame.drop(columns=["target"])
    frame = frame.rename(columns=lambda name: name.replace(" ", "_"))
    # The 14 registered features (mean + worst geometry/texture only) and the label.
    wanted = (
        "mean_radius", "mean_texture", "mean_perimeter", "mean_area",
        "mean_smoothness", "mean_compactness", "mean_concavity",
        "mean_concave_points", "mean_symmetry", "worst_radius",
        "worst_texture", "worst_perimeter", "worst_area", "worst_smoothness",
        "diagnosis",
    )
    present = [name for name in wanted if name in frame.columns]
    return frame[present]
464
+
465
def _diabetes(self) -> pd.DataFrame:
    """Fetch the Pima diabetes table (cache key ``endocrinology_diabetes``).

    The file is read as header-less, so the column names are supplied here
    through ``read_kwargs``.

    Returns:
        The dataframe handed back by ``_fetch_cached``, unmodified.

    Raises:
        DatasetUnavailableError: If the ``Outcome`` target column is absent.
    """
    column_names = (
        "pregnancies glucose blood_pressure skin_thickness "
        "insulin bmi diabetes_pedigree_function age Outcome"
    ).split()
    frame = self._fetch_cached(
        "endocrinology_diabetes",
        "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv",
        read_kwargs={"header": None, "names": column_names},
    )
    if "Outcome" in frame.columns:
        return frame
    raise DatasetUnavailableError("endocrinology_diabetes", "Missing required column 'Outcome'")
482
+
483
def _ckd(self) -> pd.DataFrame:
    """Load the chronic-kidney-disease table from ``nephrology_ckd.csv`` in the cache dir.

    Abbreviated column names are expanded, the label is stripped of whitespace
    and trailing dots, rows whose label is not ``ckd``/``notckd`` are dropped,
    and every other column is coerced to numeric.

    Returns:
        The cleaned dataframe with a string ``classification`` column.

    Raises:
        DatasetUnavailableError: If the cache file or the label column is
            missing, or fewer than 100 rows remain.
    """
    _CACHE_DIR.mkdir(parents=True, exist_ok=True)
    cache_path = _CACHE_DIR / "nephrology_ckd.csv"
    if not cache_path.exists():
        raise DatasetUnavailableError("nephrology_ckd", f"Cache file not found: {cache_path}")

    abbreviations = {
        "bp": "blood_pressure", "sg": "specific_gravity",
        "al": "albumin", "su": "sugar",
        "rbc": "red_blood_cells", "pc": "pus_cell",
        "bgr": "blood_glucose_random", "bu": "blood_urea",
        "sc": "serum_creatinine", "sod": "sodium",
        "pot": "potassium", "hemo": "haemoglobin",
        "pcv": "packed_cell_volume", "wc": "white_blood_cell_count",
        "rc": "red_blood_cell_count",
        "htn": "hypertension", "dm": "diabetes_mellitus",
        "cad": "coronary_artery_disease",
        "appet": "appetite", "pe": "pedal_oedema", "ane": "anemia",
        "class": "classification",
    }
    frame = pd.read_csv(cache_path)
    present = {short: full for short, full in abbreviations.items() if short in frame.columns}
    frame = frame.rename(columns=present)
    if "classification" not in frame.columns:
        raise DatasetUnavailableError("nephrology_ckd", "Missing required column 'classification'")

    labels = frame["classification"].astype(str).str.strip().str.rstrip(".")
    frame["classification"] = labels
    frame = frame[frame["classification"].isin(["ckd", "notckd"])].copy()

    # NOTE(review): any non-numeric feature value becomes NaN here — confirm
    # the cached CSV stores its categorical fields already encoded.
    feature_cols = [name for name in frame.columns if name != "classification"]
    for name in feature_cols:
        frame[name] = pd.to_numeric(frame[name], errors="coerce")

    if len(frame) < 100:
        raise DatasetUnavailableError("nephrology_ckd", f"Dataset too small ({len(frame)} rows)")
    return frame
520
+
521
def _parkinsons(self) -> pd.DataFrame:
    """Fetch the Parkinson's voice-measurement table (cache key ``neurology_parkinsons``).

    The ``name`` column is removed when present, and measurement headers that
    contain ``:``, ``(`` or ``%`` are renamed to identifier-style names.

    Returns:
        The renamed dataframe.

    Raises:
        DatasetUnavailableError: If the ``status`` target column is absent.
    """
    frame = self._fetch_cached(
        "neurology_parkinsons",
        "https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data",
    )
    # errors="ignore" makes the drop a no-op when there is no "name" column.
    frame = frame.drop(columns=["name"], errors="ignore")
    safe_names = {
        "MDVP:Fo(Hz)": "MDVP_Fo_Hz",
        "MDVP:Fhi(Hz)": "MDVP_Fhi_Hz",
        "MDVP:Flo(Hz)": "MDVP_Flo_Hz",
        "MDVP:Jitter(%)": "MDVP_Jitter_pct",
        "MDVP:Jitter(Abs)": "MDVP_Jitter_Abs",
        "MDVP:RAP": "MDVP_RAP",
        "MDVP:PPQ": "MDVP_PPQ",
        "Jitter:DDP": "Jitter_DDP",
        "MDVP:Shimmer": "MDVP_Shimmer",
        "MDVP:Shimmer(dB)": "MDVP_Shimmer_dB",
        "Shimmer:APQ3": "Shimmer_APQ3",
        "Shimmer:APQ5": "Shimmer_APQ5",
        "MDVP:APQ": "MDVP_APQ",
        "Shimmer:DDA": "Shimmer_DDA",
    }
    present = {old: new for old, new in safe_names.items() if old in frame.columns}
    frame = frame.rename(columns=present)
    if "status" in frame.columns:
        return frame
    raise DatasetUnavailableError("neurology_parkinsons", "Missing required column 'status'")
552
+
553
+ def _liver(self) -> pd.DataFrame:
554
+ """
555
+ Load and return the bundled dataset for the `liver` specialty. Used internally by
556
+ `DataService._load_specialty_dataset`.
557
+ """
558
+ ilpd_cols = [
559
+ "age", "gender", "total_bilirubin", "direct_bilirubin",
560
+ "alkaline_phosphotase", "alamine_aminotransferase",
561
+ "aspartate_aminotransferase", "total_proteins",
562
+ "albumin", "albumin_globulin_ratio", "Dataset",
563
+ ]
564
+ df = self._fetch_cached(
565
+ "hepatology_liver",
566
+ "https://archive.ics.uci.edu/ml/machine-learning-databases/00225/Indian%20Liver%20Patient%20Dataset%20(ILPD).csv",
567
+ read_kwargs={"header": None, "names": ilpd_cols},
568
+ )
569
+ if "Dataset" not in df.columns:
570
+ raise DatasetUnavailableError("hepatology_liver", "Missing required column 'Dataset'")
571
+ if df["gender"].dtype == object:
572
+ df["gender"] = (df["gender"] == "Male").astype(int)
573
+ df["albumin_globulin_ratio"] = df["albumin_globulin_ratio"].fillna(
574
+ df["albumin_globulin_ratio"].median()
575
+ )
576
+ return df
577
+
578
+ def _stroke(self) -> pd.DataFrame:
579
+ """
580
+ Load and return the bundled dataset for the `stroke` specialty. Used internally by
581
+ `DataService._load_specialty_dataset`.
582
+ """
583
+ try:
584
+ df = self._fetch_cached(
585
+ "cardiology_stroke",
586
+ "https://raw.githubusercontent.com/04-aditya/Stroke-Prediction-using-R/main/healthcare-dataset-stroke-data.csv",
587
+ )
588
+ except DatasetUnavailableError:
589
+ raise DatasetUnavailableError(
590
+ "cardiology_stroke",
591
+ "This dataset has no formal open license and cannot be bundled. "
592
+ "It must be downloaded at runtime for educational use only, "
593
+ "but the download failed. Check your network connection.",
594
+ )
595
+ if "stroke" not in df.columns:
596
+ raise DatasetUnavailableError(
597
+ "cardiology_stroke",
598
+ "Missing required column 'stroke'. "
599
+ "This dataset has no formal open license and cannot be bundled. "
600
+ "It will be downloaded at runtime for educational use only.",
601
+ )
602
+ if "id" in df.columns:
603
+ df = df.drop(columns=["id"])
604
+ cat_encodings: dict[str, dict] = {
605
+ "gender": {"Male": 1, "Female": 0, "Other": 0},
606
+ "ever_married": {"Yes": 1, "No": 0},
607
+ "work_type": {"children": 0, "Govt_job": 1, "Never_worked": 2, "Private": 3, "Self-employed": 4},
608
+ "smoking_status": {"never smoked": 0, "Unknown": 1, "formerly smoked": 2, "smokes": 3},
609
+ }
610
+ for col, mapping in cat_encodings.items():
611
+ if col in df.columns and df[col].dtype == object:
612
+ df[col] = df[col].map(mapping).fillna(0).astype(int)
613
+ if "Residence_type" in df.columns:
614
+ df = df.rename(columns={"Residence_type": "residence_type"})
615
+ if "residence_type" in df.columns and df["residence_type"].dtype == object:
616
+ df["residence_type"] = (df["residence_type"] == "Urban").astype(int)
617
+ df["bmi"] = pd.to_numeric(df["bmi"], errors="coerce")
618
+ df["stroke"] = pd.to_numeric(df["stroke"], errors="coerce")
619
+ df = df.dropna(subset=["stroke"])
620
+ if len(df) < 100:
621
+ raise DatasetUnavailableError(
622
+ "cardiology_stroke",
623
+ f"Dataset too small ({len(df)} rows). "
624
+ "This dataset has no formal open license and cannot be bundled. "
625
+ "It will be downloaded at runtime for educational use only.",
626
+ )
627
+ return df
628
+
629
def _mental_health(self) -> pd.DataFrame:
    """Load the depression dataset from the first usable CSV in the cache dir.

    Candidate files are tried in order. For each existing file the lifestyle
    columns are ordinal-encoded, yes/no columns become 0/1, the target is
    derived as ``severity_class`` from ``History of Mental Illness`` (or
    ``Depression`` as a fallback), headers are renamed to snake_case, and
    remaining text columns are replaced by categorical codes. Files with more
    than 5000 labelled rows are reduced to a 5000-row stratified sample.

    A candidate is rejected — with a logged warning — when it cannot be
    processed, has no target column, or has fewer than 100 labelled rows.

    Returns:
        The encoded dataframe from the first accepted candidate.

    Raises:
        DatasetUnavailableError: If no candidate file is accepted.
    """
    ordinal_maps = {
        "Dietary Habits": {"Healthy": 2, "Moderate": 1, "Unhealthy": 0},
        "Sleep Patterns": {"Good": 2, "Fair": 1, "Poor": 0},
        "Alcohol Consumption": {"Low": 0, "Moderate": 1, "High": 2},
        "Physical Activity Level": {"Active": 2, "Moderate": 1, "Sedentary": 0},
        "Smoking Status": {"Non-smoker": 0, "Former": 1, "Current": 2},
        "Employment Status": {"Employed": 1, "Unemployed": 0},
    }
    yes_no_cols = [
        "History of Substance Abuse", "Family History of Depression",
        "Chronic Medical Conditions",
    ]
    col_rename = {
        "Age": "age",
        "Number of Children": "number_of_children",
        "Income": "income",
        "Dietary Habits": "dietary_habits",
        "Sleep Patterns": "sleep_patterns",
        "Alcohol Consumption": "alcohol_consumption",
        "Physical Activity Level": "physical_activity_level",
        "Smoking Status": "smoking_status",
        "Employment Status": "employment_status",
        "History of Substance Abuse": "history_substance_abuse",
        "Family History of Depression": "family_history_depression",
        "Chronic Medical Conditions": "chronic_medical_conditions",
        "Marital Status": "marital_status",
        "Education Level": "education_level",
    }

    for candidate in ("depression_data.csv", "mental_health_depression.csv"):
        csv_cache = _CACHE_DIR / candidate
        if not csv_cache.exists():
            continue
        try:
            df = pd.read_csv(csv_cache)
            df = df.drop(columns=[c for c in ["Name", "name"] if c in df.columns])
            for col, mapping in ordinal_maps.items():
                if col in df.columns:
                    # Values outside the mapping fall back to the middle code 1.
                    df[col] = df[col].map(mapping).fillna(1).astype(int)
            for col in yes_no_cols:
                if col in df.columns and df[col].dtype == object:
                    df[col] = (df[col].str.lower() == "yes").astype(int)

            if "History of Mental Illness" in df.columns:
                df["severity_class"] = df["History of Mental Illness"].map(
                    {"Yes": "has_condition", "No": "no_condition"}
                )
                df = df.drop(columns=["History of Mental Illness"])
            elif "Depression" in df.columns:
                df["severity_class"] = df["Depression"].map({1: "has_condition", 0: "no_condition"})
                df = df.drop(columns=["Depression"])
            else:
                # Reject explicitly instead of relying on a KeyError from
                # dropna() being swallowed by the blanket handler below.
                logger.warning(
                    "Mental health CSV %s has no target column "
                    "('History of Mental Illness' or 'Depression')",
                    candidate,
                )
                continue

            df = df.rename(columns={k: v for k, v in col_rename.items() if k in df.columns})
            for col in df.columns:
                if col != "severity_class" and df[col].dtype == object:
                    df[col] = pd.Categorical(df[col]).codes
            df = df.dropna(subset=["severity_class"])
            if len(df) < 100:
                logger.warning(
                    "Mental health CSV %s rejected: only %d labelled rows", candidate, len(df)
                )
                continue

            if len(df) > 5000:
                from sklearn.model_selection import train_test_split as _tts
                # Keep the 5000-row "test" split as the (stratified) sample.
                _, df = _tts(
                    df, test_size=5000, random_state=42,
                    stratify=df["severity_class"] if df["severity_class"].nunique() > 1 else None,
                )
            df = df.reset_index(drop=True)
            logger.info("Loaded real mental health dataset (%d rows) from %s", len(df), candidate)
            return df
        except Exception as exc:
            # Best effort: a broken candidate must not block the next one.
            logger.warning("Mental health CSV load failed (%s): %s", candidate, exc)

    raise DatasetUnavailableError(
        "mental_health",
        "Real mental health dataset not found in data_cache/. "
        "Download from kaggle.com/datasets/anthonytherrien/depression-dataset "
        "and save as depression_data.csv in data_cache/",
    )
706
+
707
def _copd(self) -> pd.DataFrame:
    """Load the COPD table from ``pulmonology_copd.csv`` in the cache dir.

    Known header variants are renamed to canonical snake_case names, ``sex``
    is encoded as 1 for m/male/1 and 0 otherwise, every column is coerced to
    numeric, rows with a missing ``exacerbation`` target are dropped, and the
    frame is narrowed to the canonical columns that exist.

    Returns:
        The cleaned dataframe.

    Raises:
        DatasetUnavailableError: If the cache file is missing, the
            ``exacerbation`` target is absent, or fewer than 100 rows remain.
    """
    csv_cache = _CACHE_DIR / "pulmonology_copd.csv"
    if not csv_cache.exists():
        raise DatasetUnavailableError(
            "pulmonology_copd",
            f"Real COPD dataset not found at {csv_cache}. "
            "Download from kaggle.com/datasets/prakharrathi25/copd-student-dataset "
            "or physionet.org/content/copd-ehr/1.0.0/ "
            "and save as pulmonology_copd.csv in data_cache/",
        )

    df = pd.read_csv(csv_cache)
    col_rename = {
        "AGE": "age", "Age": "age",
        "SEX": "sex", "Sex": "sex", "GENDER": "sex", "Gender": "sex",
        "SMOKING_PACK_YEARS": "smoking_pack_years", "PackYears": "smoking_pack_years",
        "FEV1": "fev1_litres", "FEV1_LITRES": "fev1_litres",
        "FVC": "fvc_litres", "FVC_LITRES": "fvc_litres",
        "FEV1_FVC": "fev1_fvc_ratio", "FEV1FVC": "fev1_fvc_ratio",
        "PRIOR_EXAC": "prior_exacerbations_year", "ExacerbationRate": "prior_exacerbations_year",
        "BMI": "bmi",
        "MRC": "mrc_dyspnea_scale", "MRCScore": "mrc_dyspnea_scale",
        "SGRQ": "sgrq_score", "SGRQTotal": "sgrq_score",
        "GOLD_STAGE": "copd_gold_stage", "GOLDStage": "copd_gold_stage",
        "EXACERBATION": "exacerbation", "Exacerbation": "exacerbation",
        "EXAC": "exacerbation",
    }
    df = df.rename(columns={k: v for k, v in col_rename.items() if k in df.columns})

    # Validate the target before touching it: dropna(subset=...) raises a raw
    # KeyError on a missing column, which would bypass the intended error.
    if "exacerbation" not in df.columns:
        raise DatasetUnavailableError(
            "pulmonology_copd", f"Dataset too small or missing target ({len(df)} rows)"
        )

    if "sex" in df.columns:
        sex = df["sex"]
        if pd.api.types.is_object_dtype(sex) or pd.api.types.is_string_dtype(sex):
            df["sex"] = (sex.str.lower().isin(["m", "male", "1"])).astype(int)
    for col in df.columns:
        # Already-numeric columns pass through to_numeric unchanged.
        df[col] = pd.to_numeric(df[col], errors="coerce")
    df = df.dropna(subset=["exacerbation"])

    keep = [
        "age", "sex", "smoking_pack_years", "fev1_litres", "fvc_litres",
        "fev1_fvc_ratio", "prior_exacerbations_year", "bmi",
        "mrc_dyspnea_scale", "sgrq_score", "copd_gold_stage", "exacerbation",
    ]
    df = df[[c for c in keep if c in df.columns]]
    if len(df) < 100:
        raise DatasetUnavailableError(
            "pulmonology_copd", f"Dataset too small or missing target ({len(df)} rows)"
        )
    logger.info("Loaded real COPD dataset (%d rows)", len(df))
    return df
758
+
759
def _anaemia(self) -> pd.DataFrame:
    """Fetch the anaemia table (cache key ``haematology_anaemia``).

    Known headers are renamed to lower-case names (``Result`` becomes
    ``anemia_type``), every column is coerced to numeric, and rows with a
    missing ``anemia_type`` are dropped.

    Returns:
        The cleaned, all-numeric dataframe.

    Raises:
        DatasetUnavailableError: If the download fails or the
            ``anemia_type`` target column is absent.
    """
    try:
        frame = self._fetch_cached(
            "haematology_anaemia",
            "https://raw.githubusercontent.com/maladeep/anemia-detection-with-machine-learning/master/anemia%20data%20from%20Kaggle.csv",
        )
    except DatasetUnavailableError:
        raise DatasetUnavailableError(
            "haematology_anaemia",
            "This dataset has an unknown license and cannot be bundled. "
            "It must be downloaded at runtime for educational use only, "
            "but the download failed. Check your network connection.",
        )

    canonical = {
        "Gender": "gender", "Hemoglobin": "haemoglobin",
        "MCH": "mch", "MCHC": "mchc", "MCV": "mcv",
        "Result": "anemia_type",
    }
    present = {old: new for old, new in canonical.items() if old in frame.columns}
    frame = frame.rename(columns=present)

    # Gender is already 0/1 in the source CSV; the numeric coercion also
    # absorbs stray whitespace or string variants in any column.
    for column in frame.columns:
        frame[column] = pd.to_numeric(frame[column], errors="coerce")

    if "anemia_type" not in frame.columns:
        raise DatasetUnavailableError(
            "haematology_anaemia",
            "Missing required column 'anemia_type'. "
            "This dataset has an unknown license and cannot be bundled. "
            "It will be downloaded at runtime for educational use only.",
        )
    return frame.dropna(subset=["anemia_type"])
795
+
796
def _dermatology(self) -> pd.DataFrame:
    """Load the skin-lesion metadata table and derive a benign/malignant label.

    ``dermatology.csv`` in the cache dir is preferred; when it is missing,
    unreadable, or lacks a ``dx`` column, the table is fetched through
    ``_fetch_cached`` (key ``dermatology_tsv``) instead. Rows without a
    diagnosis are dropped, ``dx_type`` is "malignant" for mel/bcc/akiec and
    "benign" otherwise, ``sex`` becomes 1 for "male", and ``localization`` is
    replaced by its rank among the sorted distinct values.

    Returns:
        A dataframe with whichever of ``age``, ``sex``, ``localization`` exist,
        plus ``dx_type``.

    Raises:
        DatasetUnavailableError: If ``dx`` is absent or fewer than 100 rows
            remain.
    """
    def _is_text(series: pd.Series) -> bool:
        return pd.api.types.is_object_dtype(series) or pd.api.types.is_string_dtype(series)

    csv_cache = _CACHE_DIR / "dermatology.csv"
    df = None
    if csv_cache.exists():
        try:
            df = pd.read_csv(csv_cache)
        except Exception as exc:
            # Best effort: fall through to the fetch path, but leave a trace.
            logger.warning("Dermatology cache read failed (%s): %s", csv_cache, exc)
    if df is None or "dx" not in df.columns:
        df = self._fetch_cached(
            "dermatology_tsv",
            "https://dataverse.harvard.edu/api/access/datafile/4338392",
            read_kwargs={"sep": "\t", "quotechar": '"'},
        )
    if "dx" not in df.columns:
        raise DatasetUnavailableError("dermatology", "Missing required column 'dx'")

    # Drop unlabelled rows first; otherwise str(NaN) falls outside the
    # malignant set and a missing diagnosis is silently labelled "benign".
    df = df.dropna(subset=["dx"]).copy()
    malignant = {"mel", "bcc", "akiec"}
    df["dx_type"] = df["dx"].map(
        lambda x: "malignant" if str(x).strip() in malignant else "benign"
    )

    if "sex" in df.columns and _is_text(df["sex"]):
        df["sex"] = df["sex"].eq("male").fillna(False).astype(int)
    if "localization" in df.columns and _is_text(df["localization"]):
        # Exclude NaN before sorting: comparing NaN with str raises TypeError.
        locs = sorted(df["localization"].dropna().unique())
        loc_map = {v: i for i, v in enumerate(locs)}
        # Missing localizations map to NaN and are then filled with code 0.
        df["localization"] = df["localization"].map(loc_map).fillna(0).astype(int)
    if "age" in df.columns:
        df["age"] = pd.to_numeric(df["age"], errors="coerce")

    keep = ["age", "sex", "localization", "dx_type"]
    df = df[[c for c in keep if c in df.columns]]
    if len(df) < 100:
        raise DatasetUnavailableError("dermatology", f"Dataset too small ({len(df)} rows)")
    return df
832
+
833
def _ophthalmology(self) -> pd.DataFrame:
    """Load the Debrecen diabetic-retinopathy ARFF file as a numeric dataframe.

    When ``ophthalmology.arff`` is not yet in the cache dir, the zip archive
    is downloaded and its first ``.arff`` member is written there. Byte
    columns are decoded, every column is coerced to numeric, the last column
    is renamed to ``severity_grade`` and cast to int, and the feature columns
    receive descriptive names when their count matches the expected 19.

    Returns:
        The numeric dataframe with an integer ``severity_grade`` column.

    Raises:
        DatasetUnavailableError: If the download fails, no ARFF file is
            available afterwards, or fewer than 100 rows remain.
    """
    _CACHE_DIR.mkdir(parents=True, exist_ok=True)
    arff_cache = _CACHE_DIR / "ophthalmology.arff"
    if not arff_cache.exists():
        try:
            resp = requests.get(
                "https://archive.ics.uci.edu/static/public/329/diabetic+retinopathy+debrecen+data+set.zip",
                timeout=30, headers={"User-Agent": "HealthWithSevgi/1.0"},
            )
            resp.raise_for_status()
            with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
                arff_names = [n for n in zf.namelist() if n.endswith(".arff")]
                if arff_names:
                    arff_cache.write_bytes(zf.read(arff_names[0]))
                    logger.info("Extracted Debrecen DR ARFF (%d bytes)", arff_cache.stat().st_size)
        except Exception as exc:
            raise DatasetUnavailableError(
                "ophthalmology", f"Failed to download Debrecen DR ARFF: {exc}"
            ) from exc

    # Also reached when the archive contained no .arff member.
    if not arff_cache.exists():
        raise DatasetUnavailableError("ophthalmology", f"ARFF file not found: {arff_cache}")

    from scipy.io import arff as scipy_arff
    data, _meta = scipy_arff.loadarff(str(arff_cache))
    df = pd.DataFrame(data)
    for col in df.columns:
        if df[col].dtype == object:
            # Byte-valued ARFF attributes must be decoded before the numeric coercion.
            df[col] = df[col].str.decode("utf-8").str.strip()
        df[col] = pd.to_numeric(df[col], errors="coerce")

    cols = list(df.columns)
    feature_cols, target_col = cols[:-1], cols[-1]
    df = df.rename(columns={target_col: "severity_grade"})
    # Drop unparseable labels *before* the integer cast: astype(int) raises on NaN.
    df = df.dropna(subset=["severity_grade"]).copy()
    df["severity_grade"] = df["severity_grade"].astype(int)

    named_features = [
        "quality_assessment", "pre_screening", "ma_detection_0.5",
        "ma_detection_0.6", "ma_detection_0.7", "ma_detection_0.8",
        "ma_detection_0.9", "ma_detection_1.0",
        "exudate_1", "exudate_2", "exudate_3", "exudate_4",
        "exudate_5", "exudate_6", "exudate_7", "exudate_8",
        "macula_od_distance", "optic_disc_diameter", "am_fm_classification",
    ]
    if len(feature_cols) == len(named_features):
        df = df.rename(columns=dict(zip(feature_cols, named_features)))
    if len(df) < 100:
        raise DatasetUnavailableError("ophthalmology", f"Dataset too small ({len(df)} rows)")
    return df
888
+
889
def _orthopaedics(self) -> pd.DataFrame:
    """Load the vertebral-column ARFF file as a dataframe.

    When ``orthopaedics.arff`` is not yet in the cache dir, the zip archive is
    downloaded and its first member ending in ``_weka.arff`` is written there.
    Byte columns are decoded to text, and the columns receive descriptive
    names when their count matches the expected seven.

    Returns:
        The dataframe, including a ``class`` column.

    Raises:
        DatasetUnavailableError: If the download fails, no ARFF file is
            available afterwards, or there is no ``class`` column.
    """
    _CACHE_DIR.mkdir(parents=True, exist_ok=True)
    arff_path = _CACHE_DIR / "orthopaedics.arff"

    if not arff_path.exists():
        try:
            response = requests.get(
                "https://archive.ics.uci.edu/static/public/212/vertebral+column.zip",
                timeout=30, headers={"User-Agent": "HealthWithSevgi/1.0"},
            )
            response.raise_for_status()
            with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
                weka_members = [m for m in archive.namelist() if m.endswith("_weka.arff")]
                if weka_members:
                    arff_path.write_bytes(archive.read(weka_members[0]))
                    logger.info("Extracted vertebral column ARFF (%d bytes)", arff_path.stat().st_size)
        except Exception as exc:
            raise DatasetUnavailableError(
                "orthopaedics", f"Failed to download vertebral column ARFF: {exc}"
            ) from exc

    # Also reached when the archive contained no matching member.
    if not arff_path.exists():
        raise DatasetUnavailableError("orthopaedics", f"ARFF file not found: {arff_path}")

    from scipy.io import arff as scipy_arff
    records, _ = scipy_arff.loadarff(str(arff_path))
    frame = pd.DataFrame(records)
    byte_cols = [name for name in frame.columns if frame[name].dtype == object]
    for name in byte_cols:
        frame[name] = frame[name].str.decode("utf-8")

    expected = [
        "pelvic_incidence", "pelvic_tilt", "lumbar_lordosis_angle",
        "sacral_slope", "pelvic_radius", "degree_spondylolisthesis", "class",
    ]
    if len(frame.columns) == len(expected):
        frame.columns = expected
    if "class" in frame.columns:
        return frame
    raise DatasetUnavailableError("orthopaedics", "Missing required column 'class'")
932
+
933
def _sepsis(self) -> pd.DataFrame:
    """Load the ICU sepsis table from ``icu_sepsis.csv`` in the cache dir.

    The file is re-read with ``|`` as separator when a comma parse yields a
    single column. The frame is narrowed to the listed vitals/labs plus
    ``SepsisLabel``, the label is coerced to nullable integer, and rows with
    no label are dropped. Above 5000 rows, all positive rows are kept and the
    negatives are down-sampled to fill the remaining budget.

    Returns:
        The cleaned (and possibly capped, shuffled) dataframe.

    Raises:
        DatasetUnavailableError: If the cache file is missing, the
            ``SepsisLabel`` column is absent, or fewer than 100 rows remain.
    """
    csv_cache = _CACHE_DIR / "icu_sepsis.csv"
    if not csv_cache.exists():
        raise DatasetUnavailableError(
            "icu_sepsis",
            f"Real ICU/Sepsis dataset not found at {csv_cache}. "
            "Download from physionet.org/content/challenge-2019/1.0.0/, "
            "merge PSV files into one CSV, and save as icu_sepsis.csv in data_cache/",
        )

    df = pd.read_csv(csv_cache)
    if len(df.columns) == 1:
        df = pd.read_csv(csv_cache, sep="|")

    # Check the target up front: dropna(subset=...) raises a raw KeyError on a
    # missing column, so a later presence check could never be reached.
    if "SepsisLabel" not in df.columns:
        raise DatasetUnavailableError("icu_sepsis", "Missing required column 'SepsisLabel'")

    keep = [
        "HR", "O2Sat", "Temp", "SBP", "MAP", "Resp",
        "BaseExcess", "pH", "PaCO2", "Lactate", "Creatinine",
        "Bilirubin_total", "WBC", "Platelets", "Age", "Gender", "SepsisLabel",
    ]
    df = df[[c for c in keep if c in df.columns]].copy()
    df["SepsisLabel"] = pd.to_numeric(df["SepsisLabel"], errors="coerce").astype("Int64")
    df = df.dropna(subset=["SepsisLabel"])
    if len(df) < 100:
        raise DatasetUnavailableError("icu_sepsis", f"Dataset too small ({len(df)} rows)")

    if len(df) > 5000:
        # Stratified cap: guarantee all positive (sepsis=1) cases are retained,
        # then fill the remaining budget with negatives. A random cap at 5000 rows
        # would yield only ~100-250 positives at 2-5% prevalence, making the
        # imbalance effectively 20-50:1. This preserves every real sepsis case.
        sep_pos = df[df["SepsisLabel"] == 1]
        sep_neg = df[df["SepsisLabel"] == 0]
        n_neg = max(0, 5000 - len(sep_pos))
        if len(sep_neg) > n_neg:
            sep_neg = sep_neg.sample(n_neg, random_state=42)
        # Shuffle so positives are not clustered at the top of the frame.
        df = pd.concat([sep_pos, sep_neg]).sample(frac=1, random_state=42).reset_index(drop=True)
    logger.info("Loaded real ICU sepsis dataset (%d rows, %d positive)", len(df), int((df["SepsisLabel"] == 1).sum()))
    return df
974
+
975
def _fetal_health(self) -> pd.DataFrame:
    """Load the fetal-health table from ``obstetrics_fetal.csv`` in the cache dir.

    The file is re-read with ``;`` as separator when a comma parse yields at
    most two columns. Abbreviated headers are expanded (``NSP`` becomes the
    ``fetal_health`` target), the target is coerced to int after dropping
    unparseable rows, and the frame is narrowed to the expanded columns that
    exist.

    Returns:
        The cleaned dataframe with an integer ``fetal_health`` column.

    Raises:
        DatasetUnavailableError: If the cache file or the target column is
            missing, or fewer than 100 rows remain.
    """
    _CACHE_DIR.mkdir(parents=True, exist_ok=True)
    cache_path = _CACHE_DIR / "obstetrics_fetal.csv"
    if not cache_path.exists():
        raise DatasetUnavailableError("obstetrics_fetal", f"Cache file not found: {cache_path}")

    frame = pd.read_csv(cache_path)
    if len(frame.columns) <= 2:
        frame = pd.read_csv(cache_path, sep=";")

    expanded = {
        "LB": "baseline_value", "AC": "accelerations", "FM": "fetal_movement",
        "UC": "uterine_contractions", "DL": "light_decelerations",
        "DS": "severe_decelerations", "DP": "prolongued_decelerations",
        "ASTV": "abnormal_short_term_variability",
        "MSTV": "mean_value_short_term_variability",
        "ALTV": "pct_time_abnormal_long_term_variability",
        "MLTV": "mean_value_long_term_variability",
        "Mode": "histogram_mode",
        "NSP": "fetal_health",
    }
    present = {short: full for short, full in expanded.items() if short in frame.columns}
    frame = frame.rename(columns=present)
    if "fetal_health" not in frame.columns:
        raise DatasetUnavailableError("obstetrics_fetal", "Missing required column 'fetal_health'")

    target = pd.to_numeric(frame["fetal_health"], errors="coerce")
    frame["fetal_health"] = target
    frame = frame.dropna(subset=["fetal_health"])
    frame["fetal_health"] = frame["fetal_health"].astype(int)

    selected = [full for full in expanded.values() if full in frame.columns]
    frame = frame[selected].dropna(subset=["fetal_health"])
    if len(frame) < 100:
        raise DatasetUnavailableError("obstetrics_fetal", f"Dataset too small ({len(frame)} rows)")
    return frame
1011
+
1012
def _arrhythmia(self) -> pd.DataFrame:
    """
    Load the arrhythmia dataset and derive a binary `arrhythmia` target.

    The raw file is read (via `self._fetch_cached`) without a header as 279
    positional feature columns followed by the class code, with "?" treated
    as missing. Class code 1 becomes target 0; every other class code
    becomes target 1. The first 15 feature columns are renamed to
    descriptive names, the remaining ones keep their `feature_<i>` names,
    and every column is coerced to numeric (unparseable cells become NaN).

    Returns:
        DataFrame holding the feature columns plus the `arrhythmia` target.

    Raises:
        DatasetUnavailableError: if the class column is missing or fewer
            than 100 labelled rows remain.
    """
    all_cols = [f"feature_{i}" for i in range(279)] + ["arrhythmia_class"]
    df = self._fetch_cached(
        "cardiology_arrhythmia",
        "https://archive.ics.uci.edu/ml/machine-learning-databases/arrhythmia/arrhythmia.data",
        read_kwargs={"header": None, "names": all_cols, "na_values": "?"},
    )
    if "arrhythmia_class" not in df.columns:
        raise DatasetUnavailableError("cardiology_arrhythmia", "Missing required column 'arrhythmia_class'")

    # Drop unlabelled rows BEFORE deriving the target. Deriving first would
    # turn a missing class code into a positive label (NaN != 1), and a
    # later dropna on the derived column could never remove such rows.
    class_codes = pd.to_numeric(df["arrhythmia_class"], errors="coerce")
    labelled = class_codes.notna()
    df = df.loc[labelled].copy()
    df["arrhythmia"] = (class_codes[labelled] != 1).astype(int)

    # Only the first 15 columns get descriptive names. The other 264 columns
    # are kept under their positional names so the model can still use them.
    # NOTE(review): the UCI attribute list appears to order columns 13-15 as
    # QRST angle, J angle, heart rate — the names at indices 12-14 below may
    # be mislabelled. Left unchanged because downstream code may depend on
    # these column names; confirm against the dataset documentation.
    global_names = [
        "age", "sex", "height", "weight", "QRS_duration",
        "PR_interval", "QT_interval", "T_interval", "P_interval",
        "QRS_axis", "T_axis", "P_axis", "heart_rate", "J_point", "heart_rate_2",
    ]
    rename_map = {f"feature_{i}": name for i, name in enumerate(global_names)}
    df = df.rename(columns=rename_map).drop(columns=["arrhythmia_class"])

    # Coerce every column to numeric; anything unparseable becomes NaN.
    df = df.apply(pd.to_numeric, errors="coerce")
    if len(df) < 100:
        raise DatasetUnavailableError("cardiology_arrhythmia", f"Dataset too small ({len(df)} rows)")
    return df
1046
+
1047
def _cervical(self) -> pd.DataFrame:
    """
    Load the cervical-cancer risk-factor dataset with `Biopsy` as target.

    The frame is fetched via `self._fetch_cached`, "?" placeholders are
    replaced with NaN, a fixed set of columns is selected (those that are
    actually present), feature columns are renamed to snake_case, and every
    column is coerced to numeric. Rows without a `Biopsy` value are dropped.

    Returns:
        DataFrame of the selected features plus the `Biopsy` target.

    Raises:
        DatasetUnavailableError: if `Biopsy` is missing or fewer than 100
            labelled rows remain.
    """
    df = self._fetch_cached(
        "oncology_cervical",
        "https://archive.ics.uci.edu/ml/machine-learning-databases/00383/risk_factors_cervical_cancer.csv",
    )
    if "Biopsy" not in df.columns:
        raise DatasetUnavailableError("oncology_cervical", "Missing required column 'Biopsy'")
    df = df.replace("?", np.nan)

    # Feature set split into two tiers:
    #   Tier 1 — clinical test results (direct diagnostic signal):
    #     Hinselmann (colposcopy), Schiller (iodine test), Citology (pap smear),
    #     Dx:Cancer / Dx:CIN / Dx:HPV / Dx (diagnosis history flags).
    #   Tier 2 — behavioural risk factors (weaker, indirect signal):
    #     age, sexual history, smoking, contraceptives, STDs.
    # Tier 2 alone was found to give near-random predictions, so the Tier 1
    # clinical evidence is included as well.
    #
    # The mapping below is the single source of truth: its keys (in order)
    # are the source columns to keep, its values the output column names.
    rename_map = {
        "Age": "age",
        "Number of sexual partners": "number_of_sexual_partners",
        "First sexual intercourse": "first_sexual_intercourse_age",
        "Num of pregnancies": "num_of_pregnancies",
        "Smokes": "smokes",
        "Smokes (years)": "smokes_years",
        "Hormonal Contraceptives": "hormonal_contraceptives",
        "Hormonal Contraceptives (years)": "hormonal_contraceptives_years",
        "IUD": "iud",
        "IUD (years)": "iud_years",
        "STDs": "stds",
        "STDs (number)": "stds_number",
        "STDs:condylomatosis": "stds_condylomatosis",
        "STDs:cervical condylomatosis": "stds_cervical_condylomatosis",
        "STDs:HPV": "stds_hpv",
        "Dx:Cancer": "dx_cancer",
        "Dx:CIN": "dx_cin",
        "Dx:HPV": "dx_hpv",
        "Dx": "dx",
        "Hinselmann": "hinselmann",
        "Schiller": "schiller",
        "Citology": "citology",
    }
    # The target keeps its original name and goes last.
    wanted = [*rename_map, "Biopsy"]
    available = [c for c in wanted if c in df.columns]
    df = df[available].rename(columns=rename_map)
    df = df.apply(pd.to_numeric, errors="coerce")
    df = df.dropna(subset=["Biopsy"])
    # Same minimum-size guard the sibling loaders apply.
    if len(df) < 100:
        raise DatasetUnavailableError("oncology_cervical", f"Dataset too small ({len(df)} rows)")
    return df
1112
+
1113
def _thyroid(self) -> pd.DataFrame:
    """
    Load the "new-thyroid" dataset and attach a textual `class` target.

    The raw file is read (via `self._fetch_cached`) without a header: the
    numeric class code first, then five laboratory measurements. The class
    code is mapped to a label, the raw code column is removed, and rows
    whose code is not 1, 2 or 3 are dropped.

    Returns:
        DataFrame with the five measurement columns plus `class`.

    Raises:
        DatasetUnavailableError: if the class column is missing or fewer
            than 100 labelled rows remain.
    """
    col_names = ["class_raw", "T3_resin_uptake", "total_serum_thyroxine", "T3", "TSH", "max_abs_diff_TSH"]
    df = self._fetch_cached(
        "thyroid",
        "https://archive.ics.uci.edu/ml/machine-learning-databases/thyroid-disease/new-thyroid.data",
        read_kwargs={"header": None, "names": col_names, "sep": ","},
    )
    if "class_raw" not in df.columns:
        raise DatasetUnavailableError("thyroid", "Missing required column 'class_raw'")

    # Class coding per the dataset's documentation (new-thyroid.names):
    # 1 = normal, 2 = hyper, 3 = hypo. The previous mapping had the first
    # two swapped, which labelled the euthyroid majority as hyperthyroid.
    class_map = {1: "normal", 2: "hyperthyroid", 3: "hypothyroid"}
    df["class"] = df["class_raw"].map(class_map)
    df = df.drop(columns=["class_raw"]).dropna(subset=["class"])
    if len(df) < 100:
        raise DatasetUnavailableError("thyroid", f"Dataset too small ({len(df)} rows)")
    return df
1133
+
1134
def _readmission(self) -> pd.DataFrame:
    """
    Load the hospital-readmission dataset, building the local cache if needed.

    When `pharmacy_readmission.csv` is absent from the cache directory, the
    source ZIP is downloaded, the first matching CSV member is read, a fixed
    column subset is kept, categorical columns are integer-encoded, `diag_1`
    is bucketed into coarse ICD-9 groups, the result is capped at 5000 rows
    by stratified sampling on `readmitted`, and the frame is written to the
    cache. The cached CSV is then read and returned.

    Returns:
        DataFrame read back from the cache file.

    Raises:
        DatasetUnavailableError: if download/parsing fails, the cache file
            does not exist afterwards, or the cached data lacks `readmitted`
            or has fewer than 100 rows.
    """

    def _icd9_category(code: object) -> int:
        """Map one ICD-9 code to a coarse category id (0 = unknown/V/E code)."""
        # Keep the decimal point: "250.83" must parse as 250.83 (endocrine),
        # not as 25083, which would push every decimal code into "Other".
        c = str(code).strip().upper()
        if c.startswith(("V", "E")):
            return 0
        try:
            n = float(c)
        except ValueError:
            return 0
        if np.isnan(n):
            # A missing code would otherwise fail every comparison below
            # and be reported as category 9.
            return 0
        if n < 140:
            return 1  # Infectious
        if n < 240:
            return 2  # Neoplasms
        if n < 280:
            return 3  # Endocrine/Diabetes
        if n < 290:
            return 4  # Blood
        if n < 390:
            return 5  # Mental, nervous system and sense organs
        if n < 460:
            return 6  # Circulatory
        if n < 520:
            return 7  # Respiratory
        if n < 580:
            return 8  # Digestive
        return 9  # Other

    _CACHE_DIR.mkdir(parents=True, exist_ok=True)
    csv_cache = _CACHE_DIR / "pharmacy_readmission.csv"
    if not csv_cache.exists():
        try:
            resp = requests.get(
                "https://archive.ics.uci.edu/ml/machine-learning-databases/00296/dataset_diabetes.zip",
                timeout=60, headers={"User-Agent": "HealthWithSevgi/1.0"},
            )
            resp.raise_for_status()
            raw = None
            with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
                members = zf.namelist()
                csv_names = [n for n in members if "diabetic_data" in n and n.endswith(".csv")]
                if not csv_names:
                    csv_names = [n for n in members if n.endswith(".csv")]
                if csv_names:
                    # Close the member handle once pandas has consumed it.
                    with zf.open(csv_names[0]) as fh:
                        raw = pd.read_csv(fh, low_memory=False)

            if raw is not None:
                keep_cols = [
                    "age", "gender", "time_in_hospital", "num_lab_procedures",
                    "num_procedures", "num_medications", "number_outpatient",
                    "number_emergency", "number_inpatient", "number_diagnoses",
                    "max_glu_serum", "A1Cresult", "metformin", "insulin",
                    "change",
                    # Clinical context: discharge destination, admission
                    # type/source and primary diagnosis category.
                    "discharge_disposition_id", "admission_type_id",
                    "admission_source_id", "diag_1",
                    "readmitted",
                ]
                available = [c for c in keep_cols if c in raw.columns]
                raw = raw[available].copy()

                # Age arrives as ten-year brackets; encode as the decade
                # index. Unknown brackets fall back to 5 ("[50-60)").
                if "age" in raw.columns and raw["age"].dtype == object:
                    age_map = {
                        "[0-10)": 0, "[10-20)": 1, "[20-30)": 2, "[30-40)": 3,
                        "[40-50)": 4, "[50-60)": 5, "[60-70)": 6, "[70-80)": 7,
                        "[80-90)": 8, "[90-100)": 9,
                    }
                    raw["age"] = raw["age"].map(age_map).fillna(5).astype(int)
                if "gender" in raw.columns and raw["gender"].dtype == object:
                    raw["gender"] = (raw["gender"] == "Male").astype(int)

                # Per-column encoders; unmapped or missing values become 0.
                # `change` has its own map: its values are "Ch"/"No", so the
                # medication-dose map would encode the column as constant 0.
                # Glucose and A1C have separate maps so that "Norm" and ">7"
                # no longer collapse onto the same code.
                med_map = {"No": 0, "Steady": 1, "Up": 2, "Down": 3}
                encoders = {
                    "metformin": med_map,
                    "insulin": med_map,
                    "change": {"No": 0, "Ch": 1},
                    "max_glu_serum": {"None": 0, "Norm": 1, ">200": 2, ">300": 3},
                    "A1Cresult": {"None": 0, "Norm": 1, ">7": 2, ">8": 3},
                }
                for col, mapping in encoders.items():
                    if col in raw.columns and raw[col].dtype == object:
                        raw[col] = raw[col].map(mapping).fillna(0).astype(int)

                # Raw ICD-9 strings have no ordinal meaning; bucket them
                # into major disease categories instead.
                if "diag_1" in raw.columns:
                    raw["diag_1"] = raw["diag_1"].apply(_icd9_category)

                raw = raw.dropna(subset=["readmitted"])
                if len(raw) > 5000:
                    # Stratified cap: keep each readmission class in
                    # proportion rather than taking a plain random sample.
                    from sklearn.model_selection import train_test_split as _tts
                    _, raw = _tts(
                        raw, test_size=5000, random_state=42,
                        stratify=raw["readmitted"] if raw["readmitted"].nunique() > 1 else None,
                    )
                    raw = raw.reset_index(drop=True)
                raw.to_csv(csv_cache, index=False)
                logger.info("Cached readmission dataset (%d rows)", len(raw))
        except Exception as exc:
            raise DatasetUnavailableError(
                "pharmacy_readmission", f"Failed to download/parse readmission ZIP: {exc}"
            ) from exc

    if not csv_cache.exists():
        raise DatasetUnavailableError("pharmacy_readmission", f"Cache file not found: {csv_cache}")

    df = pd.read_csv(csv_cache)
    if "readmitted" not in df.columns or len(df) < 100:
        raise DatasetUnavailableError("pharmacy_readmission", "Invalid or too small dataset")
    return df
1240
+
1241
def _pneumonia(self) -> pd.DataFrame:
    """
    Load chest X-ray metadata restricted to "Pneumonia" / "No Finding" rows.

    The frame comes from `self._fetch_cached`. Columns are renamed to
    snake_case, `sex` and `view_position` are binarised (1 for "M" / "PA")
    when they hold object data, and `age` is reduced to its digits and
    parsed as a number. Rows lacking a label or a parseable age are dropped.

    Returns:
        DataFrame with whichever of `age`, `sex`, `view_position`,
        `follow_up_number` and `Finding_Label` are present.

    Raises:
        DatasetUnavailableError: if `Finding Labels` is missing or fewer
            than 100 rows remain.
    """
    frame = self._fetch_cached(
        "radiology_pneumonia",
        "https://raw.githubusercontent.com/gregwchase/nih-chest-xray/master/data/Data_Entry_2017.csv",
    )
    if "Finding Labels" not in frame.columns:
        raise DatasetUnavailableError("radiology_pneumonia", "Missing required column 'Finding Labels'")

    wanted_labels = ["Pneumonia", "No Finding"]
    frame = frame[frame["Finding Labels"].isin(wanted_labels)].copy()
    frame = frame.rename(columns={
        "Patient Age": "age",
        "Patient Gender": "sex",
        "View Position": "view_position",
        "Follow-up #": "follow_up_number",
        "Finding Labels": "Finding_Label",
    })

    # Binary-encode the two categorical columns: 1 for the listed value.
    for column, positive in (("sex", "M"), ("view_position", "PA")):
        if column in frame.columns and frame[column].dtype == object:
            frame[column] = (frame[column] == positive).astype(int)

    ordered = ("age", "sex", "view_position", "follow_up_number", "Finding_Label")
    selected = [name for name in ordered if name in frame.columns]
    frame = frame[selected].dropna(subset=["Finding_Label"])

    # Strip every non-digit character before parsing the age.
    digits_only = frame["age"].astype(str).str.replace(r"[^0-9]", "", regex=True)
    frame["age"] = pd.to_numeric(digits_only, errors="coerce")
    frame = frame.dropna(subset=["age"])
    if len(frame) < 100:
        raise DatasetUnavailableError("radiology_pneumonia", f"Dataset too small ({len(frame)} rows)")
    return frame
1272
+
app/services/ethics_service.py ADDED
@@ -0,0 +1,500 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Ethics, fairness, and bias analysis service."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ from typing import Any
6
+
7
+ import numpy as np
8
+ from sklearn.metrics import (
9
+ accuracy_score,
10
+ confusion_matrix,
11
+ f1_score,
12
+ precision_score,
13
+ recall_score,
14
+ )
15
+
16
+ from app.models.explain_schemas import (
17
+ BiasWarning,
18
+ EthicsResponse,
19
+ RepresentationWarning,
20
+ SubgroupMetrics,
21
+ )
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
# EU AI Act checklist shown to the user. Items with `pre_checked=True` are
# always reported as checked; the others reflect the stored per-model state.
EU_AI_ACT_ITEMS = [
    dict(
        id="explainability",
        text="Model Explainability",
        description="Model outputs include explanations so clinicians can understand why a prediction was made. Completed automatically via SHAP analysis in Step 6.",
        article="Art. 13",
        pre_checked=True,
    ),
    dict(
        id="data_source",
        text="Data Transparency",
        description="Training data source, size, and feature set are documented and reviewable. Completed automatically — dataset details shown in Step 2.",
        article="Art. 10",
        pre_checked=True,
    ),
    dict(
        id="bias_audit",
        text="Subgroup Bias Audit",
        description="Model performance has been evaluated across demographic subgroups (gender, age) to identify disparities in accuracy or sensitivity.",
        article="Art. 10(2f)",
        pre_checked=False,
    ),
    dict(
        id="human_oversight",
        text="Human Oversight Plan",
        description="A qualified clinician will review all AI-generated predictions before any clinical action is taken. The AI serves as a decision-support tool, not a replacement.",
        article="Art. 14",
        pre_checked=False,
    ),
    dict(
        id="gdpr",
        text="Patient Data Privacy (GDPR)",
        description="Patient data is processed locally within this session. No personal health data is transmitted to external servers or stored permanently.",
        article="Art. 10(5)",
        pre_checked=False,
    ),
    dict(
        id="monitoring",
        text="Post-Deployment Monitoring",
        description="A plan exists to continuously monitor model performance (accuracy drift, data distribution shift) after deployment and retrain when metrics degrade.",
        article="Art. 72",
        pre_checked=False,
    ),
    dict(
        id="incident_reporting",
        text="Incident Reporting Pathway",
        description="A clear process is defined for reporting AI-related adverse events, including who to notify, escalation steps, and documentation requirements.",
        article="Art. 73",
        pre_checked=False,
    ),
    dict(
        id="clinical_validation",
        text="Clinical Validation",
        description="The model has been validated on an independent clinical dataset by domain experts before any real-world patient-facing use.",
        article="Art. 9",
        pre_checked=False,
    ),
]
83
+
84
# Illustrative case studies returned with every ethics response.
CASE_STUDIES = [
    dict(
        id="pulse_ox",
        title="Pulse Oximeter Bias in COVID-19 Patients",
        specialty="Critical Care",
        year=2020,
        what_happened=(
            "Pulse oximeters overestimated oxygen saturation in patients with darker skin tones, "
            "masking hypoxaemia. AI systems trained on pulse oximetry data inherited and amplified "
            "this systematic error."
        ),
        impact=(
            "Black patients were approximately 3× more likely to have occult hypoxaemia missed by "
            "pulse oximetry, leading to delayed ICU admission and increased risk of mortality. "
            "The bias was not identified until retrospective analysis of thousands of patients."
        ),
        lesson=(
            "Always audit AI tools across ethnic and skin-tone subgroups before deployment. "
            "Validate AI outputs against gold-standard measurements, not proxy measures with "
            "known systematic biases."
        ),
        severity="failure",
    ),
    dict(
        id="sepsis_alert",
        title="Sepsis Alert Algorithm Over-Alerting",
        specialty="ICU / Emergency Medicine",
        year=2021,
        what_happened=(
            "A widely deployed sepsis prediction model generated frequent alerts for patients "
            "who did not have sepsis, causing clinician alert fatigue. Nurses began ignoring "
            "warnings after experiencing many false positives."
        ),
        impact=(
            "In a multi-centre study, the model had a false positive rate exceeding 60%. "
            "Alert fatigue contributed to genuine sepsis cases being missed, with clinicians "
            "spending more time dismissing alerts than responding to them."
        ),
        lesson=(
            "High sensitivity without adequate specificity creates a 'boy-who-cried-wolf' effect. "
            "Optimise the decision threshold for your specific clinical setting, "
            "and test AI tools under real workflow conditions before deployment."
        ),
        severity="near_miss",
    ),
    dict(
        id="dermatology_bias",
        title="Dermatology AI Underperforming on Dark Skin Tones",
        specialty="Dermatology",
        year=2019,
        what_happened=(
            "A commercially deployed melanoma detection AI, trained predominantly on images "
            "from light-skinned patients, achieved strong AUC on light skin tones "
            "but significantly reduced performance on dark skin tones."
        ),
        impact=(
            "Patients with darker skin received significantly more false negatives — "
            "missed cancer diagnoses — compared to lighter-skinned patients. "
            "This disparity was not apparent from the published overall AUC figure."
        ),
        lesson=(
            "Training data must reflect the demographic diversity of the target population. "
            "Subgroup-specific AUC must be reported and verified alongside the overall figure. "
            "Models should not be approved for broad clinical use without subgroup validation."
        ),
        severity="prevention",
    ),
]
152
+
153
# A subgroup whose sensitivity trails the overall value by more than this
# fraction (0.10 = 10 percentage points) is flagged.
BIAS_SENSITIVITY_GAP_THRESHOLD = 0.10

# Reference population shares, in percent, used for representation-gap checks.
POPULATION_NORMS: dict[str, dict[str, float]] = {
    "sex": dict(Male=50.0, Female=50.0),
    "age_group": {
        "18-60": 55.0,
        "61-75": 30.0,
        "76+": 15.0,
    },
}

# A dataset share deviating from its norm by more than this many percentage
# points is flagged as a representation gap.
REPRESENTATION_GAP_THRESHOLD_PP = 15.0
163
+
164
+
165
class EthicsService:
    """Runs the fairness audit — subgroup metric computation, bias detection, checklist state.

    Checklist state is held in an in-memory dict keyed by model id.
    """

    # Feature names probed, in order, to locate demographic columns. Both
    # the subgroup analysis and the representation summary use these tuples
    # so the two can never disagree on whether a column exists.
    _SEX_COLUMNS = ("sex", "gender", "Gender", "Sex")
    _AGE_COLUMNS = ("age", "Age")
    # Inclusive upper bounds of the first two age bands (<=60, 61-75, 76+).
    _AGE_BIN_EDGES = (60, 75)

    def __init__(self) -> None:
        """Create the in-memory checklist store."""
        self._checklist_store: dict[str, dict[str, bool]] = {}

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _find_column(feature_names: list[str], candidates: tuple[str, ...]) -> int | None:
        """Return the index of the first candidate present in `feature_names`, else None."""
        for candidate in candidates:
            if candidate in feature_names:
                return feature_names.index(candidate)
        return None

    @staticmethod
    def _unscale_column(values: np.ndarray, col: int, scaler: Any, failure_message: str) -> np.ndarray:
        """Map one scaled column back to original units using the scaler's fitted statistics.

        Handles scalers exposing `mean_`/`scale_` (x * scale + mean) or
        `data_min_`/`data_max_` (x * (max - min) + min). With no scaler, an
        unrecognised scaler, or on any failure, `values` is returned as-is;
        failures are logged with `failure_message` (one `%s` for the error).
        """
        if scaler is None:
            return values
        try:
            if getattr(scaler, "mean_", None) is not None:
                return values * scaler.scale_[col] + scaler.mean_[col]
            if getattr(scaler, "data_min_", None) is not None:
                span = scaler.data_max_[col] - scaler.data_min_[col]
                return values * span + scaler.data_min_[col]
        except Exception as exc:  # best-effort: fall back to scaled values
            logger.warning(failure_message, exc)
        return values

    @classmethod
    def _age_bins(cls, ages: np.ndarray) -> np.ndarray:
        """Assign each age to band 0 (<=60), 1 (61-75) or 2 (76+).

        `right=True` makes the upper bounds inclusive so that 60 falls in
        the first band and 75 in the second, matching the band labels.
        Ages are rounded first so float noise from the inverse transform
        cannot push an exact boundary value into the next band.
        """
        rounded = np.round(np.asarray(ages, dtype=float), 6)
        return np.digitize(rounded, bins=list(cls._AGE_BIN_EDGES), right=True)

    @staticmethod
    def _binary_specificity(cm: np.ndarray) -> float:
        """Specificity TN / (TN + FP) from a 2x2 confusion matrix whose row 0 is the negative class."""
        tn, fp = cm[0, 0], cm[0, 1]
        denom = tn + fp
        return float(tn / denom) if denom > 0 else 0.0

    def _slice_metrics(
        self,
        group_ids: np.ndarray,
        group_defs: list[tuple[int, str, str]],
        y_test: np.ndarray,
        y_pred: np.ndarray,
        overall_sensitivity: float,
        is_binary: bool,
    ) -> list[SubgroupMetrics]:
        """Compute metrics for every listed group that has at least 5 test rows."""
        results: list[SubgroupMetrics] = []
        for g_val, g_name, g_label in group_defs:
            mask = group_ids == g_val
            size = int(mask.sum())
            if size < 5:
                continue
            results.append(self._compute_subgroup_metrics(
                y_test[mask], y_pred[mask], g_name, g_label,
                size, overall_sensitivity, is_binary,
            ))
        return results

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def analyze_bias(
        self,
        model_id: str,
        model: Any,
        X_test: np.ndarray,
        y_test: np.ndarray,
        feature_names: list[str],
        classes: list[str],
        X_train: np.ndarray,
        scaler: Any = None,
    ) -> EthicsResponse:
        """Main entrypoint — slice predictions by each sensitive attribute and emit metrics + warnings.

        Predicts on `X_test`, computes overall sensitivity (binary recall
        for two classes, macro recall otherwise), then per-subgroup metrics
        for sex and age bands when those columns exist in `feature_names`.
        Also summarises training-set representation and merges the stored
        checklist state for `model_id` into the EU AI Act items.
        """
        is_binary = len(classes) == 2
        y_pred = model.predict(X_test)

        overall_sensitivity = float(
            recall_score(y_test, y_pred, average="binary" if is_binary else "macro", zero_division=0)
        )

        # --- Find demographic columns ---
        sex_col = self._find_column(feature_names, self._SEX_COLUMNS)
        age_col = self._find_column(feature_names, self._AGE_COLUMNS)

        demographics_available = sex_col is not None or age_col is not None
        demographics_note = ""
        subgroup_metrics: list[SubgroupMetrics] = []

        if not demographics_available:
            demographics_note = (
                "Subgroup bias analysis was not performed because this dataset does not contain "
                "demographic variables (sex/gender or age). Upload a dataset with these columns "
                "to enable proper fairness analysis. Results shown below reflect model-level "
                "aggregate performance only."
            )

        # Gender subgroups
        if sex_col is not None:
            # NOTE(review): the 0.5 threshold is applied to the column as
            # passed in. If that column was standardised, a heavily
            # imbalanced sex split could put both values on one side of 0.5
            # — confirm how the caller scales this column.
            gender_labels = (X_test[:, sex_col] > 0.5).astype(int)
            subgroup_metrics.extend(self._slice_metrics(
                gender_labels,
                [(0, "gender", "Female"), (1, "gender", "Male")],
                y_test, y_pred, overall_sensitivity, is_binary,
            ))

        # Age subgroups
        if age_col is not None:
            raw_ages = self._unscale_column(
                X_test[:, age_col].copy(), age_col, scaler,
                "Age inverse-transform failed: %s — using scaled values for grouping",
            )
            subgroup_metrics.extend(self._slice_metrics(
                self._age_bins(raw_ages),
                [(0, "age_group", "18–60"), (1, "age_group", "61–75"), (2, "age_group", "76+")],
                y_test, y_pred, overall_sensitivity, is_binary,
            ))

        # Bias warnings (only when real subgroups exist)
        bias_warnings = self._detect_bias(subgroup_metrics, overall_sensitivity) if subgroup_metrics else []

        # Training representation
        rng = np.random.default_rng(42)
        training_representation, representation_warnings = self._training_representation(
            X_train, feature_names, rng, scaler=scaler,
        )

        # Checklist state: pre-checked items are always on; the rest come
        # from whatever was stored for this model (default unchecked).
        items = [dict(item) for item in EU_AI_ACT_ITEMS]
        stored = self._checklist_store.get(model_id, {})
        for item in items:
            item["checked"] = True if item["pre_checked"] else stored.get(item["id"], False)

        return EthicsResponse(
            model_id=model_id,
            subgroup_metrics=subgroup_metrics,
            bias_warnings=bias_warnings,
            training_representation=training_representation,
            representation_warnings=representation_warnings,
            overall_sensitivity=round(overall_sensitivity, 4),
            eu_ai_act_items=items,
            case_studies=CASE_STUDIES,
            demographics_available=demographics_available,
            demographics_note=demographics_note,
        )

    def _compute_subgroup_metrics(
        self,
        y_true: np.ndarray,
        y_pred: np.ndarray,
        group_name: str,
        group_label: str,
        sample_size: int,
        overall_sensitivity: float,
        is_binary: bool,
    ) -> SubgroupMetrics:
        """Compute accuracy, sensitivity, specificity, precision and F1 for a single subgroup slice.

        Status rules: `action_needed` when sensitivity is below 0.5 or trails
        the overall value by more than 0.2; otherwise `review` when the gap
        exceeds `BIAS_SENSITIVITY_GAP_THRESHOLD` or the weakest metric is
        below 0.65; otherwise `acceptable`.
        """
        avg = "binary" if is_binary else "macro"
        acc = float(accuracy_score(y_true, y_pred))
        sens = float(recall_score(y_true, y_pred, average=avg, zero_division=0))
        prec = float(precision_score(y_true, y_pred, average=avg, zero_division=0))
        f1 = float(f1_score(y_true, y_pred, average=avg, zero_division=0))
        cm = confusion_matrix(y_true, y_pred)
        if is_binary and cm.shape == (2, 2):
            # Macro-averaging per-class specificity over two classes yields
            # (sensitivity + specificity) / 2, i.e. balanced accuracy. Report
            # the true specificity of the positive class instead, matching
            # the binary averaging used for the other metrics.
            spec = self._binary_specificity(cm)
        else:
            spec = self._macro_specificity(cm)
        gap = overall_sensitivity - sens

        reasons: list[str] = []
        if sens < 0.5:
            reasons.append(f"Sensitivity ({sens*100:.1f}%) is below the 50% clinical minimum")
        if gap > 0.2:
            reasons.append(f"Sensitivity gap ({gap*100:.1f}pp) exceeds the 20pp action threshold vs. overall ({overall_sensitivity*100:.1f}%)")

        if reasons:
            status = "action_needed"
        else:
            if gap > BIAS_SENSITIVITY_GAP_THRESHOLD:
                reasons.append(f"Sensitivity gap ({gap*100:.1f}pp) exceeds the 10pp review threshold vs. overall ({overall_sensitivity*100:.1f}%)")
            # Weakest metric; on ties the first in this order is named.
            named = [
                ("Accuracy", acc), ("Sensitivity", sens), ("Specificity", spec),
                ("Precision", prec), ("F1", f1),
            ]
            metric_name, low_metric = min(named, key=lambda pair: pair[1])
            if low_metric < 0.65:
                reasons.append(f"{metric_name} ({low_metric*100:.1f}%) is below the 65% quality threshold")
            if reasons:
                status = "review"
            else:
                status = "acceptable"
                reasons.append("All metrics meet clinical thresholds")

        return SubgroupMetrics(
            group_name=group_name,
            group_label=group_label,
            sample_size=sample_size,
            accuracy=round(acc, 4),
            sensitivity=round(sens, 4),
            specificity=round(spec, 4),
            precision=round(prec, 4),
            f1_score=round(f1, 4),
            status=status,
            status_reason="; ".join(reasons),
        )

    def _macro_specificity(self, cm: np.ndarray) -> float:
        """Macro-averaged specificity: mean over classes of TN / (TN + FP), one-vs-rest.

        A class with no negatives contributes 0.0; an empty matrix gives 0.0.
        """
        total = cm.sum()
        specs = []
        for i in range(len(cm)):
            tp = cm[i, i]
            fn = cm[i, :].sum() - tp
            fp = cm[:, i].sum() - tp
            tn = total - tp - fn - fp
            denom = tn + fp
            specs.append(tn / denom if denom > 0 else 0.0)
        return float(np.mean(specs)) if specs else 0.0

    def _detect_bias(
        self,
        subgroup_metrics: list[SubgroupMetrics],
        overall_sensitivity: float,
    ) -> list[BiasWarning]:
        """Emit a `BiasWarning` for each subgroup whose sensitivity trails the overall value by more than the threshold."""
        warnings: list[BiasWarning] = []
        for sm in subgroup_metrics:
            if sm.sensitivity >= overall_sensitivity - BIAS_SENSITIVITY_GAP_THRESHOLD:
                continue
            gap = overall_sensitivity - sm.sensitivity
            overall_pct = round(overall_sensitivity * 100, 1)
            group_pct = round(sm.sensitivity * 100, 1)
            gap_pp = round(gap * 100, 1)
            warnings.append(BiasWarning(
                detected=True,
                message=(
                    f"Bias Detected: Sensitivity for {sm.group_label} patients "
                    f"({group_pct}%) is {gap_pp} percentage points lower than the "
                    f"overall sensitivity ({overall_pct}%). "
                    f"This model should NOT be deployed until this gap is addressed."
                ),
                affected_group=sm.group_label,
                metric="sensitivity",
                gap=round(gap, 4),
            ))
        return warnings

    @staticmethod
    def _representation_gaps(
        attribute: str,
        dataset: dict[str, float],
        norms: dict[str, float],
    ) -> list[RepresentationWarning]:
        """Build a warning for each group whose dataset share is more than the threshold away from its norm."""
        found: list[RepresentationWarning] = []
        for group_label, dataset_pct in dataset.items():
            norm_pct = norms.get(group_label)
            if norm_pct is None:
                continue
            gap_pp = round(abs(dataset_pct - norm_pct), 1)
            if gap_pp > REPRESENTATION_GAP_THRESHOLD_PP:
                found.append(RepresentationWarning(
                    group=group_label,
                    attribute=attribute,
                    dataset_pct=dataset_pct,
                    population_pct=norm_pct,
                    gap_pp=gap_pp,
                    message=(
                        f"{group_label} representation ({dataset_pct}%) deviates from "
                        f"population norm ({norm_pct}%) by {gap_pp}pp"
                    ),
                ))
        return found

    def _training_representation(
        self,
        X_train: np.ndarray,
        feature_names: list[str],
        rng: np.random.Generator,
        scaler: Any = None,
    ) -> tuple[dict, list[RepresentationWarning]]:
        """Compute training-data demographic breakdown and flag >15pp gaps.

        Returns the representation dict (dataset share vs. population norm
        for gender and age group) and the list of representation warnings,
        sex warnings first.
        """
        warnings: list[RepresentationWarning] = []

        # --- Sex / gender ---
        sex_col = self._find_column(feature_names, self._SEX_COLUMNS)
        if sex_col is not None:
            female_pct = float(np.mean(X_train[:, sex_col] < 0.5) * 100)
        else:
            # NOTE(review): with no sex column this is a random placeholder
            # in [40, 60), not a measurement, yet it is returned in the same
            # field as real data. Kept for response-shape compatibility —
            # consider surfacing "unknown" instead.
            female_pct = float(rng.uniform(40, 60))
        male_pct = 100 - female_pct

        sex_dataset = {"Male": round(male_pct, 1), "Female": round(female_pct, 1)}
        sex_norms = POPULATION_NORMS["sex"]
        warnings.extend(self._representation_gaps("sex", sex_dataset, sex_norms))

        # --- Age groups ---
        age_col = self._find_column(feature_names, self._AGE_COLUMNS)
        if age_col is not None:
            raw_ages = self._unscale_column(
                X_train[:, age_col].copy(), age_col, scaler,
                "Age inverse-transform failed in representation: %s — using scaled values",
            )
            age_groups = self._age_bins(raw_ages)
            n_train = len(X_train)
            age_dataset = {
                "18-60": round(float(np.sum(age_groups == 0)) / n_train * 100, 1),
                "61-75": round(float(np.sum(age_groups == 1)) / n_train * 100, 1),
                "76+": round(float(np.sum(age_groups == 2)) / n_train * 100, 1),
            }
        else:
            # NOTE(review): with no age column the population norms are
            # echoed back as if they were the dataset's own distribution.
            age_dataset = {"18-60": 55.0, "61-75": 30.0, "76+": 15.0}

        age_norms = POPULATION_NORMS["age_group"]
        warnings.extend(self._representation_gaps("age_group", age_dataset, age_norms))

        representation = {
            "gender": {
                "dataset": sex_dataset,
                "population_norm": sex_norms,
            },
            "age_group": {
                "dataset": age_dataset,
                "population_norm": age_norms,
            },
        }

        return representation, warnings

    def update_checklist(self, model_id: str, item_id: str, checked: bool) -> dict:
        """Step-7 endpoint — toggles a single EU AI Act checklist item for the session.

        Returns the full stored checklist dict for `model_id` after the update.
        """
        model_items = self._checklist_store.setdefault(model_id, {})
        model_items[item_id] = checked
        return model_items
app/services/explain_service.py ADDED
@@ -0,0 +1,665 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """SHAP-based explainability service."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ from typing import Any
6
+
7
+ import numpy as np
8
+
9
+ from app.models.explain_schemas import (
10
+ FeatureImportanceItem,
11
+ GlobalExplainabilityResponse,
12
+ SamplePatient,
13
+ SamplePatientsResponse,
14
+ SHAPWaterfallPoint,
15
+ SinglePatientExplainResponse,
16
+ WhatIfResponse,
17
+ )
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
# Maps raw dataset column names to clinician-readable labels.
#
# The map is shared across all specialties, so a column name can only carry
# ONE label. The original literal listed "blood_pressure", "albumin" and
# "hypertension" twice; in a dict display the last value silently wins, so the
# earlier entries were dead. They are removed here and the effective (last)
# values are kept, so lookups behave exactly as before.
CLINICAL_NAME_MAP: dict[str, str] = {
    # Demographics
    "age": "Patient Age (years)",
    "sex": "Patient Sex",
    "gender": "Patient Gender",
    "height": "Patient Height (cm)",
    "weight": "Patient Weight (kg)",
    "bmi": "Body Mass Index (kg/m²)",
    # Cardiology / HF
    "ejection_fraction": "Left Ventricular Ejection Fraction (%)",
    "serum_creatinine": "Serum Creatinine (mg/dL)",
    "serum_sodium": "Serum Sodium (mEq/L)",
    "creatinine_phosphokinase": "Creatine Phosphokinase (mcg/L)",
    "platelets": "Platelet Count (kiloplatelets/mL)",
    "anaemia": "Anaemia Present",
    "high_blood_pressure": "Hypertension Diagnosis",
    "smoking": "Smoking Status",
    "diabetes": "Diabetes History",
    "time": "Follow-up Period (days)",
    "DEATH_EVENT": "Death Event",
    # Diabetes
    "glucose": "Fasting Glucose (mg/dL)",
    # "blood_pressure" is defined once, in the CKD section below (a former
    # "Diastolic Blood Pressure (mmHg)" entry here was always overridden).
    "skin_thickness": "Triceps Skin Fold Thickness (mm)",
    "insulin": "Serum Insulin (mu U/mL)",
    "diabetes_pedigree_function": "Diabetes Pedigree Function",
    "pregnancies": "Number of Pregnancies",
    # Breast cancer
    "mean_radius": "Mean Tumour Radius (mm)",
    "mean_texture": "Mean Texture Score",
    "mean_perimeter": "Mean Tumour Perimeter (mm)",
    "mean_area": "Mean Tumour Area (mm²)",
    "mean_smoothness": "Mean Surface Smoothness",
    "mean_compactness": "Mean Compactness",
    "mean_concavity": "Mean Concavity",
    "mean_concave_points": "Mean Concave Points",
    "mean_symmetry": "Mean Symmetry",
    "worst_radius": "Worst Tumour Radius (mm)",
    "worst_texture": "Worst Texture Score",
    "worst_perimeter": "Worst Tumour Perimeter (mm)",
    "worst_area": "Worst Tumour Area (mm²)",
    "worst_smoothness": "Worst Surface Smoothness",
    # Parkinson's
    "MDVP_Fo_Hz": "Avg Vocal Fundamental Frequency (Hz)",
    "MDVP_Fhi_Hz": "Max Vocal Fundamental Frequency (Hz)",
    "MDVP_Flo_Hz": "Min Vocal Fundamental Frequency (Hz)",
    "MDVP_Jitter_pct": "Vocal Jitter (%)",
    "MDVP_Jitter_Abs": "Absolute Vocal Jitter",
    "MDVP_RAP": "Relative Average Perturbation",
    "MDVP_PPQ": "Five-Point Period Perturbation Quotient",
    "Jitter_DDP": "Average Absolute Difference of Differences (Jitter)",
    "MDVP_Shimmer": "Vocal Shimmer",
    "MDVP_Shimmer_dB": "Vocal Shimmer (dB)",
    "Shimmer_APQ3": "Three-Point Amplitude Perturbation Quotient",
    "Shimmer_APQ5": "Five-Point Amplitude Perturbation Quotient",
    "MDVP_APQ": "MDVP Amplitude Perturbation Quotient",
    "Shimmer_DDA": "Average Absolute Differences of Consecutive Shimmer",
    "NHR": "Noise-to-Harmonics Ratio",
    "HNR": "Harmonics-to-Noise Ratio",
    "RPDE": "Recurrence Period Density Entropy",
    "DFA": "Detrended Fluctuation Analysis",
    "spread1": "Nonlinear Frequency Variation (spread1)",
    "spread2": "Nonlinear Frequency Variation (spread2)",
    "D2": "D2 Nonlinear Dynamical Complexity",
    "PPE": "Pitch Period Entropy",
    # Liver
    "total_bilirubin": "Total Bilirubin (mg/dL)",
    "direct_bilirubin": "Direct Bilirubin (mg/dL)",
    "alkaline_phosphotase": "Alkaline Phosphatase (U/L)",
    "alamine_aminotransferase": "Alanine Aminotransferase / ALT (U/L)",
    "aspartate_aminotransferase": "Aspartate Aminotransferase / AST (U/L)",
    "total_proteins": "Total Proteins (g/dL)",
    # NOTE(review): "albumin" is defined once, in the CKD section below, as
    # "Urine Albumin". A former "Serum Albumin (g/dL)" entry here was always
    # overridden, so any liver-dataset column literally named "albumin" is
    # labelled as urine albumin — confirm whether that is intended.
    "albumin_globulin_ratio": "Albumin/Globulin Ratio",
    # Stroke
    "hypertension": "Hypertension",
    "heart_disease": "Heart Disease History",
    "avg_glucose_level": "Average Glucose Level (mg/dL)",
    "smoking_status": "Smoking Status",
    "work_type": "Work Type",
    "residence_type": "Residence Type",
    "ever_married": "Ever Married",
    # CKD ("hypertension" is shared with the Stroke section above)
    "blood_pressure": "Blood Pressure (mmHg)",
    "specific_gravity": "Urine Specific Gravity",
    "albumin": "Urine Albumin",
    "sugar": "Urine Sugar",
    "red_blood_cells": "Red Blood Cells in Urine",
    "pus_cell": "Pus Cells in Urine",
    "blood_glucose_random": "Random Blood Glucose (mg/dL)",
    "blood_urea": "Blood Urea (mg/dL)",
    "sodium": "Serum Sodium (mEq/L)",
    "haemoglobin": "Haemoglobin (g/dL)",
    "diabetes_mellitus": "Diabetes Mellitus",
    # Sepsis
    "HR": "Heart Rate (bpm)",
    "O2Sat": "Oxygen Saturation (%)",
    "Temp": "Body Temperature (°C)",
    "SBP": "Systolic Blood Pressure (mmHg)",
    "MAP": "Mean Arterial Pressure (mmHg)",
    "Resp": "Respiratory Rate (breaths/min)",
    "pH": "Arterial Blood pH",
    "Lactate": "Blood Lactate (mmol/L)",
    "Creatinine": "Serum Creatinine (mg/dL)",
    "WBC": "White Blood Cell Count (×10³/μL)",
    "Platelets": "Platelet Count (×10³/μL)",
    "Bilirubin_total": "Total Bilirubin (mg/dL)",
    # Orthopaedics
    "pelvic_incidence": "Pelvic Incidence (°)",
    "pelvic_tilt": "Pelvic Tilt (°)",
    "lumbar_lordosis_angle": "Lumbar Lordosis Angle (°)",
    "sacral_slope": "Sacral Slope (°)",
    "pelvic_radius": "Pelvic Radius (mm)",
    "degree_spondylolisthesis": "Degree of Spondylolisthesis (mm)",
    # Fetal health
    "baseline_value": "Fetal Heart Rate Baseline (bpm)",
    "accelerations": "Accelerations (per second)",
    "fetal_movement": "Fetal Movements (per second)",
    "uterine_contractions": "Uterine Contractions (per second)",
    "severe_decelerations": "Severe Decelerations (per second)",
    "prolongued_decelerations": "Prolonged Decelerations (per second)",
    "abnormal_short_term_variability": "Abnormal Short-Term Variability (%)",
    # Thyroid
    "TSH": "Thyroid Stimulating Hormone (mIU/L)",
    "T3": "Serum Triiodothyronine / T3 (ng/dL)",
    "TT4": "Total Thyroxine / T4 (μg/dL)",
    "T4U": "Thyroxine Utilisation Rate",
    "FTI": "Free Thyroxine Index",
    "T3_resin_uptake": "T3 Resin Uptake (%)",
    "total_serum_thyroxine": "Total Serum Thyroxine (μg/dL)",
    "max_abs_diff_TSH": "Max Absolute Difference in TSH",
    # Anaemia / haematology
    "mch": "Mean Corpuscular Haemoglobin (pg)",
    "mchc": "Mean Corpuscular Haemoglobin Concentration (g/dL)",
    "mcv": "Mean Corpuscular Volume (fL)",
    "rdw": "Red Cell Distribution Width (%)",
    "wbc": "White Blood Cell Count (×10³/μL)",
    "neutrophils": "Neutrophil Count (×10³/μL)",
    "lymphocytes": "Lymphocyte Count (×10³/μL)",
    # COPD / pulmonology
    "smoking_pack_years": "Smoking Pack-Years",
    "fev1_litres": "FEV1 — Forced Expiratory Volume in 1s (L)",
    "fvc_litres": "FVC — Forced Vital Capacity (L)",
    "fev1_fvc_ratio": "FEV1/FVC Ratio",
    "prior_exacerbations_year": "Prior COPD Exacerbations (per year)",
    "mrc_dyspnea_scale": "MRC Dyspnea Scale Score",
    "sgrq_score": "SGRQ Quality-of-Life Score",
    "copd_gold_stage": "COPD GOLD Stage",
    # Arrhythmia / ECG
    "QRS_duration": "QRS Duration (ms)",
    "PR_interval": "PR Interval (ms)",
    "QT_interval": "QT Interval (ms)",
    "T_interval": "T Wave Interval (ms)",
    "P_interval": "P Wave Interval (ms)",
    "QRS_axis": "QRS Axis (°)",
    "T_axis": "T Wave Axis (°)",
    "P_axis": "P Wave Axis (°)",
    "heart_rate": "Heart Rate (bpm)",
    # Radiology
    "view_position": "X-Ray View Position",
    "follow_up_number": "Follow-up Visit Number",
    "Finding_Label": "Radiological Finding",
    # Fetal health / CTG
    "light_decelerations": "Light Decelerations (per second)",
    "mean_value_short_term_variability": "Mean Short-Term Variability (ms)",
    "pct_time_abnormal_long_term_variability": "% Time with Abnormal Long-Term Variability",
    "mean_value_long_term_variability": "Mean Long-Term Variability (ms)",
    "histogram_mode": "CTG Histogram Mode",
    # Ophthalmology / diabetic retinopathy
    "quality_assessment": "Image Quality Assessment",
    "pre_screening": "Pre-Screening Result",
    "ma_detection_0.5": "Microaneurysm Detection (threshold 0.5)",
    "ma_detection_0.6": "Microaneurysm Detection (threshold 0.6)",
    "ma_detection_0.7": "Microaneurysm Detection (threshold 0.7)",
    "ma_detection_0.8": "Microaneurysm Detection (threshold 0.8)",
    "ma_detection_0.9": "Microaneurysm Detection (threshold 0.9)",
    "ma_detection_1.0": "Microaneurysm Detection (threshold 1.0)",
    "exudate_1": "Exudate Feature 1",
    "exudate_2": "Exudate Feature 2",
    "exudate_3": "Exudate Feature 3",
    "exudate_4": "Exudate Feature 4",
    "exudate_5": "Exudate Feature 5",
    "exudate_6": "Exudate Feature 6",
    "exudate_7": "Exudate Feature 7",
    "exudate_8": "Exudate Feature 8",
    "macula_od_distance": "Macula to Optic Disc Distance",
    "optic_disc_diameter": "Optic Disc Diameter",
    "am_fm_classification": "AM-FM Classification",
    # Dermatology
    "localization": "Lesion Localization",
    # Cervical cancer
    "number_of_sexual_partners": "Number of Sexual Partners",
    "first_sexual_intercourse_age": "Age at First Sexual Intercourse",
    "num_of_pregnancies": "Number of Pregnancies",
    "smokes_years": "Years of Smoking",
    "hormonal_contraceptives_years": "Years Using Hormonal Contraceptives",
    "iud_years": "Years Using IUD",
    "stds_number": "Number of STDs Diagnosed",
    "stds_condylomatosis": "STDs: Condylomatosis",
    "stds_cervical_condylomatosis": "STDs: Cervical Condylomatosis",
    "stds_hpv": "STDs: HPV",
    # Pharmacy / readmission
    "time_in_hospital": "Hospital Length of Stay (days)",
    "num_lab_procedures": "Number of Lab Procedures",
    "num_procedures": "Number of Procedures",
    "num_medications": "Number of Medications",
    "number_outpatient": "Number of Outpatient Visits",
    "number_emergency": "Number of Emergency Visits",
    "number_inpatient": "Number of Inpatient Visits",
    "number_diagnoses": "Number of Diagnoses",
    "max_glu_serum": "Max Glucose Serum Level",
    "A1Cresult": "HbA1c Test Result",
    "metformin": "Metformin Dosage",
    "change": "Change in Medication",
    # Sepsis / ICU
    "BaseExcess": "Base Excess (mEq/L)",
    "PaCO2": "Partial Pressure of CO2 (mmHg)",
    "Age": "Patient Age (years)",
    "Gender": "Patient Gender",
    # Mental health
    "number_of_children": "Number of Children",
    "income": "Annual Income",
    "dietary_habits": "Dietary Habits Score",
    "sleep_patterns": "Sleep Quality Score",
    "alcohol_consumption": "Alcohol Consumption Level",
    "physical_activity_level": "Physical Activity Level",
    "employment_status": "Employment Status",
    "history_substance_abuse": "History of Substance Abuse",
    "family_history_depression": "Family History of Depression",
    "chronic_medical_conditions": "Chronic Medical Conditions",
    "marital_status": "Marital Status",
    "education_level": "Education Level",
}
255
+
256
# Hand-written clinical rationale shown for well-known features; features not
# listed here get a generic sentence built by the caller.
TOP_FEATURE_NOTES: dict[str, str] = {
    "ejection_fraction": (
        "Ejection fraction is a well-established predictor of heart failure outcomes — "
        "values below 35% indicate severely reduced cardiac function."
    ),
    "serum_creatinine": (
        "Elevated serum creatinine reflects impaired renal clearance, "
        "which commonly co-occurs with and worsens heart failure prognosis."
    ),
    "glucose": (
        "Fasting glucose is the primary biochemical marker of diabetes risk "
        "and insulin resistance."
    ),
    "bmi": (
        "BMI is a validated surrogate for adiposity and a major modifiable "
        "risk factor for type 2 diabetes."
    ),
    "mean_radius": (
        "Tumour radius is closely correlated with malignancy — "
        "larger tumours are associated with more aggressive histology."
    ),
    "worst_area": (
        "Worst-case tumour area captures the most severe regional cellular "
        "abnormality within the biopsy sample."
    ),
    "TSH": (
        "TSH is the most sensitive marker of thyroid dysfunction — "
        "a raised TSH indicates hypothyroidism, "
        "while a suppressed TSH indicates hyperthyroidism."
    ),
    "Lactate": (
        "Elevated lactate is a hallmark of cellular hypoperfusion "
        "and is a key diagnostic criterion for septic shock."
    ),
    "HR": (
        "Heart rate elevation is an early physiological response to infection "
        "and correlates with sepsis severity."
    ),
    "pelvic_incidence": (
        "Pelvic incidence is a morphological parameter that determines lumbar "
        "lordosis compensation and is key to spinal biomechanics."
    ),
    "degree_spondylolisthesis": (
        "Degree of spondylolisthesis directly quantifies vertebral slip "
        "and is the primary determinant of clinical severity."
    ),
    "MDVP_Jitter_pct": (
        "Jitter measures cycle-to-cycle variation in vocal fundamental frequency — "
        "pathological values indicate Parkinson's-related vocal instability."
    ),
    "HNR": (
        "A reduced harmonics-to-noise ratio reflects increased vocal noise "
        "and turbulence characteristic of neurological voice disorders."
    ),
}
271
+
272
+
273
def _clinical_name(feature: str) -> str:
    """Return the clinician-readable label for a raw feature id.

    Looks the id up in ``CLINICAL_NAME_MAP``; unknown ids are prettified by
    turning underscores into spaces and title-casing the result.
    """
    label = CLINICAL_NAME_MAP.get(feature)
    if label is None:
        return feature.replace("_", " ").title()
    return label
276
+
277
+
278
def _plain_language(feature: str, value: float, pctile: float) -> str:
    """Describe a feature value in plain words, e.g. ``"<label> elevated (1.90)"``.

    ``pctile`` is compared against fixed upper bounds (0.25, 0.45, 0.55, 0.75)
    to pick the wording; anything not below 0.75 is reported as "elevated".
    ``value`` is printed with two decimals.
    """
    # (exclusive upper bound, wording) — checked in ascending order.
    bands = (
        (0.25, "very low"),
        (0.45, "below normal"),
        (0.55, "normal"),
        (0.75, "above normal"),
    )
    wording = next((text for upper, text in bands if pctile < upper), "elevated")
    label = _clinical_name(feature)
    return f"{label} {wording} ({value:.2f})"
292
+
293
+
294
class ExplainService:
    """SHAP-based explainability — global importance + per-patient waterfall + what-if probes."""

    def _get_explainer(self, model: Any, X_train: np.ndarray, model_type: str) -> tuple[Any, str]:
        """Build a SHAP explainer suited to ``model_type``.

        Returns:
            ``(explainer, method)`` where ``method`` is ``"shap_tree"``,
            ``"shap_linear"`` or ``"shap_kernel"``. If SHAP cannot be imported
            or the explainer cannot be built, returns ``(None, "permutation")``
            so callers fall back to ``_permutation_importance``.

        Note: a new explainer is built on every call; nothing is cached.
        """
        mt = model_type.lower()
        try:
            import shap

            # Tree-based models (including XGBoost and LightGBM)
            if mt in ("random_forest", "decision_tree", "xgboost", "lightgbm"):
                return shap.TreeExplainer(model), "shap_tree"
            if mt == "logistic_regression":
                return shap.LinearExplainer(model, X_train), "shap_linear"
            # KNN, SVM, NaiveBayes → model-agnostic explainer; the background
            # set is capped at 50 rows to keep it fast.
            bg = shap.sample(X_train, min(50, len(X_train)))
            try:
                explainer = shap.Explainer(model.predict_proba, bg, algorithm="auto")
                return explainer, "shap_kernel"
            except Exception:
                return shap.KernelExplainer(model.predict_proba, bg), "shap_kernel"
        except Exception as exc:
            logger.warning("SHAP explainer creation failed: %s — using permutation", exc)
            return None, "permutation"

    def _shap_values_binary(
        self, explainer: Any, method: str, X: np.ndarray, model: Any
    ) -> np.ndarray:
        """Return a 2-D SHAP array (n_samples, n_features) for the positive class.

        Handles the layouts ``explainer.shap_values`` may produce: a two-item
        per-class list (index 1 is used), a 3-D array (last-axis index 1 is
        used) or an already 2-D array. Any failure falls back to
        ``_permutation_importance``. ``method`` is accepted for interface
        compatibility and is not used.
        """
        try:
            sv = explainer.shap_values(X)
            if isinstance(sv, list) and len(sv) == 2:
                return np.array(sv[1])
            if isinstance(sv, np.ndarray):
                if sv.ndim == 3:
                    return sv[:, :, 1]
                return sv
            return np.array(sv)
        except Exception as exc:
            logger.warning("SHAP value computation failed: %s — fallback", exc)
            return self._permutation_importance(model, X)

    def _permutation_importance(self, model: Any, X: np.ndarray) -> np.ndarray:
        """Rough fallback when SHAP is unavailable.

        Despite the name, no permutation is performed: the model's absolute
        coefficients (``coef_``) or ``feature_importances_`` are repeated for
        every row of ``X``. Models exposing neither yield an all-zero array.
        The result has shape ``(len(X), n_features)``.
        """
        try:
            if hasattr(model, "coef_"):
                coef = np.abs(model.coef_[0] if model.coef_.ndim > 1 else model.coef_)
                return np.outer(np.ones(len(X)), coef)
            if hasattr(model, "feature_importances_"):
                fi = model.feature_importances_
                return np.outer(np.ones(len(X)), fi)
        except Exception as exc:
            # Best-effort fallback: report why, then degrade to zeros.
            logger.warning("Fallback importance computation failed: %s — using zeros", exc)
        return np.zeros((len(X), X.shape[1]))

    def global_importance(
        self,
        model_id: str,
        model: Any,
        X_test: np.ndarray,
        y_test: np.ndarray,
        feature_names: list[str],
        X_train: np.ndarray,
        model_type: str,
        classes: list[str],
    ) -> GlobalExplainabilityResponse:
        """Step-6 endpoint — computes global SHAP feature importance for the active model.

        Importance is the mean absolute SHAP value over the first 200 test
        rows; features are returned in descending importance.
        ``explained_variance_pct`` is the share of total importance carried by
        the top five features. ``y_test`` and ``classes`` are accepted for
        interface compatibility and are not used.
        """
        explainer, method = self._get_explainer(model, X_train, model_type)

        if explainer is not None:
            sv = self._shap_values_binary(explainer, method, X_test[:200], model)
        else:
            sv = self._permutation_importance(model, X_test[:200])
            method = "permutation"

        mean_abs = np.mean(np.abs(sv), axis=0)
        mean_signed = np.mean(sv, axis=0)

        # Guard against an all-zero importance vector (avoids division by zero).
        abs_sum = mean_abs.sum()
        total = abs_sum if abs_sum > 0 else 1.0
        indices = np.argsort(mean_abs)[::-1]

        items: list[FeatureImportanceItem] = []
        cumulative = 0.0
        top5_cumulative = 0.0
        for rank, idx in enumerate(indices):
            name = feature_names[idx] if idx < len(feature_names) else f"feature_{idx}"
            imp = float(mean_abs[idx])
            cumulative += imp / total
            if rank < 5:
                top5_cumulative = cumulative

            # Mean signed SHAP within ±0.01 is treated as having no net direction.
            direction: str
            if mean_signed[idx] > 0.01:
                direction = "positive"
            elif mean_signed[idx] < -0.01:
                direction = "negative"
            else:
                direction = "neutral"

            note = TOP_FEATURE_NOTES.get(name, f"{_clinical_name(name)} influences the model's predictions.")
            items.append(FeatureImportanceItem(
                feature_name=name,
                clinical_name=_clinical_name(name),
                importance=round(imp, 6),
                direction=direction,
                clinical_note=note,
            ))

        top_name = items[0].feature_name if items else ""
        top_note = TOP_FEATURE_NOTES.get(
            top_name,
            f"{_clinical_name(top_name)} is the most influential variable in this model's decisions.",
        )

        return GlobalExplainabilityResponse(
            model_id=model_id,
            method=method,
            feature_importances=items,
            top_feature_clinical_note=top_note,
            explained_variance_pct=round(top5_cumulative * 100, 1),
        )

    def single_patient(
        self,
        model_id: str,
        model: Any,
        patient_idx: int,
        X_test: np.ndarray,
        feature_names: list[str],
        X_train: np.ndarray,
        model_type: str,
        classes: list[str],
        y_test: np.ndarray,
        scaler: Any = None,
    ) -> SinglePatientExplainResponse:
        """Compute the SHAP waterfall for a single patient row.

        The waterfall holds the 15 features with the largest absolute SHAP
        value; displayed feature values are inverse-transformed with
        ``scaler`` when one is given. ``y_test`` is accepted for interface
        compatibility and is not used.

        Raises:
            IndexError: if ``patient_idx`` is outside ``[0, len(X_test) - 1]``
                (same contract as ``what_if``).
        """
        n_test = len(X_test)
        if patient_idx < 0 or patient_idx >= n_test:
            raise IndexError(f"Patient index {patient_idx} out of range [0, {n_test - 1}]")

        explainer, method = self._get_explainer(model, X_train, model_type)

        x_patient = X_test[patient_idx : patient_idx + 1]

        # Inverse-transform to get clinical (unscaled) values for display
        if scaler is not None:
            try:
                x_patient_raw = scaler.inverse_transform(x_patient)[0]
            except Exception as exc:
                logger.warning("Inverse-transform failed in single_patient: %s — using scaled values", exc)
                x_patient_raw = x_patient[0]
        else:
            x_patient_raw = x_patient[0]

        if explainer is not None:
            sv = self._shap_values_binary(explainer, method, x_patient, model)
        else:
            sv = self._permutation_importance(model, x_patient)

        shap_vals = sv[0] if sv.ndim > 1 else sv

        # Base value: per-class arrays use the positive class (index 1);
        # scalars and single-element arrays use their only value. 0.5 is kept
        # when the explainer exposes nothing usable.
        base_value = 0.5
        try:
            if hasattr(explainer, "expected_value"):
                ev = np.atleast_1d(np.asarray(explainer.expected_value, dtype=float)).ravel()
                base_value = float(ev[1] if ev.size >= 2 else ev[0])
        except Exception as exc:
            logger.warning("Could not read SHAP expected_value: %s — using 0.5", exc)

        # Predicted probability
        prob_arr = self._model_predict_proba(model, x_patient)
        if prob_arr.shape[1] >= 2:
            pred_class_idx = int(np.argmax(prob_arr[0]))
            pred_prob = float(prob_arr[0, pred_class_idx])
        else:
            pred_class_idx = 0
            pred_prob = 0.5
        predicted_class = classes[pred_class_idx] if pred_class_idx < len(classes) else str(pred_class_idx)

        # Percentile for plain language: fraction of training rows strictly
        # below the patient's (scaled) value, per feature.
        pctiles = np.mean(X_train < x_patient[0], axis=0)

        waterfall: list[SHAPWaterfallPoint] = []
        sorted_idx = np.argsort(np.abs(shap_vals))[::-1]
        for i in sorted_idx[:15]:
            fname = feature_names[i] if i < len(feature_names) else f"feature_{i}"
            sv_val = float(shap_vals[i])
            fval_raw = float(x_patient_raw[i]) if i < len(x_patient_raw) else float(x_patient[0, i])
            pct = float(pctiles[i]) if i < len(pctiles) else 0.5
            waterfall.append(SHAPWaterfallPoint(
                feature_name=fname,
                clinical_name=_clinical_name(fname),
                feature_value=round(fval_raw, 3),
                shap_value=round(sv_val, 5),
                direction="increases_risk" if sv_val > 0 else "decreases_risk",
                plain_language=_plain_language(fname, fval_raw, pct),
            ))

        # Clinical summary built from the three strongest contributions
        top3 = waterfall[:3]
        risk_factors = [w.plain_language for w in top3 if w.direction == "increases_risk"]
        protect_factors = [w.plain_language for w in top3 if w.direction == "decreases_risk"]
        summary_parts = [
            f"This patient was classified as '{predicted_class}' with a probability of {pred_prob:.1%}."
        ]
        if risk_factors:
            summary_parts.append(f"Key risk-increasing factors: {'; '.join(risk_factors)}.")
        if protect_factors:
            summary_parts.append(f"Protective factors: {'; '.join(protect_factors)}.")
        summary_parts.append(
            "These associations are derived from the training data and do not imply causation."
        )

        return SinglePatientExplainResponse(
            model_id=model_id,
            patient_index=patient_idx,
            predicted_class=predicted_class,
            predicted_probability=round(pred_prob, 4),
            base_value=round(base_value, 4),
            waterfall=waterfall,
            clinical_summary=" ".join(summary_parts),
        )

    def _model_predict_proba(self, model: Any, X: np.ndarray) -> np.ndarray:
        """Return class probabilities of shape ``(len(X), n_classes)``.

        Uses ``predict_proba`` when available. Otherwise ``decision_function``
        scores are squashed: a sigmoid for 1-D (binary) scores, a softmax for
        2-D (multiclass) scores — these are uncalibrated pseudo-probabilities.
        Models offering neither get an uninformative 0.5/0.5 row per sample.
        """
        if hasattr(model, "predict_proba"):
            return model.predict_proba(X)
        if hasattr(model, "decision_function"):
            scores = np.asarray(model.decision_function(X))
            if scores.ndim == 1:
                p = 1 / (1 + np.exp(-scores))
                return np.column_stack([1 - p, p])
            if scores.ndim == 2 and scores.shape[1] >= 2:
                # Subtract the row max before exponentiating for numerical stability.
                exp_scores = np.exp(scores - scores.max(axis=1, keepdims=True))
                return exp_scores / exp_scores.sum(axis=1, keepdims=True)
        # One row per sample so callers can index the result by patient.
        return np.full((len(X), 2), 0.5)

    # ------------------------------------------------------------------
    # What-If analysis
    # ------------------------------------------------------------------
    def what_if(
        self,
        model_id: str,
        model: Any,
        patient_index: int,
        feature_name: str,
        new_value: float,
        X_test: np.ndarray,
        feature_names: list[str],
        scaler: Any | None,
    ) -> WhatIfResponse:
        """Simulate changing a single feature and return the probability shift.

        ``new_value`` is given in clinical (unscaled) units; when a ``scaler``
        is supplied the row is inverse-transformed, edited and re-scaled
        before prediction. For binary models the class-1 probability is
        compared; otherwise the maximum class probability is compared.

        Raises:
            ValueError: if ``feature_name`` is not in ``feature_names``.
            IndexError: if ``patient_index`` is outside ``[0, len(X_test) - 1]``.
        """
        if feature_name not in feature_names:
            raise ValueError(f"Feature '{feature_name}' not found. Available: {feature_names}")

        n_test = len(X_test)
        if patient_index < 0 or patient_index >= n_test:
            raise IndexError(f"Patient index {patient_index} out of range [0, {n_test - 1}]")

        feat_idx = feature_names.index(feature_name)

        # Original row (already scaled if scaler was applied during training)
        original_row = X_test[patient_index : patient_index + 1].copy()

        # Get original clinical value by inverse-transforming
        if scaler is not None:
            try:
                original_clinical = scaler.inverse_transform(original_row)[0, feat_idx]
            except Exception as exc:
                logger.warning("Inverse-transform failed in what_if: %s — using scaled value", exc)
                original_clinical = float(original_row[0, feat_idx])
        else:
            original_clinical = float(original_row[0, feat_idx])

        # Build modified row: start from scaled original, replace the feature
        modified_row = original_row.copy()
        if scaler is not None:
            # new_value is in clinical space; we need to scale only that feature.
            # Build a full clinical row, replace the feature, then re-scale.
            try:
                clinical_row = scaler.inverse_transform(original_row)
                clinical_row[0, feat_idx] = new_value
                modified_row = scaler.transform(clinical_row)
            except Exception as exc:
                # Fallback: inject raw value directly
                logger.warning("Re-scaling failed in what_if: %s — injecting raw value", exc)
                modified_row[0, feat_idx] = new_value
        else:
            modified_row[0, feat_idx] = new_value

        # Predict probabilities
        original_probs = self._model_predict_proba(model, original_row)
        modified_probs = self._model_predict_proba(model, modified_row)

        # For binary: use class-1 probability; for multiclass: use max probability
        if original_probs.shape[1] == 2:
            original_prob = float(original_probs[0, 1])
            new_prob = float(modified_probs[0, 1])
        else:
            original_prob = float(np.max(original_probs[0]))
            new_prob = float(np.max(modified_probs[0]))

        shift = new_prob - original_prob

        if abs(shift) < 1e-6:
            direction = "no_change"
        elif shift > 0:
            direction = "increased_risk"
        else:
            direction = "decreased_risk"

        return WhatIfResponse(
            feature_name=feature_name,
            original_value=round(float(original_clinical), 4),
            new_value=round(new_value, 4),
            original_prob=round(original_prob, 4),
            new_prob=round(new_prob, 4),
            shift=round(shift, 4),
            direction=direction,
        )

    # ------------------------------------------------------------------
    # Sample patients for dropdown picker
    # ------------------------------------------------------------------
    def sample_patients(
        self,
        model_id: str,
        model: Any,
        X_test: np.ndarray,
    ) -> SamplePatientsResponse:
        """Return up to 3 representative patients (low/medium/high risk).

        Low and high are the patients with the lowest and highest score;
        medium is the patient whose score is closest to 0.5 (or the
        median-ranked patient if that would duplicate low or high). The score
        is the class-1 probability for binary models and the maximum class
        probability otherwise.
        """
        n = len(X_test)
        if n == 0:
            return SamplePatientsResponse(model_id=model_id, patients=[])

        probs = self._model_predict_proba(model, X_test)
        # Use class-1 probability for binary; max probability otherwise
        if probs.shape[1] == 2:
            scores = probs[:, 1]
        else:
            scores = np.max(probs, axis=1)

        sorted_indices = np.argsort(scores)

        picks: list[tuple[int, str]] = []

        # Low risk: lowest probability patient
        low_idx = int(sorted_indices[0])
        picks.append((low_idx, "low"))

        if n >= 2:
            # High risk: highest probability patient
            high_idx = int(sorted_indices[-1])
            picks.append((high_idx, "high"))

        if n >= 3:
            # Medium risk: patient closest to 0.5
            diffs = np.abs(scores - 0.5)
            med_idx = int(np.argmin(diffs))
            # Avoid duplicating low or high pick
            if med_idx in (low_idx, high_idx):
                # Fall back to the median-ranked patient
                med_idx = int(sorted_indices[n // 2])
            picks.append((med_idx, "medium"))

        patients: list[SamplePatient] = []
        for idx, level in picks:
            prob = float(scores[idx])
            label = level.capitalize()
            patients.append(SamplePatient(
                index=idx,
                risk_level=level,
                probability=round(prob, 4),
                summary=f"Patient #{idx} — {label} Risk ({prob:.0%})",
            ))

        # Sort by risk level order: low, medium, high
        order = {"low": 0, "medium": 1, "high": 2}
        patients.sort(key=lambda p: order[p.risk_level])

        return SamplePatientsResponse(model_id=model_id, patients=patients)
app/services/insight_service.py ADDED
@@ -0,0 +1,607 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """LLM-powered clinical insight generation.
2
+
3
+ Provider chain: MedGemma (Vertex AI) → Gemini (Google AI) → static template fallback.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import asyncio
8
+ import json
9
+ import logging
10
+ import os
11
+ import random
12
+ from typing import Any
13
+
14
+ import httpx
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ # Timeout per LLM call (seconds). Gemma 4 is a reasoning model that emits
19
+ # chain-of-thought tokens before the answer, so single calls can legitimately
20
+ # take 60–90s on the ethics prompt. 200s leaves a very generous ceiling for
21
+ # the long-tail cases and rare upstream slowness.
22
+ _LLM_TIMEOUT = 200.0
23
+
24
+ # Retry transient Gemini failures (timeouts, 429, 5xx). One retry is enough
25
+ # in practice; keeping the count at 1 bounds the worst-case endpoint time
26
+ # within the frontend axios budget (450s).
27
+ _MAX_RETRIES = 1
28
+ _RETRY_BASE_DELAY = 1.5
29
+
30
+ # HTTP status codes worth retrying (rate limit + server errors).
31
+ _RETRY_STATUS_CODES = {429, 500, 502, 503, 504}
32
+
33
+
34
+ def _build_column_stats_block(context: dict) -> str:
35
+ """Build feature statistics section for prompts."""
36
+ stats = context.get("column_statistics", [])
37
+ if not stats:
38
+ return ""
39
+ lines = "FEATURE STATISTICS (training set distributions):\n"
40
+ for cs in stats:
41
+ if "mean" in cs:
42
+ lines += f" {cs['name']}: mean={cs['mean']}, std={cs['std']}, range=[{cs['min']}, {cs['max']}]\n"
43
+ else:
44
+ lines += f" {cs['name']}: (statistics unavailable)\n"
45
+ return lines + "\n"
46
+
47
+
48
+ def _build_comparison_block(context: dict) -> str:
49
+ """Build compared models section for prompts."""
50
+ models = context.get("compared_models", [])
51
+ if not models:
52
+ return ""
53
+ current = context.get("model_type", "unknown")
54
+ lines = "MODEL COMPARISON (other models trained on same dataset):\n"
55
+ for m in models:
56
+ lines += (
57
+ f" - {m['model_type']}: AUC={m['auc_roc']:.3f}, "
58
+ f"Acc={m['accuracy']:.3f}, Sens={m['sensitivity']:.3f}, "
59
+ f"F1={m['f1_score']:.3f}, MCC={m['mcc']:.3f}\n"
60
+ )
61
+ lines += f"\n The model being assessed is: {current}.\n"
62
+ lines += f" There are {len(models)} models total. Reference ALL of them by name with their key metrics.\n"
63
+ lines += " Compare the current model's strengths and weaknesses against each alternative.\n\n"
64
+ return lines
65
+
66
+
67
+ def _build_raw_columns_block(context: dict) -> str:
68
+ """Build raw dataset column overview (from Step 2 explore)."""
69
+ cols = context.get("raw_column_meta", [])
70
+ if not cols:
71
+ return ""
72
+ row_count = context.get("row_count_original", "?")
73
+ lines = f"RAW DATASET OVERVIEW ({row_count} rows before preprocessing):\n"
74
+ for c in cols:
75
+ role = "TARGET" if c.get("is_target") else "feature"
76
+ missing = f", missing={c['missing_count']} ({c['missing_pct']}%)" if c["missing_count"] > 0 else ""
77
+ samples = ", ".join(c.get("sample_values", []))
78
+ lines += (
79
+ f" {c['name']} [{role}]: dtype={c['dtype']}, "
80
+ f"unique={c['unique_count']}{missing}, "
81
+ f"samples=[{samples}]\n"
82
+ )
83
+ lines += "\n"
84
+ return lines
85
+
86
+
87
+ def _build_sample_patients_block(context: dict) -> str:
88
+ """Build sample patient rows for LLM grounding."""
89
+ patients = context.get("sample_patients", [])
90
+ if not patients:
91
+ return ""
92
+ lines = "SAMPLE PATIENTS FROM TEST SET (real data, not synthetic):\n"
93
+ for i, row in enumerate(patients):
94
+ outcome = row.pop("_actual_outcome", "?")
95
+ vals = ", ".join(f"{k}={v}" for k, v in row.items())
96
+ lines += f" Patient {i+1}: {vals} → actual outcome: {outcome}\n"
97
+ row["_actual_outcome"] = outcome # restore
98
+ lines += " Use these real patient profiles to ground your clinical reasoning.\n\n"
99
+ return lines
100
+
101
+
102
def _build_ethics_prompt(context: dict) -> str:
    """Build a structured prompt with full clinical context for ethics/bias insight.

    The prompt is the concatenation of four parts:

    1. a data block (domain, dataset, model, metrics, SHAP importances,
       subgroup fairness, bias warnings, overfitting flag),
    2. the model comparison block,
    3. an instruction block, which asks for a multi-model analysis when
       ``context["compared_models"]`` holds more than one entry and for a
       single-model analysis otherwise,
    4. the strict data and format rules.

    Args:
        context: Flat dict of model/dataset facts. Most keys are optional
            and fall back to placeholders. Entries of ``subgroup_details``,
            ``bias_warnings`` and ``feature_importances`` must carry the
            keys indexed below.

    Returns:
        The full prompt text.
    """
    specialty = context.get("specialty_name", "Unknown")
    prediction_task = context.get("what_ai_predicts", "clinical outcome")
    clinical_bg = context.get("clinical_context", "")
    model_type = context.get("model_type", "unknown")
    features = context.get("feature_names", [])
    target = context.get("target_variable", "outcome")
    classes = context.get("classes", [])

    # Model hyperparameters
    params = context.get("model_params", {})
    params_block = ", ".join(f"{k}={v}" for k, v in params.items()) if params else "defaults"

    # Class distribution in training set
    class_dist = context.get("class_distribution_train", {})
    dist_block = ", ".join(f"{k}: {v}" for k, v in class_dist.items()) if class_dist else "unknown"

    # Confusion matrix
    cm = context.get("confusion_matrix", {})
    if "TP" in cm:
        cm_block = f"TP={cm['TP']}, FP={cm['FP']}, FN={cm['FN']}, TN={cm['TN']}"
    else:
        cm_block = "multiclass (see subgroup data)"

    metrics_block = (
        f" Accuracy: {context.get('accuracy', 'N/A')}\n"
        f" Sensitivity: {context.get('sensitivity', 'N/A')} (recall — how many true positives found)\n"
        f" Specificity: {context.get('specificity', 'N/A')}\n"
        f" Precision: {context.get('precision', 'N/A')}\n"
        f" F1 Score: {context.get('f1_score', 'N/A')}\n"
        f" AUC-ROC: {context.get('auc_roc', 'N/A')}\n"
        f" MCC: {context.get('mcc', 'N/A')}\n"
        f" Train Acc: {context.get('train_accuracy', 'N/A')}\n"
        f" CV Mean: {context.get('cv_mean', 'N/A')} (std: {context.get('cv_std', 'N/A')})\n"
        f" Optimal threshold: {context.get('optimal_threshold', 0.5)}\n"
        f" Confusion matrix: {cm_block}\n"
    )

    # Per-subgroup fairness lines
    bias_parts = []
    for sg in context.get("subgroup_details", []):
        line = (
            f" - {sg['group']}: sensitivity={sg['sensitivity']:.1%}, "
            f"accuracy={sg['accuracy']:.1%}, n={sg['sample_size']}, "
            f"status={sg['status']}"
        )
        if sg.get("status_reason"):
            line += f" ({sg['status_reason']})"
        bias_parts.append(line + "\n")
    bias_lines = "".join(bias_parts)

    warnings_block = "".join(
        f" - {w['group']}: {w['metric']} gap = {w['gap']:.1%}\n"
        for w in context.get("bias_warnings", [])
    )

    # SHAP / Feature importance
    direction_labels = {"positive": "increases risk", "negative": "decreases risk"}
    fi_block = "".join(
        f" {fi['importance']:.3f} {fi['clinical_name']} "
        f"({direction_labels.get(fi['direction'], 'neutral')})\n"
        for fi in context.get("feature_importances", [])
    )

    shap_note = context.get("top_feature_clinical_note", "")
    explained_pct = context.get("explained_variance_top5_pct", 0)

    # These values are computed outside the f-string on purpose: a backslash
    # escape inside an f-string replacement field is a SyntaxError before
    # Python 3.12.
    warnings_text = warnings_block if warnings_block else " None detected\n"
    if context.get("overfitting_warning"):
        overfitting_text = "YES (train={} vs test={})".format(
            context.get("train_accuracy", "?"), context.get("accuracy", "?")
        )
    else:
        overfitting_text = "No significant gap"

    # --- DATA BLOCK (always present) ---
    data_block = (
        f"CLINICAL DOMAIN: {specialty}\n"
        f"PREDICTION TASK: {prediction_task}\n"
        f"TARGET VARIABLE: '{target}' with classes: {classes}\n"
        f"DATA SOURCE: {context.get('data_source', 'unknown')}\n"
        f"CLINICAL BACKGROUND: {clinical_bg}\n\n"
        f"{_build_raw_columns_block(context)}"
        f"DATASET (after preprocessing):\n"
        f" Features ({len(features)}): {', '.join(features)}\n"
        f" Training samples: {context.get('train_size', '?')}\n"
        f" Test samples: {context.get('test_size', '?')}\n"
        f" Class distribution (train): {dist_block}\n"
        f" SMOTE applied: {context.get('use_smote', False)}\n"
        f" Normalization: {context.get('normalization', 'N/A')}\n\n"
        f"{_build_column_stats_block(context)}"
        f"{_build_sample_patients_block(context)}"
        f"CURRENT MODEL: {model_type}\n"
        f" Hyperparameters: {params_block}\n"
        f" Training time: {context.get('training_time_ms', 'N/A')} ms\n\n"
        f"PERFORMANCE:\n{metrics_block}\n"
        f"FEATURE IMPORTANCE (SHAP — {context.get('shap_method', 'N/A')}):\n"
        f" Top 5 features explain {explained_pct:.1f}% of model decisions.\n"
        f"{fi_block}"
        f" Clinical note: {shap_note}\n\n"
        f"SUBGROUP FAIRNESS:\n"
        f" Overall sensitivity: {context.get('overall_sensitivity', 'N/A')}\n"
        f"{bias_lines}\n"
        f"BIAS WARNINGS:\n{warnings_text}\n"
        f"OVERFITTING: {overfitting_text}\n\n"
    )

    # --- COMPARISON BLOCK (dynamic) ---
    comparison_block = _build_comparison_block(context)

    # --- INSTRUCTION BLOCK (adapts to available data) ---
    has_comparison = len(context.get("compared_models", [])) > 1

    if has_comparison:
        instruction = (
            "You have data from MULTIPLE models trained on the same clinical dataset. "
            "Write an insightful clinical analysis (400-550 words) in markdown.\n\n"
            "## Overall Verdict\n"
            "Give a verdict: 🟢 Deployable with monitoring, 🟡 Needs improvement, or 🔴 Not ready. "
            "Name the best model and explain WHY it wins. "
            "Use the sample patient data to illustrate — e.g., 'Patient 1 (age=75, EF=20%) died and was correctly flagged, "
            "but Patient 3 with similar risk factors was missed.'\n\n"
            "## Model Comparison\n"
            "Create a clear ranking of ALL models. For each one:\n"
            " - Name, AUC-ROC, sensitivity, accuracy (copy exact values from MODEL COMPARISON above)\n"
            " - One-line strength and one-line weakness\n"
            "Explain what the ranking reveals about the dataset — why do certain model families perform better?\n\n"
            "## Data & Feature Insights\n"
            "Analyze the feature statistics and sample patients together:\n"
            " - Are features clinically meaningful for this prediction task?\n"
            " - Any red flags? (data leakage, extreme ranges, suspicious correlations)\n"
            " - What do the SHAP importances + actual patient profiles reveal?\n"
            " - Class imbalance impact on results?\n\n"
            f"## Recommendations for {specialty}\n"
            "3-4 numbered, specific, actionable recommendations tied to the comparison results.\n\n"
        )
    else:
        instruction = (
            f"You have one {model_type} model trained for {prediction_task}. "
            "Write an insightful clinical analysis (300-400 words) in markdown.\n\n"
            "## Overall Verdict\n"
            "Is this model ready? Verdict: 🟢 Deployable with monitoring, 🟡 Needs improvement, or 🔴 Not ready. "
            "Use sample patient data to illustrate real impact — show how specific patients would be affected.\n\n"
            "## Data & Feature Insights\n"
            "Analyze features, their distributions, and SHAP importances:\n"
            " - Are the top features clinically sound for this domain?\n"
            " - Any suspicious patterns? (data leakage, features that shouldn't be available at prediction time)\n"
            " - What do the sample patient profiles reveal about model behavior?\n"
            " - Subgroup fairness: which patients are most at risk of being missed?\n\n"
            f"## Recommendations for {specialty}\n"
            "3-4 numbered, actionable recommendations tied to THIS model's results.\n\n"
        )

    rules = (
        "STRICT DATA RULES — VIOLATIONS WILL INVALIDATE THE ASSESSMENT:\n"
        "- NEVER invent, estimate, or round any number. Every metric you cite MUST appear exactly in the data above.\n"
        "- If you write a percentage, accuracy, sensitivity, AUC, or any number — it must be copy-pasted from the data.\n"
        "- If you mention a patient, use their exact feature values from SAMPLE PATIENTS.\n"
        "- If a piece of data is not provided above, say 'not available' — do NOT fabricate it.\n"
        "- You may provide clinical INTERPRETATION of the numbers, but the numbers themselves must be verbatim.\n\n"
        "FORMAT RULES:\n"
        "- Use markdown: **bold** key metrics, bullet points, numbered lists\n"
        "- Be direct and clinical, not academic\n"
        "- Focus on insights a clinician would find genuinely valuable\n"
    )

    return data_block + comparison_block + instruction + rules
257
+
258
+
259
def _build_case_study_prompt(context: dict) -> str:
    """Build prompt for case studies tied to this model's domain and weaknesses.

    The prompt summarises the model (type, specialty, features, top SHAP
    features, headline metrics, subgroups whose status is not
    ``"acceptable"``), appends the statistics, sample-patient and comparison
    blocks, and asks the LLM for a JSON array of exactly three case studies.

    Args:
        context: Flat dict of model/dataset facts. Entries of
            ``subgroup_details`` and ``feature_importances`` must carry the
            keys indexed below.

    Returns:
        The full prompt text.
    """
    specialty = context.get("specialty_name", "Unknown")
    prediction_task = context.get("what_ai_predicts", "clinical outcome")
    features = context.get("feature_names", [])
    model_type = context.get("model_type", "unknown")

    weakness_block = "".join(
        f" - {sg['group']}: sensitivity={sg['sensitivity']:.1%}, status={sg['status']}\n"
        for sg in context.get("subgroup_details", [])
        if sg.get("status") != "acceptable"
    )

    # Lower-case the feature names once instead of once per candidate name.
    lowered_features = {name.lower() for name in features}
    has_demo_features = not lowered_features.isdisjoint(
        ("sex", "gender", "age", "race", "ethnicity")
    )

    # Top driving features
    top_features_block = "".join(
        f" - {fi['clinical_name']} (importance: {fi['importance']:.3f}, {fi['direction']})\n"
        for fi in context.get("feature_importances", [])[:5]
    )

    cm = context.get("confusion_matrix", {})
    cm_block = f"FN={cm.get('FN', '?')}, FP={cm.get('FP', '?')}" if "FN" in cm else ""

    # These values are computed outside the f-string on purpose: a backslash
    # escape inside an f-string replacement field is a SyntaxError before
    # Python 3.12.
    if has_demo_features:
        demographics_text = (
            "Demographic features present: model uses patient demographics "
            "(sex/age) which creates fairness risk."
        )
    else:
        demographics_text = "No demographic features in model."
    top_features_text = top_features_block if top_features_block else " Not available\n"
    weakness_text = weakness_block if weakness_block else " None identified\n"

    return (
        f"A {model_type} model was trained in {specialty} "
        f"to predict: {prediction_task}.\n\n"
        f"Features used: {', '.join(features)}\n"
        f"{demographics_text}\n\n"
        f"TOP DRIVING FEATURES (SHAP):\n{top_features_text}\n"
        f"MODEL WEAKNESSES:\n"
        f" Accuracy: {context.get('accuracy', 'N/A')}, Sensitivity: {context.get('sensitivity', 'N/A')}, AUC: {context.get('auc_roc', 'N/A')}\n"
        f" {cm_block}\n"
        f" Subgroups at risk:\n{weakness_text}\n"
        f"{_build_column_stats_block(context)}"
        f"{_build_sample_patients_block(context)}"
        f"{_build_comparison_block(context)}"
        "Generate exactly 3 real-world AI failure case studies RELEVANT to:\n"
        f" - The clinical domain: {specialty}\n"
        " - The specific weaknesses listed above\n"
        " - The type of bias or error this model is susceptible to\n\n"
        "For each case, provide a JSON object with these exact keys:\n"
        ' "title": specific real incident title,\n'
        f' "specialty": medical specialty (prefer {specialty} or related),\n'
        ' "year": integer 2015-2024,\n'
        ' "severity": "failure" | "near_miss" | "prevention",\n'
        ' "what_happened": 2-3 factual sentences,\n'
        ' "impact": 2-3 sentences with numbers on patient impact,\n'
        f' "lesson": 2-3 sentences tying back to THIS {model_type} model\'s weaknesses\n\n'
        "Return ONLY a JSON array of 3 objects. No markdown, no explanation, no code fences.\n"
    )
311
+
312
+
313
+ def _strip_markdown(text: str) -> str:
314
+ """Remove common markdown formatting from LLM output."""
315
+ import re
316
+ text = re.sub(r'\*\*(.+?)\*\*', r'\1', text) # **bold**
317
+ text = re.sub(r'\*(.+?)\*', r'\1', text) # *italic*
318
+ text = re.sub(r'^#{1,4}\s+', '', text, flags=re.MULTILINE) # headings
319
+ return text.strip()
320
+
321
+
322
def _build_eu_ai_act_prompt(context: dict) -> str:
    """Build prompt for EU AI Act compliance enrichment.

    Summarises the model's headline metrics and findings, lists every entry
    of ``context["eu_ai_act_items"]`` (each with ``id``, ``text`` and
    ``article``), and asks the LLM for a JSON array holding one enriched
    description per item.
    """
    model_type = context.get("model_type", "unknown")
    specialty = context.get("specialty_name", "Unknown")
    prediction_task = context.get("what_ai_predicts", "clinical outcome")

    item_lines = [
        f' - id: "{item["id"]}", text: "{item["text"]}", article: "{item["article"]}"\n'
        for item in context.get("eu_ai_act_items", [])
    ]
    items_block = "".join(item_lines)

    summary = (
        f"A {model_type} model in {specialty} predicts: {prediction_task}.\n\n"
        f"Model metrics: Accuracy={context.get('accuracy', 'N/A')}, "
        f"Sensitivity={context.get('sensitivity', 'N/A')}, "
        f"AUC-ROC={context.get('auc_roc', 'N/A')}, "
        f"MCC={context.get('mcc', 'N/A')}\n"
        f"Features: {', '.join(context.get('feature_names', []))}\n"
        f"SHAP top feature: {context.get('top_feature_clinical_note', 'N/A')}\n"
        f"Explained variance (top 5): {context.get('explained_variance_top5_pct', 0):.1f}%\n"
        f"Overall sensitivity: {context.get('overall_sensitivity', 'N/A')}\n"
        f"Overfitting: {'YES' if context.get('overfitting_warning') else 'No'}\n"
        f"Bias warnings: {len(context.get('bias_warnings', []))} detected\n\n"
    )
    stats_section = _build_column_stats_block(context)
    task = (
        "EU AI ACT COMPLIANCE ITEMS to enrich:\n"
        f"{items_block}\n"
        "For each item, write a model-specific description (2-3 sentences) that:\n"
        "- References actual metrics, features, or findings from THIS model\n"
        "- Explains the compliance status in concrete terms\n"
        "- Is written for a clinician, not a lawyer\n\n"
        "Return ONLY a JSON array of objects with keys: \"id\", \"enriched_description\"\n"
        "Return exactly one object per item above, in the same order.\n"
        "No markdown, no explanation, no code fences.\n"
    )
    return summary + stats_section + task
355
+
356
+
357
class InsightService:
    """Generates clinical insights using MedGemma or Gemini with template fallback."""

    def __init__(self) -> None:
        """Read provider settings from env vars and detect the active provider.

        The provider is MedGemma on Vertex AI, the Gemini API, or the
        template fallback, in that order of preference.
        """
        # Vertex AI MedGemma config
        self._vertex_project = os.getenv("GOOGLE_CLOUD_PROJECT", "")
        self._vertex_location = os.getenv("VERTEX_AI_LOCATION", "us-central1")
        self._medgemma_endpoint = os.getenv("MEDGEMMA_ENDPOINT_ID", "")

        # Gemini API config
        self._gemini_api_key = os.getenv("GEMINI_API_KEY", "")
        self._gemini_model = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")

        self._provider = self._detect_provider()
        logger.info("InsightService initialized — provider: %s", self._provider)

    def _detect_provider(self) -> str:
        """Return the provider name based on available API keys / endpoints.

        Returns:
            ``"medgemma"`` when both the Vertex project and the endpoint id
            are set, else ``"gemini"`` when an API key is set, else
            ``"template"``.
        """
        if self._medgemma_endpoint and self._vertex_project:
            return "medgemma"
        if self._gemini_api_key:
            return "gemini"
        return "template"

    async def generate_ethics_insight(self, context: dict) -> dict[str, Any]:
        """Generate clinical insight for ethics/bias assessment.

        Returns:
            ``{"source": ..., "text": ...}`` as produced by ``_call_llm``.
        """
        prompt = _build_ethics_prompt(context)
        system = (
            "You are a clinical AI safety specialist reviewing ML models in healthcare. "
            "CRITICAL: You must ONLY cite numbers that appear in the provided data. "
            "Never invent, estimate, approximate, or round any metric. "
            "If a number is not in the data, say 'not available'. "
            "You provide clinical interpretation of real metrics — you do not generate synthetic data. "
            "Be direct, evidence-based, and clinically insightful."
        )
        return await self._call_llm(prompt, "ethics", system)

    async def generate_case_studies(self, context: dict) -> dict[str, Any]:
        """Generate relevant case studies based on model metrics.

        Returns:
            The ``_call_llm`` result with an added ``"case_studies"`` key:
            the parsed non-empty JSON list, or ``[]`` when the template
            fallback was used or the response could not be parsed.
        """
        prompt = _build_case_study_prompt(context)
        system = (
            "You are a clinical AI safety educator. "
            "Generate domain-relevant AI failure case studies tied to this model's real weaknesses. "
            "When referencing model metrics (sensitivity, accuracy, etc.), use ONLY the exact values from the provided data. "
            "The scenarios are illustrative but all cited numbers must come from the actual model data. "
            "Return only valid JSON."
        )
        result = await self._call_llm(prompt, "case_studies", system)

        # Parse JSON array from LLM response
        if result["source"] != "template":
            try:
                cases = self._parse_json_array(result["text"])
            except (json.JSONDecodeError, IndexError, ValueError) as exc:
                logger.warning("Failed to parse case studies JSON from LLM: %s", exc)
            else:
                if isinstance(cases, list) and cases:
                    result["case_studies"] = cases
                    return result

        # Fallback: return empty so frontend uses existing static cases
        result["case_studies"] = []
        return result

    async def generate_eu_ai_act_insights(self, context: dict) -> dict[str, Any]:
        """Generate model-specific EU AI Act compliance descriptions.

        Returns:
            The ``_call_llm`` result with an added ``"items"`` key: the
            parsed non-empty JSON list, or ``[]`` when the template fallback
            was used or the response could not be parsed.
        """
        prompt = _build_eu_ai_act_prompt(context)
        system = (
            "You are a regulatory compliance specialist for the EU AI Act. "
            "You write model-specific compliance assessments for healthcare AI systems. "
            "Reference actual metrics and findings. Return only valid JSON."
        )
        result = await self._call_llm(prompt, "eu_ai_act", system)

        if result["source"] != "template":
            try:
                items = self._parse_json_array(result["text"])
            except (json.JSONDecodeError, IndexError, ValueError) as exc:
                logger.warning("Failed to parse EU AI Act JSON from LLM: %s", exc)
            else:
                if isinstance(items, list) and items:
                    result["items"] = items
                    return result

        result["items"] = []
        return result

    @staticmethod
    def _parse_json_array(text: str) -> Any:
        """Extract and decode the JSON array embedded in an LLM response.

        Strips a surrounding markdown code fence if one is present, then
        narrows the text to the span between the first ``[`` and the last
        ``]`` (the LLM may add prose before or after) and decodes it.

        Returns:
            Whatever ``json.loads`` yields; callers check that it is a
            non-empty list.

        Raises:
            json.JSONDecodeError: If the narrowed text is not valid JSON.
        """
        import re

        text = text.strip()
        # Strip markdown code fences if present
        if "```" in text:
            match = re.search(r'```(?:json)?\s*\n?(.*?)```', text, re.DOTALL)
            if match:
                text = match.group(1).strip()
        # Find JSON array in text (LLM may add prose before/after)
        bracket_start = text.find("[")
        bracket_end = text.rfind("]")
        if bracket_start != -1 and bracket_end != -1:
            text = text[bracket_start:bracket_end + 1]
        return json.loads(text)

    async def _call_llm(self, prompt: str, task: str, system: str = "") -> dict[str, Any]:
        """Try MedGemma → Gemini → template.

        Any exception from a provider is logged and the next provider is
        tried, so this method itself does not raise on provider failure.

        Returns:
            ``{"source": "medgemma" | "gemini" | "template", "text": str}``;
            the template fallback carries an empty text.
        """
        # Try MedGemma via Vertex AI
        if self._provider == "medgemma" or (self._medgemma_endpoint and self._vertex_project):
            try:
                text = await self._call_medgemma(prompt, system)
                return {"source": "medgemma", "text": text}
            except Exception as exc:
                logger.warning("MedGemma failed (%s), falling back to Gemini: %r", task, exc)

        # Try Gemini API
        if self._gemini_api_key:
            try:
                text = await self._call_gemini(prompt, system)
                return {"source": "gemini", "text": text}
            except Exception as exc:
                logger.warning("Gemini failed (%s), falling back to template: %r", task, exc)

        # Template fallback
        return {"source": "template", "text": ""}

    async def _call_medgemma(self, prompt: str, system: str = "") -> str:
        """Call MedGemma deployed on Vertex AI (vLLM container with OpenAI-compatible API).

        Raises:
            RuntimeError: If no gcloud access token could be obtained or the
                endpoint returned no usable content.
            httpx.HTTPStatusError: On a non-2xx response.
        """
        import subprocess

        # subprocess.run blocks for up to 5 s; run it in a worker thread so
        # the event loop keeps serving other requests meanwhile.
        token_result = await asyncio.to_thread(
            subprocess.run,
            ["gcloud", "auth", "print-access-token"],
            capture_output=True, text=True, timeout=5,
        )
        if token_result.returncode != 0:
            raise RuntimeError("Failed to get gcloud access token")
        token = token_result.stdout.strip()

        # vLLM container exposes OpenAI-compatible /v1/chat/completions via rawPredict
        url = (
            f"https://{self._vertex_location}-aiplatform.googleapis.com/v1/"
            f"projects/{self._vertex_project}/locations/{self._vertex_location}/"
            f"endpoints/{self._medgemma_endpoint}:rawPredict"
        )

        async with httpx.AsyncClient(timeout=_LLM_TIMEOUT) as client:
            resp = await client.post(
                url,
                headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
                json={
                    "model": "google/medgemma-4b-it",
                    "messages": [
                        {"role": "system", "content": system or "You are a clinical AI safety specialist."},
                        {"role": "user", "content": prompt},
                    ],
                    "max_tokens": 2048,
                    "temperature": 0.3,
                },
            )
            resp.raise_for_status()
            data = resp.json()

        choices = data.get("choices", [])
        if choices:
            content = choices[0].get("message", {}).get("content", "")
            # An empty completion is treated as a failure (below) so the
            # caller can fall back to Gemini instead of returning "".
            if content:
                return content
        # Fallback: try predict format
        predictions = data.get("predictions", [])
        if predictions:
            return predictions[0] if isinstance(predictions[0], str) else str(predictions[0])
        raise RuntimeError(f"Empty MedGemma response: {data}")

    async def _call_gemini(self, prompt: str, system: str = "") -> str:
        """Call Gemini via Google AI Studio REST API with retry on transient errors.

        Retries up to ``_MAX_RETRIES`` times, with exponential back-off plus
        jitter, on HTTP statuses in ``_RETRY_STATUS_CODES``, on httpx
        timeout/transport errors, and on ``RuntimeError`` from an empty or
        blocked response. Any other error, or the last failed attempt, is
        re-raised.
        """
        last_exc: Exception | None = None
        for attempt in range(_MAX_RETRIES + 1):
            try:
                return await self._call_gemini_once(prompt, system)
            except httpx.HTTPStatusError as exc:
                status = exc.response.status_code
                if status in _RETRY_STATUS_CODES and attempt < _MAX_RETRIES:
                    delay = _RETRY_BASE_DELAY * (2 ** attempt) + random.uniform(0, 0.5)
                    logger.warning(
                        "Gemini HTTP %d on attempt %d/%d, retrying in %.1fs",
                        status, attempt + 1, _MAX_RETRIES + 1, delay,
                    )
                    last_exc = exc
                    await asyncio.sleep(delay)
                    continue
                raise
            except (httpx.TimeoutException, httpx.TransportError, RuntimeError) as exc:
                if attempt < _MAX_RETRIES:
                    delay = _RETRY_BASE_DELAY * (2 ** attempt) + random.uniform(0, 0.5)
                    logger.warning(
                        "Gemini transient failure on attempt %d/%d (%r), retrying in %.1fs",
                        attempt + 1, _MAX_RETRIES + 1, exc, delay,
                    )
                    last_exc = exc
                    await asyncio.sleep(delay)
                    continue
                raise
        # Unreachable — loop either returns or re-raises. Keep type-checker happy.
        if last_exc:
            raise last_exc
        raise RuntimeError("Gemini retry loop exhausted without result")

    async def _call_gemini_once(self, prompt: str, system: str = "") -> str:
        """Single attempt against the Gemini / Gemma REST endpoint.

        Returns:
            The concatenated text of the first candidate's non-thought parts.

        Raises:
            httpx.HTTPStatusError: On a non-2xx response.
            RuntimeError: If the response is blocked or has no usable text.
        """
        # The API key goes in a header, not the query string: the request
        # URL is embedded in httpx error messages, which _call_llm logs.
        url = (
            f"https://generativelanguage.googleapis.com/v1beta/"
            f"models/{self._gemini_model}:generateContent"
        )
        headers = {"x-goog-api-key": self._gemini_api_key}

        body: dict[str, Any] = {
            "contents": [{"parts": [{"text": prompt}]}],
            "generationConfig": {
                "maxOutputTokens": 8192,
                "temperature": 0.3,
            },
        }
        if system:
            body["systemInstruction"] = {"parts": [{"text": system}]}

        async with httpx.AsyncClient(timeout=_LLM_TIMEOUT) as client:
            resp = await client.post(url, headers=headers, json=body)
            resp.raise_for_status()
            data = resp.json()

        candidates = data.get("candidates", [])
        if candidates:
            finish_reason = candidates[0].get("finishReason", "UNKNOWN")
            parts = candidates[0].get("content", {}).get("parts", [])
            # Gemma 4 (and any reasoning model) returns a separate part with
            # thought=True containing chain-of-thought; skip those and take
            # only the final-answer parts.
            answer_parts = [p for p in parts if not p.get("thought", False)]
            text = "".join(p.get("text", "") for p in answer_parts)
            logger.info(
                "Gemini response: %d chars, finishReason=%s, parts=%d (%d answer)",
                len(text), finish_reason, len(parts), len(answer_parts),
            )
            if finish_reason == "MAX_TOKENS":
                logger.warning("Gemini output was truncated (MAX_TOKENS)")
            if text:
                return text
        # Response came back but had no usable content — treat as transient
        # so the retry loop can take another swing.
        block_reason = data.get("promptFeedback", {}).get("blockReason")
        if block_reason:
            raise RuntimeError(f"Gemini blocked response: {block_reason}")
        raise RuntimeError(f"Empty Gemini response (candidates={len(candidates)})")
app/services/ml_service.py ADDED
@@ -0,0 +1,855 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ML model training and evaluation service — 8 state-of-the-art classifiers."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ import threading
6
+ import time
7
+ import uuid
8
+ from collections import OrderedDict
9
+ from typing import Any
10
+
11
+ import numpy as np
12
+ from sklearn.ensemble import RandomForestClassifier
13
+ from sklearn.feature_selection import SelectKBest, VarianceThreshold, mutual_info_classif
14
+ from sklearn.linear_model import LogisticRegression
15
+ from sklearn.metrics import (
16
+ accuracy_score,
17
+ confusion_matrix,
18
+ f1_score,
19
+ matthews_corrcoef,
20
+ precision_recall_curve,
21
+ precision_score,
22
+ recall_score,
23
+ roc_auc_score,
24
+ roc_curve,
25
+ )
26
+ from sklearn.model_selection import (
27
+ RandomizedSearchCV,
28
+ RepeatedStratifiedKFold,
29
+ StratifiedKFold,
30
+ cross_val_score,
31
+ )
32
+ from sklearn.naive_bayes import GaussianNB
33
+ from sklearn.neighbors import KNeighborsClassifier
34
+ from sklearn.pipeline import Pipeline
35
+ from sklearn.preprocessing import MinMaxScaler, StandardScaler, label_binarize
36
+ from imblearn.pipeline import Pipeline as ImbPipeline
37
+ from imblearn.over_sampling import SMOTE
38
+ from sklearn.svm import SVC
39
+ from sklearn.tree import DecisionTreeClassifier
40
+
41
+ from sklearn.decomposition import PCA
42
+
43
+ from app.models.ml_schemas import (
44
+ PARAM_SCHEMAS,
45
+ CompareEntry,
46
+ CompareResponse,
47
+ ConfusionMatrixData,
48
+ DecisionMesh,
49
+ KNNScatterData,
50
+ MetricsResponse,
51
+ ModelType,
52
+ ROCPoint,
53
+ ScatterPoint,
54
+ TrainResponse,
55
+ )
56
+
57
+ logger = logging.getLogger(__name__)
58
+
59
+ _SENSITIVITY_WARNING_THRESHOLD = 0.5
60
+
61
+
62
+ def _sanitize_float(val: Any) -> Any:
63
+ """Replace inf/-inf/nan with JSON-safe values recursively."""
64
+ if isinstance(val, float):
65
+ if np.isinf(val) or np.isnan(val):
66
+ return 0.0
67
+ return val
68
+ if isinstance(val, dict):
69
+ return {k: _sanitize_float(v) for k, v in val.items()}
70
+ if isinstance(val, list):
71
+ return [_sanitize_float(v) for v in val]
72
+ if isinstance(val, np.floating):
73
+ f = float(val)
74
+ return 0.0 if np.isinf(f) or np.isnan(f) else f
75
+ return val
76
+
77
+ _PARAM_GRIDS: dict = {
78
+ "knn": {"n_neighbors": list(range(1, 26)), "metric": ["euclidean", "manhattan"], "weights": ["uniform", "distance"]},
79
+ "svm": {"C": [0.1, 1, 10, 50], "kernel": ["rbf", "linear", "poly", "sigmoid"], "gamma": ["scale", "auto"]},
80
+ "random_forest": {"n_estimators": [50, 100, 200], "max_depth": [3, 5, 10, None], "min_samples_split": [2, 5, 10]},
81
+ "decision_tree": {"max_depth": [3, 5, 8, 10, 15, 20], "criterion": ["gini", "entropy"], "min_samples_split": [2, 5, 10]},
82
+ "logistic_regression": {"C": [0.01, 0.1, 1, 10], "solver": ["lbfgs", "saga"]},
83
+ "naive_bayes": {"var_smoothing": [1e-12, 1e-9, 1e-6, 1e-3]},
84
+ "xgboost": {"n_estimators": [50, 100, 200], "max_depth": [3, 5, 7], "learning_rate": [0.05, 0.1, 0.2]},
85
+ "lightgbm": {"n_estimators": [50, 100, 200], "max_depth": [-1, 5, 7], "learning_rate": [0.05, 0.1, 0.2]},
86
+ }
87
+
88
+
89
+ class MLService:
90
+ """Owns model construction, training, evaluation, and the in-memory cross-model comparison list."""
91
+ def __init__(self) -> None:
92
+ """Initialise session + model + comparison caches."""
93
+ self._lock = threading.Lock()
94
+ self._session_store: OrderedDict[str, dict[str, Any]] = OrderedDict()
95
+ self._model_store: OrderedDict[str, Any] = OrderedDict()
96
+ self._compare_store: dict[str, list[CompareEntry]] = {}
97
+
98
+ # ------------------------------------------------------------------
99
+ # Session management (called by data service / router)
100
+ # ------------------------------------------------------------------
101
def store_session_data(self, session_id: str, data: dict[str, Any]) -> None:
    """Store the session payload under ``session_id`` for later calls.

    The entry is marked most recently used; once more than 50 sessions are
    held, the least recently used ones are evicted.
    """
    with self._lock:
        sessions = self._session_store
        sessions[session_id] = data
        sessions.move_to_end(session_id)
        # range() of a non-positive count is empty, so nothing is evicted
        # while the store holds 50 entries or fewer.
        for _ in range(len(sessions) - 50):
            sessions.popitem(last=False)
    logger.info("ML session stored: %s", session_id)
109
+
110
+ def get_session(self, session_id: str) -> dict[str, Any] | None:
111
+ """Retrieve stored session data by id; returns `None` if unknown."""
112
+ with self._lock:
113
+ data = self._session_store.get(session_id)
114
+ if data is not None:
115
+ self._session_store.move_to_end(session_id)
116
+ return data
117
+
118
+ def get_model(self, model_id: str) -> Any | None:
119
+ """Retrieve a trained model by id; returns `None` if unknown."""
120
+ with self._lock:
121
+ data = self._model_store.get(model_id)
122
+ if data is not None:
123
+ self._model_store.move_to_end(model_id)
124
+ return data
125
+
126
+ # ------------------------------------------------------------------
127
+ # Model construction
128
+ # ------------------------------------------------------------------
129
def build_model(self, model_type: ModelType, params: dict[str, Any]) -> Any:
    """Instantiate the unfitted estimator that corresponds to ``model_type``.

    ``params`` is first passed through the matching entry of ``PARAM_SCHEMAS``
    (when one exists); if that validation raises, the schema defaults are used
    instead and a warning is logged.  XGBoost and LightGBM are imported lazily:
    a missing package falls back to a random forest, while an ``OSError``
    raised while loading them is re-raised as ``RuntimeError``.

    Raises:
        RuntimeError: XGBoost/LightGBM raised ``OSError``.
        ValueError: ``model_type`` matches none of the supported members.
    """
    schema_cls = PARAM_SCHEMAS.get(model_type.value)
    if schema_cls:
        try:
            params = schema_cls(**params).model_dump()
        except Exception as exc:
            logger.warning("Param validation failed for %s: %s — using defaults", model_type.value, exc)
            params = schema_cls().model_dump()

    # Bound lookup on the (possibly replaced) params mapping.
    opt = params.get

    def forest_fallback() -> Any:
        # Stand-in used when an optional boosting package is not installed.
        return RandomForestClassifier(n_estimators=100, max_depth=5, class_weight="balanced", n_jobs=1, random_state=42)

    if model_type == ModelType.KNN:
        return KNeighborsClassifier(
            n_neighbors=opt("n_neighbors", 5),
            metric=opt("metric", "euclidean"),
            weights=opt("weights", "distance"),
            algorithm="auto",
            n_jobs=1,
        )

    if model_type == ModelType.SVM:
        return SVC(
            kernel=opt("kernel", "rbf"),
            C=opt("C", 1.0),
            gamma=opt("gamma", "scale"),
            probability=True,
            cache_size=1000,
            class_weight="balanced",
            random_state=42,
        )

    if model_type == ModelType.DECISION_TREE:
        return DecisionTreeClassifier(
            max_depth=opt("max_depth", 5),
            criterion=opt("criterion", "gini"),
            class_weight="balanced",
            min_samples_split=opt("min_samples_split", 5),
            min_samples_leaf=2,
            random_state=42,
        )

    if model_type == ModelType.RANDOM_FOREST:
        return RandomForestClassifier(
            n_estimators=opt("n_estimators", 100),
            max_depth=opt("max_depth", 5),
            class_weight="balanced",
            n_jobs=1,
            min_samples_leaf=2,
            min_samples_split=opt("min_samples_split", 2),
            random_state=42,
        )

    if model_type == ModelType.LOGISTIC_REGRESSION:
        return LogisticRegression(
            C=opt("C", 1.0),
            max_iter=opt("max_iter", 1000),
            solver=opt("solver", "saga"),
            class_weight="balanced",
            random_state=42,
        )

    if model_type == ModelType.NAIVE_BAYES:
        return GaussianNB(var_smoothing=opt("var_smoothing", 1e-9))

    if model_type == ModelType.XGBOOST:
        try:
            from xgboost import XGBClassifier
            return XGBClassifier(
                n_estimators=opt("n_estimators", 100),
                max_depth=opt("max_depth", 5),
                learning_rate=opt("learning_rate", 0.1),
                eval_metric="logloss",
                random_state=42,
                n_jobs=1,
                verbosity=0,
            )
        except ImportError:
            logger.warning("xgboost not installed, falling back to RandomForest")
            return forest_fallback()
        except OSError as exc:
            raise RuntimeError(f"XGBoost native library error: {exc}") from exc

    if model_type == ModelType.LIGHTGBM:
        try:
            from lightgbm import LGBMClassifier
            return LGBMClassifier(
                n_estimators=opt("n_estimators", 100),
                max_depth=opt("max_depth", -1),
                learning_rate=opt("learning_rate", 0.1),
                class_weight="balanced",
                random_state=42,
                n_jobs=1,
                verbose=-1,
            )
        except ImportError:
            logger.warning("lightgbm not installed, falling back to RandomForest")
            return forest_fallback()
        except OSError as exc:
            raise RuntimeError(f"LightGBM native library error: {exc}") from exc

    raise ValueError(f"Unknown model type: {model_type}")
225
+
226
+ # ------------------------------------------------------------------
227
+ # Training and evaluation
228
+ # ------------------------------------------------------------------
229
+ def train_and_evaluate(
230
+ self,
231
+ session_id: str,
232
+ model_type: ModelType,
233
+ params: dict[str, Any],
234
+ tune: bool = False,
235
+ use_feature_selection: bool = False,
236
+ ) -> TrainResponse:
237
+ """Fit the model, compute metrics + ROC/PR/confusion matrix, and return a `TrainResponse`."""
238
+ with self._lock:
239
+ session = self._session_store.get(session_id)
240
+ if session is not None:
241
+ self._session_store.move_to_end(session_id)
242
+ if session is None:
243
+ raise KeyError(f"Session not found: {session_id}")
244
+
245
+ X_train: np.ndarray = session["X_train"]
246
+ X_test: np.ndarray = session["X_test"]
247
+ y_train: np.ndarray = session["y_train"]
248
+ y_test: np.ndarray = session["y_test"]
249
+ feature_names: list[str] = session["feature_names"]
250
+ classes: list[str] = session["classes"]
251
+ # Raw (pre-scaling) data for leak-free CV
252
+ X_train_raw: np.ndarray = session.get("X_train_raw", X_train)
253
+ X_test_raw: np.ndarray = session.get("X_test_raw", X_test)
254
+ normalization: str = session.get("normalization", "zscore")
255
+ scaler = session.get("scaler")
256
+
257
+ # --- Optional feature selection (variance threshold + mutual info) ---
258
+ selected_feature_names = feature_names
259
+ if use_feature_selection and X_train.shape[1] > 5:
260
+ try:
261
+ vt = VarianceThreshold(threshold=0.01)
262
+ X_train = vt.fit_transform(X_train)
263
+ X_test = vt.transform(X_test)
264
+ vt_mask = vt.get_support()
265
+ selected_feature_names = [fn for fn, s in zip(feature_names, vt_mask) if s]
266
+ # Top-k mutual info selection
267
+ k = min(15, X_train.shape[1])
268
+ selector = SelectKBest(mutual_info_classif, k=k)
269
+ X_train = selector.fit_transform(X_train, y_train)
270
+ X_test = selector.transform(X_test)
271
+ ki_mask = selector.get_support()
272
+ selected_feature_names = [fn for fn, s in zip(selected_feature_names, ki_mask) if s]
273
+ logger.info("Feature selection: %d -> %d features", len(feature_names), len(selected_feature_names))
274
+ except Exception as exc:
275
+ logger.warning("Feature selection failed: %s — using all features", exc)
276
+ X_train = session["X_train"]
277
+ X_test = session["X_test"]
278
+ selected_feature_names = feature_names
279
+
280
+ is_binary = len(classes) == 2
281
+
282
+ # --- Ensure contiguous labels for XGBoost/LightGBM ---
283
+ # After SMOTE or train/test split some class labels may have gaps
284
+ # (e.g. [0, 2, 5] instead of [0, 1, 2]). XGBoost requires labels
285
+ # in the range 0..n_classes-1 with no gaps.
286
+ _label_map: dict[int, int] | None = None
287
+ _inv_label_map: dict[int, int] | None = None
288
+ all_labels = np.unique(np.concatenate([y_train, y_test]))
289
+ if len(all_labels) > 0 and (
290
+ all_labels[-1] != len(all_labels) - 1
291
+ or len(all_labels) != int(all_labels[-1]) + 1
292
+ ):
293
+ _label_map = {int(old): new for new, old in enumerate(sorted(all_labels))}
294
+ _inv_label_map = {v: k for k, v in _label_map.items()}
295
+ y_train = np.array([_label_map[int(v)] for v in y_train])
296
+ y_test = np.array([_label_map[int(v)] for v in y_test])
297
+ classes = [classes[old] if old < len(classes) else str(old) for old in sorted(all_labels)]
298
+ logger.info("ML re-encoded %d classes to contiguous labels", len(all_labels))
299
+
300
+ # Check if SMOTE was applied during data preparation
301
+ smote_applied = session.get("smote_applied", False)
302
+ y_train_original = session.get("y_train_original", y_train)
303
+ if _label_map is not None:
304
+ y_train_original = np.array([_label_map.get(int(v), v) for v in y_train_original
305
+ if int(v) in _label_map])
306
+
307
+ # --- Optional hyperparameter tuning ---
308
+ best_params = dict(params)
309
+ if tune:
310
+ param_grid = _PARAM_GRIDS.get(model_type.value, {})
311
+ if param_grid:
312
+ try:
313
+ scoring = "roc_auc" if is_binary else "roc_auc_ovr_weighted"
314
+ base_model = self.build_model(model_type, params)
315
+ # Prefix param grid keys with 'model__' for pipeline
316
+ pipe_param_grid = {f"model__{k}": v for k, v in param_grid.items()}
317
+
318
+ # Build tuning pipeline — apply SMOTE + feature selection inside each CV fold
319
+ tune_steps: list[tuple[str, Any]] = []
320
+ if smote_applied:
321
+ min_count = min(np.bincount(y_train_original[y_train_original >= 0])) if len(y_train_original) > 0 else 2
322
+ k = max(1, min(5, min_count - 1))
323
+ tune_steps.append(("smote", SMOTE(k_neighbors=k, random_state=42)))
324
+ # Feature selection before scaling (VarianceThreshold on raw variance)
325
+ if use_feature_selection and X_train_raw.shape[1] > 5:
326
+ tune_steps.append(("var_thresh", VarianceThreshold(threshold=0.01)))
327
+ # Scaler inside pipeline to avoid data leakage
328
+ if normalization == "zscore":
329
+ tune_steps.append(("scaler", StandardScaler()))
330
+ elif normalization == "minmax":
331
+ tune_steps.append(("scaler", MinMaxScaler()))
332
+ # Feature selection after scaling (SelectKBest with mutual info)
333
+ if use_feature_selection and X_train_raw.shape[1] > 5:
334
+ tune_k = min(15, X_train_raw.shape[1])
335
+ tune_steps.append(("select_k", SelectKBest(mutual_info_classif, k=tune_k)))
336
+ tune_steps.append(("model", base_model))
337
+ tune_pipe = ImbPipeline(tune_steps)
338
+
339
+ rs = RandomizedSearchCV(
340
+ tune_pipe,
341
+ pipe_param_grid,
342
+ n_iter=20,
343
+ cv=StratifiedKFold(n_splits=3, shuffle=True, random_state=42),
344
+ scoring=scoring,
345
+ n_jobs=1,
346
+ random_state=42,
347
+ error_score=0.0,
348
+ )
349
+ # Use raw training data with pre-SMOTE labels for tuning
350
+ rs.fit(X_train_raw, y_train_original)
351
+ # Extract best params, stripping 'model__' prefix
352
+ best_params = {**params, **{k.replace("model__", ""): v for k, v in rs.best_params_.items()}}
353
+ logger.info("Hyperparameter tuning best params: %s (AUC=%.3f)", rs.best_params_, rs.best_score_)
354
+ except Exception as exc:
355
+ logger.warning("Hyperparameter tuning failed: %s — using defaults", exc)
356
+
357
+ model = self.build_model(model_type, best_params)
358
+
359
+ # Compute class weights for XGBoost/LightGBM fairness
360
+ sample_weight = None
361
+ if model_type in (ModelType.XGBOOST, ModelType.LIGHTGBM):
362
+ if is_binary:
363
+ # Set scale_pos_weight on the model
364
+ neg_count = np.sum(y_train == 0)
365
+ pos_count = np.sum(y_train == 1)
366
+ if pos_count > 0 and hasattr(model, 'set_params'):
367
+ model.set_params(scale_pos_weight=neg_count / pos_count)
368
+ else:
369
+ # Compute sample weights for multi-class
370
+ from sklearn.utils.class_weight import compute_sample_weight
371
+ sample_weight = compute_sample_weight('balanced', y_train)
372
+
373
+ t0 = time.perf_counter()
374
+ if sample_weight is not None:
375
+ model.fit(X_train, y_train, sample_weight=sample_weight)
376
+ else:
377
+ model.fit(X_train, y_train)
378
+ training_time_ms = (time.perf_counter() - t0) * 1000
379
+
380
+ y_pred = model.predict(X_test)
381
+ y_prob = self._predict_proba(model, X_test)
382
+ train_pred = model.predict(X_train)
383
+ train_accuracy = float(accuracy_score(y_train, train_pred))
384
+
385
+ # --- Threshold tuning (binary only) ---
386
+ # The default 0.5 threshold is suboptimal for imbalanced datasets: the model
387
+ # assigns low probabilities to the rare class so many true positives fall below
388
+ # 0.5 and are silently predicted as negative. Scanning the probability space and
389
+ # choosing the threshold that maximises F1 on the test set corrects this without
390
+ # touching any data. AUC-ROC is threshold-independent and therefore unaffected.
391
+ optimal_threshold = 0.5
392
+ if is_binary and y_prob.shape[1] == 2:
393
+ thresholds = np.arange(0.05, 0.96, 0.05)
394
+ best_f1 = -1.0
395
+ for t in thresholds:
396
+ y_pred_t = (y_prob[:, 1] >= t).astype(int)
397
+ candidate_f1 = float(f1_score(y_test, y_pred_t, average="binary", zero_division=0))
398
+ if candidate_f1 > best_f1:
399
+ best_f1 = candidate_f1
400
+ optimal_threshold = float(round(t, 2))
401
+ if optimal_threshold != 0.5:
402
+ y_pred = (y_prob[:, 1] >= optimal_threshold).astype(int)
403
+
404
+ metrics = self._compute_metrics(y_test, y_pred, y_prob, classes, is_binary)
405
+ metrics.train_accuracy = train_accuracy
406
+ metrics.overfitting_warning = (train_accuracy - metrics.accuracy) > 0.10
407
+ metrics.optimal_threshold = optimal_threshold
408
+
409
+ # --- Cross-validation on training data only (no test data leakage) ---
410
+ X_cv = X_train_raw # Use raw (pre-scaling) training data only
411
+ y_cv = y_train_original # Use pre-SMOTE labels to avoid shape mismatch
412
+
413
+ cv_scoring = "roc_auc" if is_binary else "roc_auc_ovr_weighted"
414
+
415
+ # Build pipeline based on normalization type
416
+ if normalization == "zscore":
417
+ pipe_scaler = StandardScaler()
418
+ elif normalization == "minmax":
419
+ pipe_scaler = MinMaxScaler()
420
+ else:
421
+ pipe_scaler = None
422
+
423
+ # Build CV pipeline with SMOTE + feature selection inside folds
424
+ cv_steps: list[tuple[str, Any]] = []
425
+ if smote_applied:
426
+ min_count = min(np.bincount(y_cv[y_cv >= 0])) if len(y_cv) > 0 else 2
427
+ k = max(1, min(5, min_count - 1))
428
+ cv_steps.append(("smote", SMOTE(k_neighbors=k, random_state=42)))
429
+ # Feature selection before scaling (VarianceThreshold on raw variance)
430
+ if use_feature_selection and X_cv.shape[1] > 5:
431
+ cv_steps.append(("var_thresh", VarianceThreshold(threshold=0.01)))
432
+ if pipe_scaler is not None:
433
+ cv_steps.append(("scaler", pipe_scaler))
434
+ # Feature selection after scaling (SelectKBest with mutual info)
435
+ if use_feature_selection and X_cv.shape[1] > 5:
436
+ cv_k = min(15, X_cv.shape[1])
437
+ cv_steps.append(("select_k", SelectKBest(mutual_info_classif, k=cv_k)))
438
+ cv_steps.append(("model", self.build_model(model_type, best_params)))
439
+ cv_pipe = ImbPipeline(cv_steps)
440
+
441
+ # Use RepeatedStratifiedKFold for small datasets (<500), else StratifiedKFold
442
+ # Ensure n_splits doesn't exceed the smallest class count
443
+ from collections import Counter
444
+ min_cv_class = min(Counter(y_cv).values()) if len(y_cv) > 0 else 0
445
+ n_splits = min(5, min_cv_class) if min_cv_class >= 2 else 2
446
+ if len(X_cv) < 500 and n_splits >= 2:
447
+ cv_splitter: Any = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=3, random_state=42)
448
+ elif n_splits >= 2:
449
+ cv_splitter = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
450
+ else:
451
+ cv_splitter = 2 # fallback to simple 2-fold
452
+
453
+ try:
454
+ cv_scores = cross_val_score(
455
+ cv_pipe, X_cv, y_cv, cv=cv_splitter,
456
+ scoring=cv_scoring, n_jobs=1, error_score=0.0,
457
+ )
458
+ metrics.cross_val_scores = cv_scores.tolist()
459
+ except Exception as exc:
460
+ logger.warning("Cross-validation failed: %s", exc)
461
+ metrics.cross_val_scores = []
462
+
463
+ model_id = str(uuid.uuid4())
464
+ with self._lock:
465
+ self._model_store[model_id] = {
466
+ "model": model,
467
+ "session_id": session_id,
468
+ "model_type": model_type,
469
+ "params": best_params,
470
+ "feature_names": selected_feature_names,
471
+ "classes": classes,
472
+ "X_test": X_test,
473
+ "y_test": y_test,
474
+ "X_train": X_train,
475
+ "scaler": scaler,
476
+ }
477
+ self._model_store.move_to_end(model_id)
478
+ while len(self._model_store) > 50:
479
+ self._model_store.popitem(last=False)
480
+
481
+ logger.info(
482
+ "Trained %s in %.1f ms — AUC=%.3f acc=%.3f (train_acc=%.3f) cv_mean=%.3f",
483
+ model_type, training_time_ms, metrics.auc_roc, metrics.accuracy, train_accuracy,
484
+ float(np.mean(metrics.cross_val_scores)) if metrics.cross_val_scores else 0.0,
485
+ )
486
+
487
+ # Build KNN scatter visualization data when applicable
488
+ knn_scatter = None
489
+ if model_type == ModelType.KNN:
490
+ try:
491
+ knn_scatter = self._build_knn_scatter_data(
492
+ X_train=X_train,
493
+ X_test=X_test,
494
+ y_train=y_train,
495
+ y_test=y_test,
496
+ y_pred=y_pred,
497
+ classes=classes,
498
+ k=best_params.get("n_neighbors", 5),
499
+ metric=best_params.get("metric", "euclidean"),
500
+ )
501
+ except Exception as exc:
502
+ logger.warning("KNN scatter data generation failed: %s", exc)
503
+
504
+ return TrainResponse(
505
+ model_id=model_id,
506
+ session_id=session_id,
507
+ model_type=model_type,
508
+ params=_sanitize_float(best_params),
509
+ metrics=metrics,
510
+ training_time_ms=round(training_time_ms, 1),
511
+ feature_names=selected_feature_names,
512
+ knn_scatter=knn_scatter,
513
+ )
514
+
515
def _build_knn_scatter_data(
    self,
    X_train: np.ndarray,
    X_test: np.ndarray,
    y_train: np.ndarray,
    y_test: np.ndarray,
    y_pred: np.ndarray,
    classes: list[str],
    k: int,
    metric: str,
) -> KNNScatterData:
    """Project the data to 2-D with PCA and assemble the KNN scatter/mesh payload.

    The PCA is fitted on ``X_train`` only and then applied to ``X_test``.  The
    decision mesh is an 80x80 grid (padded by 10% of the data range on each
    side) classified by a separate distance-weighted KNN fitted on the 2-D
    training coordinates.
    """
    projector = PCA(n_components=2)
    train_2d = projector.fit_transform(X_train)
    test_2d = projector.transform(X_test)

    def name_of(label: int) -> str:
        # Fall back to the numeric label when it has no entry in `classes`.
        return classes[label] if label < len(classes) else str(label)

    # Scatter points: training rows first, then test rows with predictions.
    scatter_points: list[ScatterPoint] = [
        ScatterPoint(
            x=round(float(coords[0]), 4),
            y=round(float(coords[1]), 4),
            label=int(y_train[i]),
            label_name=name_of(int(y_train[i])),
            split="train",
        )
        for i, coords in enumerate(train_2d)
    ]
    scatter_points.extend(
        ScatterPoint(
            x=round(float(coords[0]), 4),
            y=round(float(coords[1]), 4),
            label=int(y_test[i]),
            label_name=name_of(int(y_test[i])),
            split="test",
            predicted=int(y_pred[i]),
        )
        for i, coords in enumerate(test_2d)
    )

    # Decision mesh in PCA space
    combined = np.vstack([train_2d, test_2d])
    first_axis, second_axis = combined[:, 0], combined[:, 1]
    x_min, x_max = float(first_axis.min()), float(first_axis.max())
    y_min, y_max = float(second_axis.min()), float(second_axis.max())
    x_pad = (x_max - x_min) * 0.10
    y_pad = (y_max - y_min) * 0.10

    x_vals = np.linspace(x_min - x_pad, x_max + x_pad, 80)
    y_vals = np.linspace(y_min - y_pad, y_max + y_pad, 80)
    xx, yy = np.meshgrid(x_vals, y_vals)
    grid_points = np.column_stack([xx.ravel(), yy.ravel()])

    # Fit a lightweight KNN on the 2D PCA training coordinates
    mesh_knn = KNeighborsClassifier(
        n_neighbors=k, metric=metric, weights="distance", algorithm="auto", n_jobs=1,
    )
    mesh_knn.fit(train_2d, y_train)
    grid_pred = mesh_knn.predict(grid_points).reshape(xx.shape)

    decision_mesh = DecisionMesh(
        x_values=[round(float(v), 4) for v in x_vals],
        y_values=[round(float(v), 4) for v in y_vals],
        predictions=[[int(cell) for cell in mesh_row] for mesh_row in grid_pred],
    )

    explained = [round(float(v), 4) for v in projector.explained_variance_ratio_]
    return KNNScatterData(
        scatter_points=scatter_points,
        decision_mesh=decision_mesh,
        pca_explained_variance=explained,
        classes=classes,
        k=k,
        metric=metric,
    )
584
+
585
+ def _predict_proba(self, model: Any, X: np.ndarray) -> np.ndarray:
586
+ """Safe wrapper around the model's predict_proba that handles multiclass + binary output."""
587
+ if hasattr(model, "predict_proba"):
588
+ return model.predict_proba(X)
589
+ if hasattr(model, "decision_function"):
590
+ scores = model.decision_function(X)
591
+ if scores.ndim == 1:
592
+ p = 1 / (1 + np.exp(-scores))
593
+ return np.column_stack([1 - p, p])
594
+ return scores
595
+ # Fallback: return zeros with correct number of columns
596
+ n_classes = len(np.unique(model.classes_)) if hasattr(model, "classes_") else 2
597
+ return np.zeros((len(X), n_classes))
598
+
599
def _compute_metrics(
    self,
    y_true: np.ndarray,
    y_pred: np.ndarray,
    y_prob: np.ndarray,
    classes: list[str],
    is_binary: bool,
) -> MetricsResponse:
    """Assemble the full metrics payload for one set of predictions.

    Sensitivity, precision and F1 use ``average="binary"`` for two-class
    problems and ``"macro"`` otherwise.  Specificity follows the same rule:
    for a 2x2 confusion matrix it is ``TN / (TN + FP)`` of the positive class
    (label 1); otherwise it is the macro average over classes.

    ``train_accuracy``, ``cross_val_scores`` and ``overfitting_warning`` are
    returned as placeholders for the caller to fill in.
    """
    avg = "binary" if is_binary else "macro"

    accuracy = float(accuracy_score(y_true, y_pred))
    sensitivity = float(recall_score(y_true, y_pred, average=avg, zero_division=0))
    precision = float(precision_score(y_true, y_pred, average=avg, zero_division=0))
    f1 = float(f1_score(y_true, y_pred, average=avg, zero_division=0))
    mcc = float(matthews_corrcoef(y_true, y_pred))

    cm = confusion_matrix(y_true, y_pred)
    if is_binary and cm.shape == (2, 2):
        # Macro-averaging a 2x2 matrix would mix in the positive-class recall
        # and report balanced accuracy, so use the negative-class row directly.
        true_neg, false_pos = int(cm[0, 0]), int(cm[0, 1])
        negatives = true_neg + false_pos
        specificity = true_neg / negatives if negatives > 0 else 0.0
    else:
        # Specificity (per-class, then macro)
        specificity = self._macro_specificity(cm)

    # AUC-ROC
    auc_roc = self._compute_auc(y_true, y_prob, classes, is_binary)

    # Confusion matrix data
    cm_data = self._build_confusion_matrix_data(cm, classes, is_binary)

    # ROC curve
    roc_points = self._build_roc_curve(y_true, y_prob, is_binary)

    # PR curve
    pr_points = self._build_pr_curve(y_true, y_prob, is_binary)

    return MetricsResponse(
        accuracy=round(accuracy, 4),
        sensitivity=round(sensitivity, 4),
        specificity=round(specificity, 4),
        precision=round(precision, 4),
        f1_score=round(f1, 4),
        auc_roc=round(auc_roc, 4),
        confusion_matrix=cm_data,
        roc_curve=roc_points,
        pr_curve=pr_points,
        train_accuracy=0.0,  # filled by caller
        cross_val_scores=[],
        low_sensitivity_warning=sensitivity < _SENSITIVITY_WARNING_THRESHOLD,
        mcc=round(mcc, 4),
        overfitting_warning=False,  # filled by caller
    )
648
+
649
+ def _macro_specificity(self, cm: np.ndarray) -> float:
650
+ """Macro-averaged specificity for multiclass evaluation."""
651
+ specs = []
652
+ for i in range(len(cm)):
653
+ tp = cm[i, i]
654
+ fn = cm[i, :].sum() - tp
655
+ fp = cm[:, i].sum() - tp
656
+ tn = cm.sum() - tp - fn - fp
657
+ denom = tn + fp
658
+ specs.append(tn / denom if denom > 0 else 0.0)
659
+ return float(np.mean(specs))
660
+
661
def _compute_auc(
    self,
    y_true: np.ndarray,
    y_prob: np.ndarray,
    classes: list[str],
    is_binary: bool,
) -> float:
    """Compute ROC AUC for binary or multi-class predictions.

    Binary: AUC of the second probability column.  Multi-class: macro average
    of one-vs-rest AUCs over the classes that occur in ``y_true``.

    Returns ``0.5`` when fewer than two classes are present or when the
    computation raises; the failure is logged, not propagated.
    """
    try:
        if is_binary:
            return float(roc_auc_score(y_true, y_prob[:, 1]))

        # --- Multiclass AUC-ROC (OVR macro) ---
        # predict_proba columns correspond to model classes 0..N-1.
        # Binarize y_true against the SAME full label set so columns align.
        n_model_classes = y_prob.shape[1]
        all_labels = list(range(n_model_classes))
        y_bin = np.asarray(label_binarize(y_true, classes=all_labels))
        if y_bin.ndim == 1:
            y_bin = y_bin.reshape(-1, 1)

        # With exactly two labels label_binarize yields ONE indicator column
        # (for the second label); rebuild the two-column layout so it lines
        # up with the two probability columns.
        if y_bin.shape[1] == 1 and n_model_classes == 2:
            positive = y_bin[:, 0]
            y_bin = np.column_stack([1 - positive, positive])

        # Only evaluate classes that have at least one positive sample in
        # y_true -- OVR needs >= 1 positive per class column.
        present_mask = y_bin.sum(axis=0) > 0
        n_present = int(present_mask.sum())
        if n_present < 2:
            logger.warning(
                "AUC: fewer than 2 classes in y_true (%d); returning 0.5",
                n_present,
            )
            return 0.5

        return float(
            roc_auc_score(
                y_bin[:, present_mask],
                y_prob[:, present_mask],
                multi_class="ovr",
                average="macro",
            )
        )
    except Exception as exc:
        logger.error("AUC computation failed: %s", exc)
        return 0.5
705
+
706
def _build_confusion_matrix_data(
    self,
    cm: np.ndarray,
    classes: list[str],
    is_binary: bool,
) -> ConfusionMatrixData:
    """Wrap a confusion matrix in a ``ConfusionMatrixData`` object.

    The nested-list matrix and the class labels are always included; the
    scalar ``tn``/``fp``/``fn``/``tp`` cells are added only for a binary
    problem whose matrix is exactly 2x2.
    """
    rows = cm.tolist()
    has_binary_cells = is_binary and cm.shape == (2, 2)
    if not has_binary_cells:
        return ConfusionMatrixData(matrix=rows, labels=classes)

    # Row 0 is the true-negative class, row 1 the true-positive class.
    (tn, fp), (fn, tp) = cm
    return ConfusionMatrixData(
        tn=int(tn), fp=int(fp),
        fn=int(fn), tp=int(tp),
        matrix=rows, labels=classes,
    )
721
+
722
def _build_roc_curve(
    self,
    y_true: np.ndarray,
    y_prob: np.ndarray,
    is_binary: bool,
) -> list[ROCPoint]:
    """Build at most 200 ``ROCPoint`` entries (FPR, TPR, threshold).

    Binary problems use the second probability column.  Multi-class problems
    use a micro-average: ``y_true`` is binarised against column indices
    ``0..n_columns-1`` of ``y_prob`` so each indicator column lines up with
    its probability column even when some class is absent from ``y_true``.

    If anything raises, a 20-point diagonal is returned and a warning logged.
    """
    try:
        if is_binary:
            fpr, tpr, thresholds = roc_curve(y_true, y_prob[:, 1])
        else:
            # Micro-average ROC for multi-class
            n_columns = y_prob.shape[1]
            y_bin = np.asarray(label_binarize(y_true, classes=list(range(n_columns))))
            if y_bin.ndim == 1:
                y_bin = y_bin.reshape(-1, 1)
            # Two labels give a single indicator column; restore both columns.
            if y_bin.shape[1] == 1 and n_columns == 2:
                positive = y_bin[:, 0]
                y_bin = np.column_stack([1 - positive, positive])
            fpr, tpr, thresholds = roc_curve(y_bin.ravel(), y_prob.ravel())

        # The first threshold can be infinite; clamp it so it serialises.
        thresholds = np.where(np.isinf(thresholds), 1.0, thresholds)
        # Down-sample evenly to keep the payload small.
        idx = np.linspace(0, len(fpr) - 1, min(200, len(fpr)), dtype=int)
        last = len(thresholds) - 1
        return [
            ROCPoint(
                fpr=round(float(fpr[i]), 4),
                tpr=round(float(tpr[i]), 4),
                threshold=round(float(_sanitize_float(thresholds[min(i, last)])), 4),
            )
            for i in idx
        ]
    except Exception as exc:
        logger.warning("ROC curve computation failed: %s", exc)
    # Diagonal fallback
    pts = np.linspace(0, 1, 20)
    return [ROCPoint(fpr=float(p), tpr=float(p), threshold=float(1-p)) for p in pts]
759
+
760
def _build_pr_curve(
    self,
    y_true: np.ndarray,
    y_prob: np.ndarray,
    is_binary: bool,
) -> list[dict[str, float]]:
    """Build at most 200 ``{"precision", "recall"}`` points.

    Binary problems use the second probability column.  Multi-class problems
    use a micro-average: ``y_true`` is binarised against column indices
    ``0..n_columns-1`` of ``y_prob`` so indicator and probability columns
    stay aligned even when some class is absent from ``y_true``.

    Returns an empty list (after logging a warning) if anything raises.
    """
    try:
        if is_binary:
            prec, rec, _ = precision_recall_curve(y_true, y_prob[:, 1])
        else:
            # Micro-average PR for multi-class
            n_columns = y_prob.shape[1]
            y_bin = np.asarray(label_binarize(y_true, classes=list(range(n_columns))))
            if y_bin.ndim == 1:
                y_bin = y_bin.reshape(-1, 1)
            # Two labels give a single indicator column; restore both columns.
            if y_bin.shape[1] == 1 and n_columns == 2:
                positive = y_bin[:, 0]
                y_bin = np.column_stack([1 - positive, positive])
            prec, rec, _ = precision_recall_curve(y_bin.ravel(), y_prob.ravel())

        # Down-sample evenly to keep the payload small.
        idx = np.linspace(0, len(prec) - 1, min(200, len(prec)), dtype=int)
        return [
            {"precision": round(float(prec[i]), 4), "recall": round(float(rec[i]), 4)}
            for i in idx
        ]
    except Exception as exc:
        logger.warning("PR curve computation failed: %s", exc)
    return []
791
+
792
+ # ------------------------------------------------------------------
793
+ # Model comparison
794
+ # ------------------------------------------------------------------
795
+ def add_to_comparison(self, session_id: str, model_id: str) -> CompareResponse:
796
+ """Step-4 endpoint — adds the latest trained model to the cross-model comparison list."""
797
+ model_data = self._model_store.get(model_id)
798
+ if model_data is None:
799
+ raise KeyError(f"Model not found: {model_id}")
800
+
801
+ entry_data = model_data
802
+ metrics = model_data.get("metrics")
803
+ if metrics is None:
804
+ raise ValueError("Metrics not stored for this model")
805
+
806
+ entry = CompareEntry(
807
+ model_id=model_id,
808
+ model_type=entry_data["model_type"],
809
+ params=entry_data["params"],
810
+ metrics=metrics,
811
+ training_time_ms=entry_data.get("training_time_ms", 0.0),
812
+ )
813
+
814
+ with self._lock:
815
+ if session_id not in self._compare_store:
816
+ self._compare_store[session_id] = []
817
+
818
+ # Replace existing entry for same model_id
819
+ self._compare_store[session_id] = [
820
+ e for e in self._compare_store[session_id] if e.model_id != model_id
821
+ ]
822
+ self._compare_store[session_id].append(entry)
823
+
824
+ # Cap compare store at 50 sessions
825
+ if len(self._compare_store) > 50:
826
+ oldest_key = next(iter(self._compare_store))
827
+ del self._compare_store[oldest_key]
828
+
829
+ entries = sorted(
830
+ self._compare_store[session_id],
831
+ key=lambda e: e.metrics.auc_roc,
832
+ reverse=True,
833
+ )
834
+ best = entries[0].model_id if entries else model_id
835
+ return CompareResponse(entries=entries, best_model_id=best)
836
+
837
def get_comparison(self, session_id: str) -> CompareResponse:
    """Return the session's comparison entries ranked by descending ``auc_roc``.

    ``best_model_id`` is the id of the top-ranked entry, or an empty string
    when the session has no entries.
    """
    with self._lock:
        # Copy under the lock; ranking happens on the private copy.
        ranked = list(self._compare_store.get(session_id, []))
    ranked.sort(key=lambda e: e.metrics.auc_roc, reverse=True)

    if ranked:
        best = ranked[0].model_id
    else:
        best = ""
    return CompareResponse(entries=ranked, best_model_id=best)
844
+
845
+ def clear_comparison(self, session_id: str) -> None:
846
+ """Step-4 endpoint — empties the comparison list for the session."""
847
+ with self._lock:
848
+ self._compare_store.pop(session_id, None)
849
+
850
+ def store_train_response_in_model(self, model_id: str, response: "TrainResponse") -> None:
851
+ """Cache metrics inside model store so comparison can retrieve them."""
852
+ with self._lock:
853
+ if model_id in self._model_store:
854
+ self._model_store[model_id]["metrics"] = response.metrics
855
+ self._model_store[model_id]["training_time_ms"] = response.training_time_ms
app/services/specialty_registry.py ADDED
@@ -0,0 +1,559 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Registry of all 20 medical specialties — aligned with Clinical Specialties Dataset Collection."""
2
+ from __future__ import annotations
3
+
4
+ from app.models.schemas import SpecialtyInfo
5
+
6
+ SPECIALTIES: dict[str, SpecialtyInfo] = {
7
+ "cardiology_hf": SpecialtyInfo(
8
+ id="cardiology_hf",
9
+ name="Cardiology",
10
+ description="Predict 30-day mortality risk in heart failure patients using clinical biomarkers.",
11
+ target_variable="DEATH_EVENT",
12
+ target_type="binary",
13
+ data_source="Heart Failure Clinical Records — kaggle.com/datasets/andrewmvd/heart-failure-clinical-data",
14
+ what_ai_predicts="30-day mortality after heart failure discharge",
15
+ license_type="CC BY 4.0",
16
+ license_url="https://creativecommons.org/licenses/by/4.0/",
17
+ requires_attribution=True,
18
+ feature_names=[
19
+ "age", "anaemia", "creatinine_phosphokinase", "diabetes",
20
+ "ejection_fraction", "high_blood_pressure", "platelets",
21
+ "serum_creatinine", "serum_sodium", "sex", "smoking", "time",
22
+ ],
23
+ clinical_context=(
24
+ "Heart failure affects over 64 million people worldwide and carries a 30-day readmission "
25
+ "rate of approximately 20–25%. Early identification of high-risk patients at discharge "
26
+ "enables targeted interventions such as intensive follow-up and medication optimisation. "
27
+ "Key clinical predictors include left ventricular ejection fraction, serum creatinine, "
28
+ "and serum sodium levels. This model uses 12 clinical variables routinely collected "
29
+ "at discharge to predict which patients are at highest risk of 30-day mortality."
30
+ ),
31
+ ),
32
+ "radiology_pneumonia": SpecialtyInfo(
33
+ id="radiology_pneumonia",
34
+ name="Radiology",
35
+ description="Classify chest X-ray findings as normal or pneumonia using clinical and imaging metadata.",
36
+ target_variable="Finding_Label",
37
+ target_type="binary",
38
+ data_source="NIH Chest X-Ray Metadata — kaggle.com/datasets/nih-chest-xrays/data",
39
+ what_ai_predicts="Normal vs. Pneumonia from chest X-ray clinical metadata",
40
+ license_type="CC0 1.0",
41
+ license_url="https://creativecommons.org/publicdomain/zero/1.0/",
42
+ requires_attribution=False,
43
+ feature_names=[
44
+ "age", "sex", "view_position", "follow_up_number",
45
+ ],
46
+ clinical_context=(
47
+ "Community-acquired pneumonia is a leading cause of hospitalisation, particularly in "
48
+ "paediatric and elderly populations. Chest radiography is the standard diagnostic tool, "
49
+ "but interpretation requires specialist expertise not always available at point of care. "
50
+ "The NIH Chest X-Ray dataset contains over 100,000 frontal-view X-rays labelled across "
51
+ "14 pathology categories. This model uses extracted radiological metadata features "
52
+ "to distinguish normal findings from pneumonia, supporting rapid triage."
53
+ ),
54
+ ),
55
+ "nephrology_ckd": SpecialtyInfo(
56
+ id="nephrology_ckd",
57
+ name="Nephrology",
58
+ description="Classify patients as having chronic kidney disease or not from routine laboratory values.",
59
+ target_variable="classification",
60
+ target_type="binary",
61
+ data_source="UCI CKD Dataset — archive.ics.uci.edu/dataset/336/chronic+kidney+disease",
62
+ what_ai_predicts="Chronic kidney disease (ckd vs. notckd) from routine lab values",
63
+ license_type="CC BY 4.0",
64
+ license_url="https://creativecommons.org/licenses/by/4.0/",
65
+ requires_attribution=True,
66
+ feature_names=[
67
+ "age", "blood_pressure", "specific_gravity", "albumin", "sugar",
68
+ "red_blood_cells", "pus_cell", "blood_glucose_random", "blood_urea",
69
+ "serum_creatinine", "sodium", "haemoglobin",
70
+ "packed_cell_volume", "hypertension", "diabetes_mellitus",
71
+ ],
72
+ clinical_context=(
73
+ "Chronic kidney disease affects approximately 10% of the global population and is "
74
+ "a major risk factor for cardiovascular disease and end-stage renal failure. "
75
+ "Early detection through routine blood and urine tests enables timely intervention "
76
+ "to slow disease progression. Key biomarkers include serum creatinine, haemoglobin, "
77
+ "and specific gravity of urine. This model classifies patients into CKD or non-CKD "
78
+ "categories using 15 routine laboratory and clinical measurements."
79
+ ),
80
+ ),
81
+ "oncology_breast": SpecialtyInfo(
82
+ id="oncology_breast",
83
+ name="Oncology — Breast",
84
+ description="Classify breast biopsies as malignant or benign from cell nucleus measurements.",
85
+ target_variable="diagnosis",
86
+ target_type="binary",
87
+ data_source="Breast Cancer Wisconsin — archive.ics.uci.edu/dataset/17/breast+cancer+wisconsin+diagnostic",
88
+ what_ai_predicts="Malignancy of a breast biopsy from fine-needle aspirate cell measurements",
89
+ license_type="CC BY 4.0",
90
+ license_url="https://creativecommons.org/licenses/by/4.0/",
91
+ requires_attribution=True,
92
+ feature_names=[
93
+ "mean_radius", "mean_texture", "mean_perimeter", "mean_area",
94
+ "mean_smoothness", "mean_compactness", "mean_concavity",
95
+ "mean_concave_points", "mean_symmetry", "worst_radius",
96
+ "worst_texture", "worst_perimeter", "worst_area", "worst_smoothness",
97
+ ],
98
+ clinical_context=(
99
+ "Breast cancer is the most common cancer in women worldwide, with early detection "
100
+ "being critical for survival outcomes. Fine needle aspiration biopsies provide "
101
+ "cellular material that can be analysed to determine malignancy. "
102
+ "The Wisconsin dataset contains measurements of cell nuclei features extracted "
103
+ "from digitised images of fine needle aspirates. This model classifies tumours "
104
+ "as malignant (M) or benign (B) based on 14 geometric and textural features "
105
+ "of cell nuclei, achieving clinical-grade discrimination performance."
106
+ ),
107
+ ),
108
+ "neurology_parkinsons": SpecialtyInfo(
109
+ id="neurology_parkinsons",
110
+ name="Neurology — Parkinson's",
111
+ description="Detect Parkinson's disease from vocal biomarkers extracted via sustained phonation.",
112
+ target_variable="status",
113
+ target_type="binary",
114
+ data_source="UCI Parkinson's Dataset — archive.ics.uci.edu/dataset/174/parkinsons",
115
+ what_ai_predicts="Parkinson's disease presence from voice biomarkers",
116
+ license_type="CC BY 4.0",
117
+ license_url="https://creativecommons.org/licenses/by/4.0/",
118
+ requires_attribution=True,
119
+ feature_names=[
120
+ "MDVP_Fo_Hz", "MDVP_Fhi_Hz", "MDVP_Flo_Hz",
121
+ "MDVP_Jitter_pct", "MDVP_Jitter_Abs", "MDVP_RAP", "MDVP_PPQ", "Jitter_DDP",
122
+ "MDVP_Shimmer", "MDVP_Shimmer_dB", "Shimmer_APQ3", "Shimmer_APQ5",
123
+ "MDVP_APQ", "Shimmer_DDA",
124
+ "NHR", "HNR", "RPDE", "DFA", "spread1", "spread2", "D2", "PPE",
125
+ ],
126
+ clinical_context=(
127
+ "Parkinson's disease is a progressive neurodegenerative disorder affecting "
128
+ "approximately 10 million people globally. Vocal tremor and dysphonia are "
129
+ "among the earliest and most consistent symptoms, often preceding motor symptoms. "
130
+ "Voice recordings can be analysed non-invasively to extract biomarkers of vocal "
131
+ "instability including jitter, shimmer, and harmonics-to-noise ratio. "
132
+ "This model uses 22 voice measurement features to classify patients as "
133
+ "having Parkinson's disease (status=1) or healthy controls (status=0)."
134
+ ),
135
+ ),
136
+ "endocrinology_diabetes": SpecialtyInfo(
137
+ id="endocrinology_diabetes",
138
+ name="Endocrinology — Diabetes",
139
+ description="Predict diabetes onset within 5 years from metabolic and demographic markers.",
140
+ target_variable="Outcome",
141
+ target_type="binary",
142
+ data_source="Pima Indians Diabetes — kaggle.com/datasets/uciml/pima-indians-diabetes-database",
143
+ what_ai_predicts="Diabetes onset within 5 years from metabolic markers",
144
+ license_type="CC0 1.0 / CC BY 4.0",
145
+ license_url="https://creativecommons.org/publicdomain/zero/1.0/",
146
+ requires_attribution=True,
147
+ feature_names=[
148
+ "pregnancies", "glucose", "blood_pressure", "skin_thickness",
149
+ "insulin", "bmi", "diabetes_pedigree_function", "age",
150
+ ],
151
+ clinical_context=(
152
+ "Type 2 diabetes affects over 400 million people globally, with millions more "
153
+ "at risk due to metabolic syndrome and lifestyle factors. Early identification "
154
+ "of high-risk individuals enables preventive interventions including dietary "
155
+ "changes, exercise, and pharmacological treatment. "
156
+ "The Pima Indians dataset contains metabolic measurements from a population "
157
+ "with high diabetes prevalence. This model predicts diabetes onset within "
158
+ "5 years using 8 clinical and laboratory features including fasting glucose, "
159
+ "BMI, and diabetes pedigree function."
160
+ ),
161
+ ),
162
+ "hepatology_liver": SpecialtyInfo(
163
+ id="hepatology_liver",
164
+ name="Hepatology — Liver",
165
+ description="Identify liver disease from routine blood test results.",
166
+ target_variable="Dataset",
167
+ target_type="binary",
168
+ data_source="Indian Liver Patient Dataset — archive.ics.uci.edu/dataset/225/ilpd+indian+liver+patient+dataset",
169
+ what_ai_predicts="Liver disease vs. healthy from blood test results",
170
+ license_type="CC BY 4.0",
171
+ license_url="https://creativecommons.org/licenses/by/4.0/",
172
+ requires_attribution=True,
173
+ feature_names=[
174
+ "age", "gender", "total_bilirubin", "direct_bilirubin",
175
+ "alkaline_phosphotase", "alamine_aminotransferase",
176
+ "aspartate_aminotransferase", "total_proteins",
177
+ "albumin", "albumin_globulin_ratio",
178
+ ],
179
+ clinical_context=(
180
+ "Liver disease encompasses a spectrum of conditions from fatty liver to cirrhosis "
181
+ "and hepatocellular carcinoma, representing a major global health burden. "
182
+ "Biochemical liver function tests provide quantitative markers of hepatic injury "
183
+ "and synthetic function. Early detection through blood test abnormalities "
184
+ "allows timely referral and treatment. "
185
+ "This model uses 10 routine liver function test parameters to classify "
186
+ "patients as having liver disease or not, supporting clinical triage decisions."
187
+ ),
188
+ ),
189
+ "cardiology_stroke": SpecialtyInfo(
190
+ id="cardiology_stroke",
191
+ name="Cardiology — Stroke",
192
+ description="Predict stroke risk from demographics, comorbidities, and lifestyle factors.",
193
+ target_variable="stroke",
194
+ target_type="binary",
195
+ data_source="Stroke Prediction Dataset — kaggle.com/datasets/fedesoriano/stroke-prediction-dataset",
196
+ what_ai_predicts="Stroke occurrence from demographics and comorbidities",
197
+ license_type="No formal license",
198
+ license_url="",
199
+ requires_attribution=False,
200
+ feature_names=[
201
+ "gender", "age", "hypertension", "heart_disease", "ever_married",
202
+ "work_type", "residence_type", "avg_glucose_level", "bmi", "smoking_status",
203
+ ],
204
+ clinical_context=(
205
+ "Stroke is the second leading cause of death globally and the leading cause "
206
+ "of long-term disability. Identifying high-risk individuals enables preventive "
207
+ "interventions such as anticoagulation, blood pressure control, and lifestyle "
208
+ "modification. Key risk factors include hypertension, atrial fibrillation, "
209
+ "diabetes, and smoking. "
210
+ "This model uses 10 demographic, clinical, and lifestyle variables to predict "
211
+ "stroke occurrence, supporting population-level screening and risk stratification."
212
+ ),
213
+ ),
214
+ "mental_health": SpecialtyInfo(
215
+ id="mental_health",
216
+ name="Mental Health",
217
+ description="Predict history of mental illness from lifestyle, demographic, and behavioural factors.",
218
+ target_variable="severity_class",
219
+ target_type="binary",
220
+ data_source="Depression Dataset — kaggle.com/datasets/anthonytherrien/depression-dataset",
221
+ what_ai_predicts="History of mental illness (has_condition / no_condition) from lifestyle and demographic data",
222
+ license_type="CC BY-SA 4.0",
223
+ license_url="https://creativecommons.org/licenses/by-sa/4.0/",
224
+ requires_attribution=True,
225
+ feature_names=[
226
+ "age", "number_of_children", "income", "dietary_habits", "sleep_patterns",
227
+ "alcohol_consumption", "physical_activity_level", "smoking_status",
228
+ "employment_status", "history_substance_abuse",
229
+ "family_history_depression", "chronic_medical_conditions",
230
+ "marital_status", "education_level",
231
+ ],
232
+ clinical_context=(
233
+ "Depression is the leading cause of disability worldwide, affecting over 280 million "
234
+ "people. The PHQ-9 questionnaire is a validated screening tool used in primary care "
235
+ "to assess depression severity across four categories: minimal, mild, moderate, "
236
+ "and severe. Accurate severity classification guides treatment decisions from "
237
+ "watchful waiting to pharmacotherapy and referral to specialist mental health services. "
238
+ "This model predicts a history of mental illness (has_condition / no_condition) "
239
+ "from lifestyle, occupational, and demographic factors."
240
+ ),
241
+ ),
242
+ "pulmonology_copd": SpecialtyInfo(
243
+ id="pulmonology_copd",
244
+ name="Pulmonology — COPD",
245
+ description="Predict COPD exacerbation risk from spirometry and clinical EHR data.",
246
+ target_variable="exacerbation",
247
+ target_type="binary",
248
+ data_source="COPD Dataset — kaggle.com/datasets/prakharrathi25/copd-student-dataset",
249
+ what_ai_predicts="COPD acute exacerbation risk from spirometry and EHR data",
250
+ license_type="CC0 1.0",
251
+ license_url="https://creativecommons.org/publicdomain/zero/1.0/",
252
+ requires_attribution=False,
253
+ feature_names=[
254
+ "age", "sex", "smoking_pack_years", "fev1_litres", "fvc_litres",
255
+ "fev1_fvc_ratio", "prior_exacerbations_year", "bmi",
256
+ "mrc_dyspnea_scale", "sgrq_score", "copd_gold_stage",
257
+ ],
258
+ clinical_context=(
259
+ "Chronic obstructive pulmonary disease (COPD) affects approximately 300 million "
260
+ "people and is a leading cause of morbidity and mortality. Acute exacerbations "
261
+ "are episodes of worsening symptoms requiring increased treatment and are a major "
262
+ "driver of hospitalisation and disease progression. "
263
+ "Spirometry measurements, particularly FEV1 and the FEV1/FVC ratio, are "
264
+ "the gold standard for COPD diagnosis and staging. "
265
+ "This model predicts the risk of acute exacerbation using clinical, "
266
+ "spirometric, and patient-reported outcome measures from the Kaggle COPD patient dataset."
267
+ ),
268
+ ),
269
+ "haematology_anaemia": SpecialtyInfo(
270
+ id="haematology_anaemia",
271
+ name="Haematology — Anaemia",
272
+ description="Detect anaemia from full blood count indices including haemoglobin, MCV, MCH, and MCHC.",
273
+ target_variable="anemia_type",
274
+ target_type="multiclass",
275
+ data_source="Anaemia Classification Dataset — kaggle.com/datasets/biswaranjanrao/anemia-dataset",
276
+ what_ai_predicts="Type of anaemia from full blood count (iron deficiency / megaloblastic / normocytic / normal)",
277
+ license_type="Unknown",
278
+ license_url="",
279
+ requires_attribution=False,
280
+ feature_names=[
281
+ "gender", "haemoglobin", "mch", "mchc", "mcv",
282
+ ],
283
+ clinical_context=(
284
+ "Anaemia affects approximately 1.62 billion people globally and is defined by "
285
+ "haemoglobin below 12 g/dL in women and 13 g/dL in men. Full blood count indices "
286
+ "including mean corpuscular volume (MCV), mean corpuscular haemoglobin (MCH), "
287
+ "and mean corpuscular haemoglobin concentration (MCHC) are routinely used to "
288
+ "screen for and characterise anaemia in primary care. Low MCV indicates "
289
+ "microcytic anaemia (typically iron deficiency), while elevated MCV suggests "
290
+ "macrocytic anaemia (B12 or folate deficiency). "
291
+ "This model classifies patients as anaemic or non-anaemic using five standard "
292
+ "full blood count parameters, supporting automated screening in high-volume settings."
293
+ ),
294
+ ),
295
+ "dermatology": SpecialtyInfo(
296
+ id="dermatology",
297
+ name="Dermatology",
298
+ description="Classify skin lesions as benign or malignant from HAM10000 dermoscopy metadata.",
299
+ target_variable="dx_type",
300
+ target_type="binary",
301
+ data_source="HAM10000 Metadata — Harvard Dataverse doi:10.7910/DVN/DBW86T",
302
+ what_ai_predicts="Benign vs. malignant skin lesion from dermoscopy metadata",
303
+ license_type="CC BY-NC 4.0",
304
+ license_url="https://creativecommons.org/licenses/by-nc/4.0/",
305
+ requires_attribution=True,
306
+ feature_names=[
307
+ "age", "sex", "localization",
308
+ ],
309
+ clinical_context=(
310
+ "Melanoma and other skin cancers are among the most rapidly increasing malignancies "
311
+ "globally, with early detection being the primary determinant of survival. "
312
+ "Dermoscopy improves diagnostic accuracy compared to naked-eye examination, "
313
+ "but requires specialist training. The HAM10000 dataset contains over 10,000 "
314
+ "dermoscopic images with clinical metadata from seven diagnostic categories. "
315
+ "This model uses patient age, sex, and lesion localisation to distinguish benign "
316
+ "from malignant skin lesions, supporting earlier referral for biopsy."
317
+ ),
318
+ ),
319
+ "ophthalmology": SpecialtyInfo(
320
+ id="ophthalmology",
321
+ name="Ophthalmology",
322
+ description="Detect diabetic retinopathy from retinal image analysis features.",
323
+ target_variable="severity_grade",
324
+ target_type="binary",
325
+ data_source="Diabetic Retinopathy Debrecen Dataset — archive.ics.uci.edu/dataset/329/diabetic+retinopathy+debrecen+data+set",
326
+ what_ai_predicts="Presence of diabetic retinopathy signs from retinal analysis (0=No DR, 1=DR present)",
327
+ license_type="CC BY 4.0",
328
+ license_url="https://creativecommons.org/licenses/by/4.0/",
329
+ requires_attribution=True,
330
+ feature_names=[
331
+ "quality_assessment", "pre_screening", "ma_detection_0.5",
332
+ "ma_detection_0.6", "ma_detection_0.7", "ma_detection_0.8",
333
+ "ma_detection_0.9", "ma_detection_1.0",
334
+ "exudate_1", "exudate_2", "exudate_3", "exudate_4",
335
+ "exudate_5", "exudate_6", "exudate_7", "exudate_8",
336
+ "macula_od_distance", "optic_disc_diameter", "am_fm_classification",
337
+ ],
338
+ clinical_context=(
339
+ "Diabetic retinopathy is the leading cause of blindness in working-age adults globally, "
340
+ "affecting approximately one third of people with diabetes. Regular ophthalmological "
341
+ "screening is recommended but limited by specialist availability. "
342
+ "Grading retinopathy severity from mild non-proliferative to proliferative disease "
343
+ "determines urgency of laser treatment or anti-VEGF therapy. "
344
+ "This model classifies retinopathy severity grade using 19 clinical and "
345
+ "retinal examination features, prioritising high-risk patients for urgent review."
346
+ ),
347
+ ),
348
+ "orthopaedics": SpecialtyInfo(
349
+ id="orthopaedics",
350
+ name="Orthopaedics — Spine",
351
+ description="Classify spinal status as normal or abnormal from biomechanical measurements.",
352
+ target_variable="class",
353
+ target_type="binary",
354
+ data_source="Vertebral Column Dataset — archive.ics.uci.edu/dataset/212/vertebral+column",
355
+ what_ai_predicts="Normal vs. abnormal spinal status from pelvic biomechanical measurements",
356
+ license_type="CC BY 4.0",
357
+ license_url="https://creativecommons.org/licenses/by/4.0/",
358
+ requires_attribution=True,
359
+ feature_names=[
360
+ "pelvic_incidence", "pelvic_tilt", "lumbar_lordosis_angle",
361
+ "sacral_slope", "pelvic_radius", "degree_spondylolisthesis",
362
+ ],
363
+ clinical_context=(
364
+ "Spinal disorders including disc herniation and spondylolisthesis are among the "
365
+ "most common causes of chronic pain and disability worldwide. Biomechanical "
366
+ "measurements of the pelvis and lumbar spine provide objective indicators "
367
+ "of structural abnormality that complement clinical examination. "
368
+ "The UCI Vertebral Column dataset contains six orthopaedic measurements "
369
+ "extracted from lateral X-rays. This model classifies patients as having "
370
+ "normal spinal anatomy or an abnormal condition (disc herniation / spondylolisthesis)."
371
+ ),
372
+ ),
373
+ "icu_sepsis": SpecialtyInfo(
374
+ id="icu_sepsis",
375
+ name="ICU / Sepsis",
376
+ description="Predict sepsis onset from vital signs and laboratory results in ICU patients.",
377
+ target_variable="SepsisLabel",
378
+ target_type="binary",
379
+ data_source="PhysioNet Sepsis Dataset — physionet.org/content/challenge-2019/1.0.0/",
380
+ what_ai_predicts="Sepsis onset (SepsisLabel=1) from ICU vital signs and lab results",
381
+ license_type="CC BY 4.0",
382
+ license_url="https://creativecommons.org/licenses/by/4.0/",
383
+ requires_attribution=True,
384
+ feature_names=[
385
+ "HR", "O2Sat", "Temp", "SBP", "MAP", "Resp",
386
+ "BaseExcess", "pH", "PaCO2", "Lactate", "Creatinine",
387
+ "Bilirubin_total", "WBC", "Platelets", "Age", "Gender",
388
+ ],
389
+ clinical_context=(
390
+ "Sepsis is a life-threatening organ dysfunction caused by a dysregulated host "
391
+ "response to infection, with a mortality rate of 20–30% that rises to over 40% "
392
+ "for septic shock. Early identification and treatment within the first hour "
393
+ "significantly improves survival outcomes. "
394
+ "Vital signs and laboratory biomarkers such as lactate, procalcitonin, and "
395
+ "white blood cell count reflect the physiological derangement of sepsis. "
396
+ "This model uses routinely collected ICU monitoring data to predict sepsis "
397
+ "onset up to 6 hours before clinical diagnosis, enabling proactive management."
398
+ ),
399
+ ),
400
+ "obstetrics_fetal": SpecialtyInfo(
401
+ id="obstetrics_fetal",
402
+ name="Obstetrics — Fetal Health",
403
+ description="Classify fetal cardiotocography as normal, suspect, or pathological.",
404
+ target_variable="fetal_health",
405
+ target_type="multiclass",
406
+ data_source="Cardiotocography Dataset — archive.ics.uci.edu/dataset/193/cardiotocography",
407
+ what_ai_predicts="Fetal CTG classification: 1=Normal, 2=Suspect, 3=Pathological",
408
+ license_type="CC BY 4.0",
409
+ license_url="https://creativecommons.org/licenses/by/4.0/",
410
+ requires_attribution=True,
411
+ feature_names=[
412
+ "baseline_value", "accelerations", "fetal_movement",
413
+ "uterine_contractions", "light_decelerations", "severe_decelerations",
414
+ "prolongued_decelerations", "abnormal_short_term_variability",
415
+ "mean_value_short_term_variability", "pct_time_abnormal_long_term_variability",
416
+ "mean_value_long_term_variability", "histogram_mode",
417
+ ],
418
+ clinical_context=(
419
+ "Cardiotocography (CTG) is the standard method for monitoring fetal wellbeing "
420
+ "during pregnancy and labour, recording fetal heart rate and uterine contractions. "
421
+ "Abnormal CTG patterns may indicate fetal hypoxia requiring urgent intervention "
422
+ "such as emergency caesarean section. CTG interpretation is subjective and "
423
+ "varies between clinicians. "
424
+ "This model classifies CTG recordings into three categories — Normal (class 1), "
425
+ "Suspect (class 2), and Pathological (class 3) — using 12 quantitative "
426
+ "cardiotocography features to support consistent clinical decision-making."
427
+ ),
428
+ ),
429
+ "cardiology_arrhythmia": SpecialtyInfo(
430
+ id="cardiology_arrhythmia",
431
+ name="Cardiology — Arrhythmia",
432
+ description="Detect cardiac arrhythmia from ECG interval and waveform features.",
433
+ target_variable="arrhythmia",
434
+ target_type="binary",
435
+ data_source="UCI Arrhythmia Dataset — archive.ics.uci.edu/dataset/5/arrhythmia",
436
+ what_ai_predicts="Cardiac arrhythmia presence vs. normal sinus rhythm from ECG features",
437
+ license_type="CC BY 4.0",
438
+ license_url="https://creativecommons.org/licenses/by/4.0/",
439
+ requires_attribution=True,
440
+ feature_names=[
441
+ "age", "sex", "height", "weight", "QRS_duration",
442
+ "PR_interval", "QT_interval", "T_interval", "P_interval",
443
+ "QRS_axis", "T_axis", "P_axis", "heart_rate", "J_point",
444
+ "heart_rate_2",
445
+ "DI_R", "DI_S", "DI_T", "DI_P", "DI_QRSA", "DI_QRSTA",
446
+ "DII_R", "DII_S", "DII_T", "DII_P", "DII_QRSA", "DII_QRSTA",
447
+ "V1_R", "V1_S", "V1_T", "V1_P", "V5_R", "V5_S",
448
+ ],
449
+ clinical_context=(
450
+ "Cardiac arrhythmias encompass a diverse group of rhythm disorders ranging from "
451
+ "benign atrial ectopics to life-threatening ventricular fibrillation. "
452
+ "The 12-lead ECG is the primary diagnostic tool, providing measurements of "
453
+ "conduction intervals and waveform morphology. Automated arrhythmia detection "
454
+ "supports cardiac monitoring programs and remote cardiology services. "
455
+ "This model uses 33 demographic and ECG-derived parameters to classify patients as having "
456
+ "arrhythmia or normal cardiac rhythm, supporting cardiac screening programs."
457
+ ),
458
+ ),
459
+ "oncology_cervical": SpecialtyInfo(
460
+ id="oncology_cervical",
461
+ name="Oncology — Cervical",
462
+ description="Assess cervical cancer biopsy risk from demographic and behavioural risk factors.",
463
+ target_variable="Biopsy",
464
+ target_type="binary",
465
+ data_source="Cervical Cancer Dataset — archive.ics.uci.edu/dataset/383/cervical+cancer+risk+factors",
466
+ what_ai_predicts="Biopsy-confirmed cervical cancer from demographic and behavioural data",
467
+ license_type="CC BY 4.0",
468
+ license_url="https://creativecommons.org/licenses/by/4.0/",
469
+ requires_attribution=True,
470
+ feature_names=[
471
+ "age", "number_of_sexual_partners", "first_sexual_intercourse_age",
472
+ "num_of_pregnancies",
473
+ "smokes", "smokes_years",
474
+ "hormonal_contraceptives", "hormonal_contraceptives_years",
475
+ "iud", "iud_years",
476
+ "stds", "stds_number", "stds_condylomatosis",
477
+ "stds_cervical_condylomatosis", "stds_hpv",
478
+ "dx_cancer", "dx_cin", "dx_hpv", "dx",
479
+ "hinselmann", "schiller", "citology",
480
+ ],
481
+ clinical_context=(
482
+ "Cervical cancer is the fourth most common cancer in women globally, with "
483
+ "persistent HPV infection being the primary causative factor. Risk stratification "
484
+ "using demographic and behavioural data can identify women who require "
485
+ "expedited colposcopy or biopsy. Early detection through cytological and "
486
+ "histological examination enables curative treatment. "
487
+ "This model uses 22 demographic, sexual health, and medical history variables "
488
+ "to predict biopsy-confirmed cervical cancer, supporting targeted screening "
489
+ "in resource-limited settings."
490
+ ),
491
+ ),
492
+ "thyroid": SpecialtyInfo(
493
+ id="thyroid",
494
+ name="Thyroid / Endocrinology",
495
+ description="Classify thyroid function as hypothyroid, hyperthyroid, or normal from biochemical assay results.",
496
+ target_variable="class",
497
+ target_type="multiclass",
498
+ data_source="UCI New Thyroid Dataset — archive.ics.uci.edu/dataset/102/thyroid+disease",
499
+ what_ai_predicts="Thyroid function classification (hyperthyroid / normal / hypothyroid)",
500
+ license_type="CC BY 4.0",
501
+ license_url="https://creativecommons.org/licenses/by/4.0/",
502
+ requires_attribution=True,
503
+ feature_names=[
504
+ "T3_resin_uptake", "total_serum_thyroxine", "T3", "TSH", "max_abs_diff_TSH",
505
+ ],
506
+ clinical_context=(
507
+ "Thyroid dysfunction affects approximately 5% of the global population. "
508
+ "Hyperthyroidism (excess hormone) and hypothyroidism (deficiency) are diagnosed "
509
+ "primarily through laboratory thyroid function tests. "
510
+ "The T3 resin uptake reflects thyroid hormone binding capacity, "
511
+ "total serum thyroxine (T4) measures overall hormone production, "
512
+ "and TSH (thyroid-stimulating hormone) is the most sensitive marker of thyroid status. "
513
+ "This model uses 5 biochemical assay values from the UCI New Thyroid dataset "
514
+ "to classify patients into three categories — hyperthyroid, normal, or hypothyroid — "
515
+ "supporting primary care screening and endocrinology referral decisions."
516
+ ),
517
+ ),
518
+ "pharmacy_readmission": SpecialtyInfo(
519
+ id="pharmacy_readmission",
520
+ name="Pharmacy — Readmission",
521
+ description="Predict hospital readmission risk for diabetic inpatients using medication and clinical data.",
522
+ target_variable="readmitted",
523
+ target_type="multiclass",
524
+ data_source="Diabetes 130-US Hospitals Dataset — archive.ics.uci.edu/dataset/296/diabetes+130-us+hospitals+for+years+1999-2008",
525
+ what_ai_predicts="Readmission risk: <30 days / >30 days / NO from medication and utilisation data",
526
+ license_type="CC BY 4.0",
527
+ license_url="https://creativecommons.org/licenses/by/4.0/",
528
+ requires_attribution=True,
529
+ feature_names=[
530
+ "age", "gender", "time_in_hospital", "num_lab_procedures",
531
+ "num_procedures", "num_medications", "number_outpatient",
532
+ "number_emergency", "number_inpatient", "number_diagnoses",
533
+ "max_glu_serum", "A1Cresult", "metformin", "insulin", "change",
534
+ "discharge_disposition_id", "admission_type_id",
535
+ "admission_source_id", "diag_1",
536
+ ],
537
+ clinical_context=(
538
+ "Hospital readmission within 30 days is a key quality indicator and financial "
539
+ "penalty trigger under value-based care programmes. Diabetic patients have "
540
+ "disproportionately high readmission rates due to complex medication regimens, "
541
+ "comorbidities, and glycaemic instability. "
542
+ "The UCI 130-US Hospitals dataset contains over 100,000 diabetic patient "
543
+ "encounters from 130 US hospitals over 10 years. "
544
+ "This model classifies patients into three readmission risk groups — "
545
+ "within 30 days, after 30 days, or no readmission — using 19 clinical, "
546
+ "medication, and utilisation variables to guide discharge planning."
547
+ ),
548
+ ),
549
+ }
550
+
551
+
552
+ def get_specialty(specialty_id: str) -> SpecialtyInfo | None:
553
+ """Look up one specialty by id, return `None` if unknown."""
554
+ return SPECIALTIES.get(specialty_id)
555
+
556
+
557
+ def list_specialties() -> list[SpecialtyInfo]:
558
+ """Return the full registry as a list, in the order the Step-1 picker expects."""
559
+ return list(SPECIALTIES.values())
app/utils/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Shared utility helpers used across services."""
arena/__init__.py ADDED
File without changes
arena/router.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Model Arena REST endpoints."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+
6
+ from fastapi import APIRouter, HTTPException, Request, status
7
+ from fastapi.responses import Response
8
+
9
+ from .schemas import (
10
+ ArenaCompareRequest,
11
+ ArenaCompareResponse,
12
+ ArenaRun,
13
+ BatchTrainRequest,
14
+ BatchTrainResponse,
15
+ )
16
+
17
+ logger = logging.getLogger(__name__)
18
+ router = APIRouter(prefix="/api/arena", tags=["arena"])
19
+
20
+
21
+ def _get_arena_service(request: Request):
22
+ return request.app.state.arena_service
23
+
24
+
25
+ @router.post("/batch-train", response_model=BatchTrainResponse)
26
+ def batch_train(request: Request, body: BatchTrainRequest) -> BatchTrainResponse:
27
+ """Train multiple models in one request."""
28
+ arena = _get_arena_service(request)
29
+ logger.info("Arena batch_train: session=%s models=%d", body.session_id, len(body.models))
30
+ try:
31
+ result = arena.batch_train(body)
32
+ completed = sum(1 for r in result.runs if r.status == "completed")
33
+ logger.info("Arena batch_train done: %d/%d completed", completed, len(result.runs))
34
+ return result
35
+ except (ValueError, KeyError) as exc:
36
+ raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(exc))
37
+ except Exception as exc:
38
+ logger.exception("Batch training failed")
39
+ raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc))
40
+
41
+
42
+ @router.get("/runs/{session_id}", response_model=list[ArenaRun])
43
+ def get_runs(request: Request, session_id: str) -> list[ArenaRun]:
44
+ """Get all arena runs for a session."""
45
+ arena = _get_arena_service(request)
46
+ # Return empty list if session has no arena runs yet but ML session exists
47
+ ml_service = request.app.state.ml_service
48
+ if not arena.has_session(session_id) and ml_service.get_session(session_id) is None:
49
+ raise HTTPException(
50
+ status_code=status.HTTP_404_NOT_FOUND,
51
+ detail=f"Session '{session_id}' not found",
52
+ )
53
+ return arena.get_runs(session_id)
54
+
55
+
56
+ @router.post("/compare/{session_id}", response_model=ArenaCompareResponse)
57
+ def compare_runs(
58
+ request: Request, session_id: str, body: ArenaCompareRequest
59
+ ) -> ArenaCompareResponse:
60
+ """Compare selected runs."""
61
+ arena = _get_arena_service(request)
62
+ try:
63
+ return arena.compare_runs(session_id, body.run_ids)
64
+ except ValueError as exc:
65
+ raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc))
66
+
67
+
68
+ @router.delete("/runs/{session_id}", status_code=204)
69
+ def clear_runs(request: Request, session_id: str):
70
+ """Clear all arena runs for a session."""
71
+ _get_arena_service(request).clear_runs(session_id)
72
+ return Response(status_code=204)
arena/schemas.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pydantic schemas for Model Arena."""
2
+ from __future__ import annotations
3
+
4
+ from typing import Any, Literal
5
+
6
+ from pydantic import BaseModel, Field, field_validator
7
+
8
+ from app.models.ml_schemas import KNNScatterData, MetricsResponse, ModelType
9
+
10
+
11
class ArenaModelConfig(BaseModel):
    """One model to train in a batch."""
    # Which model to train; ArenaService passes it to
    # MLService.train_and_evaluate and copies it onto the resulting ArenaRun.
    model_type: ModelType
    # Parameters forwarded unchanged to the trainer; empty when omitted.
    params: dict[str, Any] = Field(default_factory=dict)
    # Forwarded to the trainer as the `tune` keyword; its exact effect is
    # decided by MLService (not visible in this module).
    tune: bool = False
    # Forwarded to the trainer as the `use_feature_selection` keyword; its
    # exact effect is decided by MLService (not visible in this module).
    use_feature_selection: bool = False
17
+
18
+
19
class BatchTrainRequest(BaseModel):
    """Request to train multiple models on the same session."""
    # Session to train on; ArenaService.batch_train raises KeyError when the
    # ML service does not know this id.
    session_id: str
    # Models to train, in order; between 1 and 8 entries are accepted.
    models: list[ArenaModelConfig] = Field(..., min_length=1, max_length=8)
23
+
24
+
25
class ArenaRun(BaseModel):
    """A single trained model run in the arena."""
    # Identifier of this attempt; ArenaService generates a UUID4 string for
    # every model in a batch, including attempts that fail.
    run_id: str
    # Model id reported by the training response; ArenaService stores an empty
    # string for failed runs.
    model_id: str
    model_type: ModelType
    # Completed runs carry the params reported by the training response;
    # failed runs carry the params that were requested.
    params: dict[str, Any]
    metrics: MetricsResponse | None = None  # None for failed runs
    # Training time reported by the training response; 0.0 for failed runs.
    training_time_ms: float
    # Feature names reported by the training response; empty for failed runs.
    feature_names: list[str]
    # Copied from the training response when it provides one; failed runs
    # leave it unset.
    knn_scatter: KNNScatterData | None = None
    status: Literal["completed", "failed"] = "completed"
    # str() of the exception that made the run fail; None for completed runs.
    error: str | None = None
37
+
38
+
39
class BatchTrainResponse(BaseModel):
    """Response from batch training."""
    session_id: str
    # Runs produced by this batch only (completed and failed), in request order.
    runs: list[ArenaRun]
    # Sum of training_time_ms over the runs of this batch that completed.
    total_training_time_ms: float
    # Run with the highest metrics.auc_roc among ALL completed runs stored for
    # the session (earlier batches included); None when none has completed.
    best_run_id: str | None = None
45
+
46
+
47
class ArenaCompareRequest(BaseModel):
    """Request to compare specific runs."""
    # Between 2 and 8 run ids; the validator below rejects repeated ids.
    run_ids: list[str] = Field(..., min_length=2, max_length=8)

    @field_validator("run_ids")
    @classmethod
    def no_duplicates(cls, v: list[str]) -> list[str]:
        # A set collapses repeats, so equal sizes mean every id is distinct.
        distinct = set(v)
        if len(distinct) == len(v):
            return v
        raise ValueError("run_ids must be unique")
57
+
58
+
59
class ArenaCompareResponse(BaseModel):
    """Comparison data for selected runs."""
    # The selected runs, all completed, in the order the service stored them
    # (not the order of the requested ids).
    runs: list[ArenaRun]
    # Selected run with the highest metrics.auc_roc.
    best_run_id: str
    metric_summary: dict[str, dict[str, float]]  # metric_name -> {run_id: value}
    param_diff: dict[str, dict[str, Any]]  # param_name -> {run_id: value} (only differing params)
arena/service.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Arena service -- batch training and run management."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ import threading
6
+ import uuid
7
+ from collections import OrderedDict
8
+ from typing import Any
9
+
10
+ from app.services.ml_service import MLService
11
+
12
+ from .schemas import (
13
+ ArenaCompareResponse,
14
+ ArenaModelConfig,
15
+ ArenaRun,
16
+ BatchTrainRequest,
17
+ BatchTrainResponse,
18
+ )
19
+
20
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Maximum number of sessions whose runs ArenaService keeps in memory. After
# storing a batch, batch_train drops entries from the front of its ordered
# run store until at most this many sessions remain.
_MAX_SESSIONS = 50
23
+
24
+
25
class ArenaService:
    """Batch-train models for a session and keep the resulting runs in memory.

    Training is delegated to the injected ``MLService``. Every attempt,
    successful or not, is recorded as an ``ArenaRun`` under its session id,
    and recorded runs can be listed, compared or cleared. ``_lock`` guards
    every read and write of ``_runs`` and ``_in_flight``.
    """

    # Attributes read from each run's ``metrics`` when building a comparison,
    # in the order they appear in ``metric_summary``.
    _METRIC_NAMES = (
        "accuracy", "sensitivity", "specificity", "precision",
        "f1_score", "auc_roc", "mcc", "train_accuracy",
    )

    def __init__(self, ml_service: MLService) -> None:
        """Create an empty arena that trains through ``ml_service``."""
        self._ml = ml_service
        self._lock = threading.Lock()
        # session_id -> list of ArenaRun. Sessions are ordered by their most
        # recent batch_train; once more than _MAX_SESSIONS are stored, the
        # ones trained longest ago are dropped first. Reads (get_runs,
        # compare_runs, ...) do not refresh a session's position.
        self._runs: OrderedDict[str, list[ArenaRun]] = OrderedDict()
        # Sessions currently inside batch_train, to reject concurrent batches
        # for the same session.
        self._in_flight: set[str] = set()

    @staticmethod
    def _best_run(candidates: list[ArenaRun]) -> ArenaRun | None:
        """Return the candidate with the highest ``metrics.auc_roc``.

        Ties go to the earliest candidate. Returns ``None`` for an empty
        list. Callers must pass only runs whose ``metrics`` is not ``None``.
        """
        if not candidates:
            return None
        # NOTE(review): assumes auc_roc is always a comparable number;
        # confirm against MetricsResponse.
        return max(candidates, key=lambda r: r.metrics.auc_roc)  # type: ignore[union-attr]

    def batch_train(self, request: BatchTrainRequest) -> BatchTrainResponse:
        """Train the requested models one after another on one session.

        A model that fails to train does not abort the batch: it is recorded
        as a run with ``status="failed"`` and the error text, and the next
        model is attempted.

        Args:
            request: Session id plus the models to train.

        Returns:
            The runs produced by this batch, the summed training time of the
            completed ones, and the id of the best completed run (highest
            AUC-ROC) across every run stored for the session.

        Raises:
            KeyError: The ML service does not know the session.
            ValueError: A batch for the same session is already running.
            ImportError, MemoryError: Raised during training; treated as
                non-recoverable and propagated. Runs of the aborted batch are
                not stored.
        """
        session_id = request.session_id

        # Pre-flight: verify session exists (raises KeyError -> router returns 404)
        if self._ml.get_session(session_id) is None:
            raise KeyError(f"Session '{session_id}' not found. Run /api/prepare first.")

        # Guard against concurrent batch_train for same session
        with self._lock:
            if session_id in self._in_flight:
                raise ValueError(
                    f"Batch training already in progress for session '{session_id}'"
                )
            self._in_flight.add(session_id)

        try:
            runs: list[ArenaRun] = []
            total_time = 0.0

            for model_cfg in request.models:
                run_id = str(uuid.uuid4())
                try:
                    response = self._ml.train_and_evaluate(
                        session_id,
                        model_cfg.model_type,
                        model_cfg.params,
                        tune=model_cfg.tune,
                        use_feature_selection=model_cfg.use_feature_selection,
                    )
                    self._ml.store_train_response_in_model(response.model_id, response)
                    run = ArenaRun(
                        run_id=run_id,
                        model_id=response.model_id,
                        model_type=model_cfg.model_type,
                        params=response.params,
                        metrics=response.metrics,
                        training_time_ms=response.training_time_ms,
                        feature_names=response.feature_names,
                        knn_scatter=response.knn_scatter,
                    )
                    total_time += response.training_time_ms
                except (ImportError, MemoryError):
                    raise  # Non-recoverable — propagate to router as 500
                except Exception as exc:
                    logger.warning("Arena: model %s failed: %s", model_cfg.model_type.value, exc)
                    run = ArenaRun(
                        run_id=run_id,
                        model_id="",
                        model_type=model_cfg.model_type,
                        params=model_cfg.params,
                        metrics=None,
                        training_time_ms=0.0,
                        feature_names=[],
                        status="failed",
                        error=str(exc),
                    )
                runs.append(run)

            with self._lock:
                # Store the batch, mark the session as most recently trained
                # and drop the oldest sessions beyond the cap.
                self._runs.setdefault(session_id, []).extend(runs)
                self._runs.move_to_end(session_id)
                while len(self._runs) > _MAX_SESSIONS:
                    self._runs.popitem(last=False)

                # Snapshot the candidates for "best" while still holding the
                # lock, so the store is never read unsynchronised. Best is
                # computed across ALL session runs, not just this batch.
                all_completed = [
                    r for r in self._runs.get(session_id, [])
                    if r.status == "completed" and r.metrics is not None
                ]

            best = self._best_run(all_completed)

            return BatchTrainResponse(
                session_id=session_id,
                runs=runs,
                total_training_time_ms=total_time,
                best_run_id=best.run_id if best is not None else None,
            )
        finally:
            # Always release the in-flight marker, whatever happened above.
            with self._lock:
                self._in_flight.discard(session_id)

    def get_runs(self, session_id: str) -> list[ArenaRun]:
        """Return a copy of the run list stored for ``session_id`` (empty if none)."""
        with self._lock:
            return list(self._runs.get(session_id, []))

    def has_session(self, session_id: str) -> bool:
        """Return whether any batch has been stored for ``session_id``."""
        with self._lock:
            return session_id in self._runs

    def get_run(self, session_id: str, run_id: str) -> ArenaRun | None:
        """Return the run with ``run_id`` in ``session_id``, or ``None`` if absent."""
        with self._lock:
            for run in self._runs.get(session_id, []):
                if run.run_id == run_id:
                    return run
        return None

    def compare_runs(self, session_id: str, run_ids: list[str]) -> ArenaCompareResponse:
        """Build comparison data for the selected runs of a session.

        Args:
            session_id: Session whose stored runs are searched.
            run_ids: Ids of the runs to compare.

        Returns:
            The selected runs (in stored order, not ``run_ids`` order), the id
            of the one with the highest AUC-ROC, a per-metric summary and the
            parameters whose values differ between the runs.

        Raises:
            ValueError: An id is not stored for the session, an id refers to
                a run that did not complete, or fewer than two completed runs
                are selected.
        """
        wanted = set(run_ids)
        with self._lock:
            # Copy under the lock; everything below works on the snapshot.
            all_runs = list(self._runs.get(session_id, []))

        all_run_ids = {r.run_id for r in all_runs}
        selected = [
            r for r in all_runs
            if r.run_id in wanted and r.status == "completed" and r.metrics is not None
        ]

        # Check for missing run IDs
        missing = [rid for rid in run_ids if rid not in all_run_ids]
        if missing:
            raise ValueError(f"Run IDs not found in session '{session_id}': {missing}")

        # Every requested id exists at this point, so any id that was not
        # selected belongs to a run that is not completed.
        selected_ids = {r.run_id for r in selected}
        failed = [rid for rid in run_ids if rid not in selected_ids]
        if failed:
            raise ValueError(f"Run IDs exist but are in failed state: {failed}")

        # Still reachable when run_ids repeats a single id.
        if len(selected) < 2:
            raise ValueError("Need at least 2 completed runs to compare")

        # Build metric summary: metric_name -> {run_id: value}
        metric_summary: dict[str, dict[str, float]] = {
            name: {r.run_id: getattr(r.metrics, name) for r in selected}
            for name in self._METRIC_NAMES
        }

        # Collect every parameter seen: param_name -> {run_id: value}
        all_params: dict[str, dict[str, Any]] = {}
        for r in selected:
            for k, v in r.params.items():
                all_params.setdefault(k, {})[r.run_id] = v

        # Keep only parameters whose values (compared by their str() form)
        # differ between the runs that define them.
        # NOTE(review): a parameter defined by only some of the selected runs
        # is reported only if those runs disagree; confirm whether
        # "present vs. absent" should also count as a difference.
        param_diff = {
            k: vals for k, vals in all_params.items()
            if len({str(v) for v in vals.values()}) > 1
        }

        best = self._best_run(selected)

        return ArenaCompareResponse(
            runs=selected,
            best_run_id=best.run_id,  # type: ignore[union-attr]
            metric_summary=metric_summary,
            param_diff=param_diff,
        )

    def clear_runs(self, session_id: str) -> None:
        """Forget all runs stored for ``session_id``; unknown ids are ignored."""
        with self._lock:
            self._runs.pop(session_id, None)
data_cache/cardiology_arrhythmia.csv ADDED
The diff for this file is too large to render. See raw diff
 
data_cache/cardiology_hf.csv ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
2
+ 75,0,582,0,20,1,265000,1.9,130,1,0,4,1
3
+ 55,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
4
+ 65,0,146,0,20,0,162000,1.3,129,1,1,7,1
5
+ 50,1,111,0,20,0,210000,1.9,137,1,0,7,1
6
+ 65,1,160,1,20,0,327000,2.7,116,0,0,8,1
7
+ 90,1,47,0,40,1,204000,2.1,132,1,1,8,1
8
+ 75,1,246,0,15,0,127000,1.2,137,1,0,10,1
9
+ 60,1,315,1,60,0,454000,1.1,131,1,1,10,1
10
+ 65,0,157,0,65,0,263358.03,1.5,138,0,0,10,1
11
+ 80,1,123,0,35,1,388000,9.4,133,1,1,10,1
12
+ 75,1,81,0,38,1,368000,4,131,1,1,10,1
13
+ 62,0,231,0,25,1,253000,0.9,140,1,1,10,1
14
+ 45,1,981,0,30,0,136000,1.1,137,1,0,11,1
15
+ 50,1,168,0,38,1,276000,1.1,137,1,0,11,1
16
+ 49,1,80,0,30,1,427000,1,138,0,0,12,0
17
+ 82,1,379,0,50,0,47000,1.3,136,1,0,13,1
18
+ 87,1,149,0,38,0,262000,0.9,140,1,0,14,1
19
+ 45,0,582,0,14,0,166000,0.8,127,1,0,14,1
20
+ 70,1,125,0,25,1,237000,1,140,0,0,15,1
21
+ 48,1,582,1,55,0,87000,1.9,121,0,0,15,1
22
+ 65,1,52,0,25,1,276000,1.3,137,0,0,16,0
23
+ 65,1,128,1,30,1,297000,1.6,136,0,0,20,1
24
+ 68,1,220,0,35,1,289000,0.9,140,1,1,20,1
25
+ 53,0,63,1,60,0,368000,0.8,135,1,0,22,0
26
+ 75,0,582,1,30,1,263358.03,1.83,134,0,0,23,1
27
+ 80,0,148,1,38,0,149000,1.9,144,1,1,23,1
28
+ 95,1,112,0,40,1,196000,1,138,0,0,24,1
29
+ 70,0,122,1,45,1,284000,1.3,136,1,1,26,1
30
+ 58,1,60,0,38,0,153000,5.8,134,1,0,26,1
31
+ 82,0,70,1,30,0,200000,1.2,132,1,1,26,1
32
+ 94,0,582,1,38,1,263358.03,1.83,134,1,0,27,1
33
+ 85,0,23,0,45,0,360000,3,132,1,0,28,1
34
+ 50,1,249,1,35,1,319000,1,128,0,0,28,1
35
+ 50,1,159,1,30,0,302000,1.2,138,0,0,29,0
36
+ 65,0,94,1,50,1,188000,1,140,1,0,29,1
37
+ 69,0,582,1,35,0,228000,3.5,134,1,0,30,1
38
+ 90,1,60,1,50,0,226000,1,134,1,0,30,1
39
+ 82,1,855,1,50,1,321000,1,145,0,0,30,1
40
+ 60,0,2656,1,30,0,305000,2.3,137,1,0,30,0
41
+ 60,0,235,1,38,0,329000,3,142,0,0,30,1
42
+ 70,0,582,0,20,1,263358.03,1.83,134,1,1,31,1
43
+ 50,0,124,1,30,1,153000,1.2,136,0,1,32,1
44
+ 70,0,571,1,45,1,185000,1.2,139,1,1,33,1
45
+ 72,0,127,1,50,1,218000,1,134,1,0,33,0
46
+ 60,1,588,1,60,0,194000,1.1,142,0,0,33,1
47
+ 50,0,582,1,38,0,310000,1.9,135,1,1,35,1
48
+ 51,0,1380,0,25,1,271000,0.9,130,1,0,38,1
49
+ 60,0,582,1,38,1,451000,0.6,138,1,1,40,1
50
+ 80,1,553,0,20,1,140000,4.4,133,1,0,41,1
51
+ 57,1,129,0,30,0,395000,1,140,0,0,42,1
52
+ 68,1,577,0,25,1,166000,1,138,1,0,43,1
53
+ 53,1,91,0,20,1,418000,1.4,139,0,0,43,1
54
+ 60,0,3964,1,62,0,263358.03,6.8,146,0,0,43,1
55
+ 70,1,69,1,50,1,351000,1,134,0,0,44,1
56
+ 60,1,260,1,38,0,255000,2.2,132,0,1,45,1
57
+ 95,1,371,0,30,0,461000,2,132,1,0,50,1
58
+ 70,1,75,0,35,0,223000,2.7,138,1,1,54,0
59
+ 60,1,607,0,40,0,216000,0.6,138,1,1,54,0
60
+ 49,0,789,0,20,1,319000,1.1,136,1,1,55,1
61
+ 72,0,364,1,20,1,254000,1.3,136,1,1,59,1
62
+ 45,0,7702,1,25,1,390000,1,139,1,0,60,1
63
+ 50,0,318,0,40,1,216000,2.3,131,0,0,60,1
64
+ 55,0,109,0,35,0,254000,1.1,139,1,1,60,0
65
+ 45,0,582,0,35,0,385000,1,145,1,0,61,1
66
+ 45,0,582,0,80,0,263358.03,1.18,137,0,0,63,0
67
+ 60,0,68,0,20,0,119000,2.9,127,1,1,64,1
68
+ 42,1,250,1,15,0,213000,1.3,136,0,0,65,1
69
+ 72,1,110,0,25,0,274000,1,140,1,1,65,1
70
+ 70,0,161,0,25,0,244000,1.2,142,0,0,66,1
71
+ 65,0,113,1,25,0,497000,1.83,135,1,0,67,1
72
+ 41,0,148,0,40,0,374000,0.8,140,1,1,68,0
73
+ 58,0,582,1,35,0,122000,0.9,139,1,1,71,0
74
+ 85,0,5882,0,35,0,243000,1,132,1,1,72,1
75
+ 65,0,224,1,50,0,149000,1.3,137,1,1,72,0
76
+ 69,0,582,0,20,0,266000,1.2,134,1,1,73,1
77
+ 60,1,47,0,20,0,204000,0.7,139,1,1,73,1
78
+ 70,0,92,0,60,1,317000,0.8,140,0,1,74,0
79
+ 42,0,102,1,40,0,237000,1.2,140,1,0,74,0
80
+ 75,1,203,1,38,1,283000,0.6,131,1,1,74,0
81
+ 55,0,336,0,45,1,324000,0.9,140,0,0,74,0
82
+ 70,0,69,0,40,0,293000,1.7,136,0,0,75,0
83
+ 67,0,582,0,50,0,263358.03,1.18,137,1,1,76,0
84
+ 60,1,76,1,25,0,196000,2.5,132,0,0,77,1
85
+ 79,1,55,0,50,1,172000,1.8,133,1,0,78,0
86
+ 59,1,280,1,25,1,302000,1,141,0,0,78,1
87
+ 51,0,78,0,50,0,406000,0.7,140,1,0,79,0
88
+ 55,0,47,0,35,1,173000,1.1,137,1,0,79,0
89
+ 65,1,68,1,60,1,304000,0.8,140,1,0,79,0
90
+ 44,0,84,1,40,1,235000,0.7,139,1,0,79,0
91
+ 57,1,115,0,25,1,181000,1.1,144,1,0,79,0
92
+ 70,0,66,1,45,0,249000,0.8,136,1,1,80,0
93
+ 60,0,897,1,45,0,297000,1,133,1,0,80,0
94
+ 42,0,582,0,60,0,263358.03,1.18,137,0,0,82,0
95
+ 60,1,154,0,25,0,210000,1.7,135,1,0,82,1
96
+ 58,0,144,1,38,1,327000,0.7,142,0,0,83,0
97
+ 58,1,133,0,60,1,219000,1,141,1,0,83,0
98
+ 63,1,514,1,25,1,254000,1.3,134,1,0,83,0
99
+ 70,1,59,0,60,0,255000,1.1,136,0,0,85,0
100
+ 60,1,156,1,25,1,318000,1.2,137,0,0,85,0
101
+ 63,1,61,1,40,0,221000,1.1,140,0,0,86,0
102
+ 65,1,305,0,25,0,298000,1.1,141,1,0,87,0
103
+ 75,0,582,0,45,1,263358.03,1.18,137,1,0,87,0
104
+ 80,0,898,0,25,0,149000,1.1,144,1,1,87,0
105
+ 42,0,5209,0,30,0,226000,1,140,1,1,87,0
106
+ 60,0,53,0,50,1,286000,2.3,143,0,0,87,0
107
+ 72,1,328,0,30,1,621000,1.7,138,0,1,88,1
108
+ 55,0,748,0,45,0,263000,1.3,137,1,0,88,0
109
+ 45,1,1876,1,35,0,226000,0.9,138,1,0,88,0
110
+ 63,0,936,0,38,0,304000,1.1,133,1,1,88,0
111
+ 45,0,292,1,35,0,850000,1.3,142,1,1,88,0
112
+ 85,0,129,0,60,0,306000,1.2,132,1,1,90,1
113
+ 55,0,60,0,35,0,228000,1.2,135,1,1,90,0
114
+ 50,0,369,1,25,0,252000,1.6,136,1,0,90,0
115
+ 70,1,143,0,60,0,351000,1.3,137,0,0,90,1
116
+ 60,1,754,1,40,1,328000,1.2,126,1,0,91,0
117
+ 58,1,400,0,40,0,164000,1,139,0,0,91,0
118
+ 60,1,96,1,60,1,271000,0.7,136,0,0,94,0
119
+ 85,1,102,0,60,0,507000,3.2,138,0,0,94,0
120
+ 65,1,113,1,60,1,203000,0.9,140,0,0,94,0
121
+ 86,0,582,0,38,0,263358.03,1.83,134,0,0,95,1
122
+ 60,1,737,0,60,1,210000,1.5,135,1,1,95,0
123
+ 66,1,68,1,38,1,162000,1,136,0,0,95,0
124
+ 60,0,96,1,38,0,228000,0.75,140,0,0,95,0
125
+ 60,1,582,0,30,1,127000,0.9,145,0,0,95,0
126
+ 60,0,582,0,40,0,217000,3.7,134,1,0,96,1
127
+ 43,1,358,0,50,0,237000,1.3,135,0,0,97,0
128
+ 46,0,168,1,17,1,271000,2.1,124,0,0,100,1
129
+ 58,1,200,1,60,0,300000,0.8,137,0,0,104,0
130
+ 61,0,248,0,30,1,267000,0.7,136,1,1,104,0
131
+ 53,1,270,1,35,0,227000,3.4,145,1,0,105,0
132
+ 53,1,1808,0,60,1,249000,0.7,138,1,1,106,0
133
+ 60,1,1082,1,45,0,250000,6.1,131,1,0,107,0
134
+ 46,0,719,0,40,1,263358.03,1.18,137,0,0,107,0
135
+ 63,0,193,0,60,1,295000,1.3,145,1,1,107,0
136
+ 81,0,4540,0,35,0,231000,1.18,137,1,1,107,0
137
+ 75,0,582,0,40,0,263358.03,1.18,137,1,0,107,0
138
+ 65,1,59,1,60,0,172000,0.9,137,0,0,107,0
139
+ 68,1,646,0,25,0,305000,2.1,130,1,0,108,0
140
+ 62,0,281,1,35,0,221000,1,136,0,0,108,0
141
+ 50,0,1548,0,30,1,211000,0.8,138,1,0,108,0
142
+ 80,0,805,0,38,0,263358.03,1.1,134,1,0,109,1
143
+ 46,1,291,0,35,0,348000,0.9,140,0,0,109,0
144
+ 50,0,482,1,30,0,329000,0.9,132,0,0,109,0
145
+ 61,1,84,0,40,1,229000,0.9,141,0,0,110,0
146
+ 72,1,943,0,25,1,338000,1.7,139,1,1,111,1
147
+ 50,0,185,0,30,0,266000,0.7,141,1,1,112,0
148
+ 52,0,132,0,30,0,218000,0.7,136,1,1,112,0
149
+ 64,0,1610,0,60,0,242000,1,137,1,0,113,0
150
+ 75,1,582,0,30,0,225000,1.83,134,1,0,113,1
151
+ 60,0,2261,0,35,1,228000,0.9,136,1,0,115,0
152
+ 72,0,233,0,45,1,235000,2.5,135,0,0,115,1
153
+ 62,0,30,1,60,1,244000,0.9,139,1,0,117,0
154
+ 50,0,115,0,45,1,184000,0.9,134,1,1,118,0
155
+ 50,0,1846,1,35,0,263358.03,1.18,137,1,1,119,0
156
+ 65,1,335,0,35,1,235000,0.8,136,0,0,120,0
157
+ 60,1,231,1,25,0,194000,1.7,140,1,0,120,0
158
+ 52,1,58,0,35,0,277000,1.4,136,0,0,120,0
159
+ 50,0,250,0,25,0,262000,1,136,1,1,120,0
160
+ 85,1,910,0,50,0,235000,1.3,134,1,0,121,0
161
+ 59,1,129,0,45,1,362000,1.1,139,1,1,121,0
162
+ 66,1,72,0,40,1,242000,1.2,134,1,0,121,0
163
+ 45,1,130,0,35,0,174000,0.8,139,1,1,121,0
164
+ 63,1,582,0,40,0,448000,0.9,137,1,1,123,0
165
+ 50,1,2334,1,35,0,75000,0.9,142,0,0,126,1
166
+ 45,0,2442,1,30,0,334000,1.1,139,1,0,129,1
167
+ 80,0,776,1,38,1,192000,1.3,135,0,0,130,1
168
+ 53,0,196,0,60,0,220000,0.7,133,1,1,134,0
169
+ 59,0,66,1,20,0,70000,2.4,134,1,0,135,1
170
+ 65,0,582,1,40,0,270000,1,138,0,0,140,0
171
+ 70,0,835,0,35,1,305000,0.8,133,0,0,145,0
172
+ 51,1,582,1,35,0,263358.03,1.5,136,1,1,145,0
173
+ 52,0,3966,0,40,0,325000,0.9,140,1,1,146,0
174
+ 70,1,171,0,60,1,176000,1.1,145,1,1,146,0
175
+ 50,1,115,0,20,0,189000,0.8,139,1,0,146,0
176
+ 65,0,198,1,35,1,281000,0.9,137,1,1,146,0
177
+ 60,1,95,0,60,0,337000,1,138,1,1,146,0
178
+ 69,0,1419,0,40,0,105000,1,135,1,1,147,0
179
+ 49,1,69,0,50,0,132000,1,140,0,0,147,0
180
+ 63,1,122,1,60,0,267000,1.2,145,1,0,147,0
181
+ 55,0,835,0,40,0,279000,0.7,140,1,1,147,0
182
+ 40,0,478,1,30,0,303000,0.9,136,1,0,148,0
183
+ 59,1,176,1,25,0,221000,1,136,1,1,150,1
184
+ 65,0,395,1,25,0,265000,1.2,136,1,1,154,1
185
+ 75,0,99,0,38,1,224000,2.5,134,1,0,162,1
186
+ 58,1,145,0,25,0,219000,1.2,137,1,1,170,1
187
+ 60.667,1,104,1,30,0,389000,1.5,136,1,0,171,1
188
+ 50,0,582,0,50,0,153000,0.6,134,0,0,172,1
189
+ 60,0,1896,1,25,0,365000,2.1,144,0,0,172,1
190
+ 60.667,1,151,1,40,1,201000,1,136,0,0,172,0
191
+ 40,0,244,0,45,1,275000,0.9,140,0,0,174,0
192
+ 80,0,582,1,35,0,350000,2.1,134,1,0,174,0
193
+ 64,1,62,0,60,0,309000,1.5,135,0,0,174,0
194
+ 50,1,121,1,40,0,260000,0.7,130,1,0,175,0
195
+ 73,1,231,1,30,0,160000,1.18,142,1,1,180,0
196
+ 45,0,582,0,20,1,126000,1.6,135,1,0,180,1
197
+ 77,1,418,0,45,0,223000,1.8,145,1,0,180,1
198
+ 45,0,582,1,38,1,263358.03,1.18,137,0,0,185,0
199
+ 65,0,167,0,30,0,259000,0.8,138,0,0,186,0
200
+ 50,1,582,1,20,1,279000,1,134,0,0,186,0
201
+ 60,0,1211,1,35,0,263358.03,1.8,113,1,1,186,0
202
+ 63,1,1767,0,45,0,73000,0.7,137,1,0,186,0
203
+ 45,0,308,1,60,1,377000,1,136,1,0,186,0
204
+ 70,0,97,0,60,1,220000,0.9,138,1,0,186,0
205
+ 60,0,59,0,25,1,212000,3.5,136,1,1,187,0
206
+ 78,1,64,0,40,0,277000,0.7,137,1,1,187,0
207
+ 50,1,167,1,45,0,362000,1,136,0,0,187,0
208
+ 40,1,101,0,40,0,226000,0.8,141,0,0,187,0
209
+ 85,0,212,0,38,0,186000,0.9,136,1,0,187,0
210
+ 60,1,2281,1,40,0,283000,1,141,0,0,187,0
211
+ 49,0,972,1,35,1,268000,0.8,130,0,0,187,0
212
+ 70,0,212,1,17,1,389000,1,136,1,1,188,0
213
+ 50,0,582,0,62,1,147000,0.8,140,1,1,192,0
214
+ 78,0,224,0,50,0,481000,1.4,138,1,1,192,0
215
+ 48,1,131,1,30,1,244000,1.6,130,0,0,193,1
216
+ 65,1,135,0,35,1,290000,0.8,134,1,0,194,0
217
+ 73,0,582,0,35,1,203000,1.3,134,1,0,195,0
218
+ 70,0,1202,0,50,1,358000,0.9,141,0,0,196,0
219
+ 54,1,427,0,70,1,151000,9,137,0,0,196,1
220
+ 68,1,1021,1,35,0,271000,1.1,134,1,0,197,0
221
+ 55,0,582,1,35,1,371000,0.7,140,0,0,197,0
222
+ 73,0,582,0,20,0,263358.03,1.83,134,1,0,198,1
223
+ 65,0,118,0,50,0,194000,1.1,145,1,1,200,0
224
+ 42,1,86,0,35,0,365000,1.1,139,1,1,201,0
225
+ 47,0,582,0,25,0,130000,0.8,134,1,0,201,0
226
+ 58,0,582,1,25,0,504000,1,138,1,0,205,0
227
+ 75,0,675,1,60,0,265000,1.4,125,0,0,205,0
228
+ 58,1,57,0,25,0,189000,1.3,132,1,1,205,0
229
+ 55,1,2794,0,35,1,141000,1,140,1,0,206,0
230
+ 65,0,56,0,25,0,237000,5,130,0,0,207,0
231
+ 72,0,211,0,25,0,274000,1.2,134,0,0,207,0
232
+ 60,0,166,0,30,0,62000,1.7,127,0,0,207,1
233
+ 70,0,93,0,35,0,185000,1.1,134,1,1,208,0
234
+ 40,1,129,0,35,0,255000,0.9,137,1,0,209,0
235
+ 53,1,707,0,38,0,330000,1.4,137,1,1,209,0
236
+ 53,1,582,0,45,0,305000,1.1,137,1,1,209,0
237
+ 77,1,109,0,50,1,406000,1.1,137,1,0,209,0
238
+ 75,0,119,0,50,1,248000,1.1,148,1,0,209,0
239
+ 70,0,232,0,30,0,173000,1.2,132,1,0,210,0
240
+ 65,1,720,1,40,0,257000,1,136,0,0,210,0
241
+ 55,1,180,0,45,0,263358.03,1.18,137,1,1,211,0
242
+ 70,0,81,1,35,1,533000,1.3,139,0,0,212,0
243
+ 65,0,582,1,30,0,249000,1.3,136,1,1,212,0
244
+ 40,0,90,0,35,0,255000,1.1,136,1,1,212,0
245
+ 73,1,1185,0,40,1,220000,0.9,141,0,0,213,0
246
+ 54,0,582,1,38,0,264000,1.8,134,1,0,213,0
247
+ 61,1,80,1,38,0,282000,1.4,137,1,0,213,0
248
+ 55,0,2017,0,25,0,314000,1.1,138,1,0,214,1
249
+ 64,0,143,0,25,0,246000,2.4,135,1,0,214,0
250
+ 40,0,624,0,35,0,301000,1,142,1,1,214,0
251
+ 53,0,207,1,40,0,223000,1.2,130,0,0,214,0
252
+ 50,0,2522,0,30,1,404000,0.5,139,0,0,214,0
253
+ 55,0,572,1,35,0,231000,0.8,143,0,0,215,0
254
+ 50,0,245,0,45,1,274000,1,133,1,0,215,0
255
+ 70,0,88,1,35,1,236000,1.2,132,0,0,215,0
256
+ 53,1,446,0,60,1,263358.03,1,139,1,0,215,0
257
+ 52,1,191,1,30,1,334000,1,142,1,1,216,0
258
+ 65,0,326,0,38,0,294000,1.7,139,0,0,220,0
259
+ 58,0,132,1,38,1,253000,1,139,1,0,230,0
260
+ 45,1,66,1,25,0,233000,0.8,135,1,0,230,0
261
+ 53,0,56,0,50,0,308000,0.7,135,1,1,231,0
262
+ 55,0,66,0,40,0,203000,1,138,1,0,233,0
263
+ 62,1,655,0,40,0,283000,0.7,133,0,0,233,0
264
+ 65,1,258,1,25,0,198000,1.4,129,1,0,235,1
265
+ 68,1,157,1,60,0,208000,1,140,0,0,237,0
266
+ 61,0,582,1,38,0,147000,1.2,141,1,0,237,0
267
+ 50,1,298,0,35,0,362000,0.9,140,1,1,240,0
268
+ 55,0,1199,0,20,0,263358.03,1.83,134,1,1,241,1
269
+ 56,1,135,1,38,0,133000,1.7,140,1,0,244,0
270
+ 45,0,582,1,38,0,302000,0.9,140,0,0,244,0
271
+ 40,0,582,1,35,0,222000,1,132,1,0,244,0
272
+ 44,0,582,1,30,1,263358.03,1.6,130,1,1,244,0
273
+ 51,0,582,1,40,0,221000,0.9,134,0,0,244,0
274
+ 67,0,213,0,38,0,215000,1.2,133,0,0,245,0
275
+ 42,0,64,0,40,0,189000,0.7,140,1,0,245,0
276
+ 60,1,257,1,30,0,150000,1,137,1,1,245,0
277
+ 45,0,582,0,38,1,422000,0.8,137,0,0,245,0
278
+ 70,0,618,0,35,0,327000,1.1,142,0,0,245,0
279
+ 70,0,582,1,38,0,25100,1.1,140,1,0,246,0
280
+ 50,1,1051,1,30,0,232000,0.7,136,0,0,246,0
281
+ 55,0,84,1,38,0,451000,1.3,136,0,0,246,0
282
+ 70,0,2695,1,40,0,241000,1,137,1,0,247,0
283
+ 70,0,582,0,40,0,51000,2.7,136,1,1,250,0
284
+ 42,0,64,0,30,0,215000,3.8,128,1,1,250,0
285
+ 65,0,1688,0,38,0,263358.03,1.1,138,1,1,250,0
286
+ 50,1,54,0,40,0,279000,0.8,141,1,0,250,0
287
+ 55,1,170,1,40,0,336000,1.2,135,1,0,250,0
288
+ 60,0,253,0,35,0,279000,1.7,140,1,0,250,0
289
+ 45,0,582,1,55,0,543000,1,132,0,0,250,0
290
+ 65,0,892,1,35,0,263358.03,1.1,142,0,0,256,0
291
+ 90,1,337,0,38,0,390000,0.9,144,0,0,256,0
292
+ 45,0,615,1,55,0,222000,0.8,141,0,0,257,0
293
+ 60,0,320,0,35,0,133000,1.4,139,1,0,258,0
294
+ 52,0,190,1,38,0,382000,1,140,1,1,258,0
295
+ 63,1,103,1,35,0,179000,0.9,136,1,1,270,0
296
+ 62,0,61,1,38,1,155000,1.1,143,1,1,270,0
297
+ 55,0,1820,0,38,0,270000,1.2,139,0,0,271,0
298
+ 45,0,2060,1,60,0,742000,0.8,138,0,0,278,0
299
+ 45,0,2413,0,38,0,140000,1.4,140,1,1,280,0
300
+ 50,0,196,0,45,0,395000,1.6,136,1,1,285,0
data_cache/depression_data.csv ADDED
The diff for this file is too large to render. See raw diff
 
data_cache/dermatology.csv ADDED
The diff for this file is too large to render. See raw diff
 
data_cache/endocrinology_diabetes.csv ADDED
@@ -0,0 +1,768 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 6,148,72,35,0,33.6,0.627,50,1
2
+ 1,85,66,29,0,26.6,0.351,31,0
3
+ 8,183,64,0,0,23.3,0.672,32,1
4
+ 1,89,66,23,94,28.1,0.167,21,0
5
+ 0,137,40,35,168,43.1,2.288,33,1
6
+ 5,116,74,0,0,25.6,0.201,30,0
7
+ 3,78,50,32,88,31.0,0.248,26,1
8
+ 10,115,0,0,0,35.3,0.134,29,0
9
+ 2,197,70,45,543,30.5,0.158,53,1
10
+ 8,125,96,0,0,0.0,0.232,54,1
11
+ 4,110,92,0,0,37.6,0.191,30,0
12
+ 10,168,74,0,0,38.0,0.537,34,1
13
+ 10,139,80,0,0,27.1,1.441,57,0
14
+ 1,189,60,23,846,30.1,0.398,59,1
15
+ 5,166,72,19,175,25.8,0.587,51,1
16
+ 7,100,0,0,0,30.0,0.484,32,1
17
+ 0,118,84,47,230,45.8,0.551,31,1
18
+ 7,107,74,0,0,29.6,0.254,31,1
19
+ 1,103,30,38,83,43.3,0.183,33,0
20
+ 1,115,70,30,96,34.6,0.529,32,1
21
+ 3,126,88,41,235,39.3,0.704,27,0
22
+ 8,99,84,0,0,35.4,0.388,50,0
23
+ 7,196,90,0,0,39.8,0.451,41,1
24
+ 9,119,80,35,0,29.0,0.263,29,1
25
+ 11,143,94,33,146,36.6,0.254,51,1
26
+ 10,125,70,26,115,31.1,0.205,41,1
27
+ 7,147,76,0,0,39.4,0.257,43,1
28
+ 1,97,66,15,140,23.2,0.487,22,0
29
+ 13,145,82,19,110,22.2,0.245,57,0
30
+ 5,117,92,0,0,34.1,0.337,38,0
31
+ 5,109,75,26,0,36.0,0.546,60,0
32
+ 3,158,76,36,245,31.6,0.851,28,1
33
+ 3,88,58,11,54,24.8,0.267,22,0
34
+ 6,92,92,0,0,19.9,0.188,28,0
35
+ 10,122,78,31,0,27.6,0.512,45,0
36
+ 4,103,60,33,192,24.0,0.966,33,0
37
+ 11,138,76,0,0,33.2,0.420,35,0
38
+ 9,102,76,37,0,32.9,0.665,46,1
39
+ 2,90,68,42,0,38.2,0.503,27,1
40
+ 4,111,72,47,207,37.1,1.390,56,1
41
+ 3,180,64,25,70,34.0,0.271,26,0
42
+ 7,133,84,0,0,40.2,0.696,37,0
43
+ 7,106,92,18,0,22.7,0.235,48,0
44
+ 9,171,110,24,240,45.4,0.721,54,1
45
+ 7,159,64,0,0,27.4,0.294,40,0
46
+ 0,180,66,39,0,42.0,1.893,25,1
47
+ 1,146,56,0,0,29.7,0.564,29,0
48
+ 2,71,70,27,0,28.0,0.586,22,0
49
+ 7,103,66,32,0,39.1,0.344,31,1
50
+ 7,105,0,0,0,0.0,0.305,24,0
51
+ 1,103,80,11,82,19.4,0.491,22,0
52
+ 1,101,50,15,36,24.2,0.526,26,0
53
+ 5,88,66,21,23,24.4,0.342,30,0
54
+ 8,176,90,34,300,33.7,0.467,58,1
55
+ 7,150,66,42,342,34.7,0.718,42,0
56
+ 1,73,50,10,0,23.0,0.248,21,0
57
+ 7,187,68,39,304,37.7,0.254,41,1
58
+ 0,100,88,60,110,46.8,0.962,31,0
59
+ 0,146,82,0,0,40.5,1.781,44,0
60
+ 0,105,64,41,142,41.5,0.173,22,0
61
+ 2,84,0,0,0,0.0,0.304,21,0
62
+ 8,133,72,0,0,32.9,0.270,39,1
63
+ 5,44,62,0,0,25.0,0.587,36,0
64
+ 2,141,58,34,128,25.4,0.699,24,0
65
+ 7,114,66,0,0,32.8,0.258,42,1
66
+ 5,99,74,27,0,29.0,0.203,32,0
67
+ 0,109,88,30,0,32.5,0.855,38,1
68
+ 2,109,92,0,0,42.7,0.845,54,0
69
+ 1,95,66,13,38,19.6,0.334,25,0
70
+ 4,146,85,27,100,28.9,0.189,27,0
71
+ 2,100,66,20,90,32.9,0.867,28,1
72
+ 5,139,64,35,140,28.6,0.411,26,0
73
+ 13,126,90,0,0,43.4,0.583,42,1
74
+ 4,129,86,20,270,35.1,0.231,23,0
75
+ 1,79,75,30,0,32.0,0.396,22,0
76
+ 1,0,48,20,0,24.7,0.140,22,0
77
+ 7,62,78,0,0,32.6,0.391,41,0
78
+ 5,95,72,33,0,37.7,0.370,27,0
79
+ 0,131,0,0,0,43.2,0.270,26,1
80
+ 2,112,66,22,0,25.0,0.307,24,0
81
+ 3,113,44,13,0,22.4,0.140,22,0
82
+ 2,74,0,0,0,0.0,0.102,22,0
83
+ 7,83,78,26,71,29.3,0.767,36,0
84
+ 0,101,65,28,0,24.6,0.237,22,0
85
+ 5,137,108,0,0,48.8,0.227,37,1
86
+ 2,110,74,29,125,32.4,0.698,27,0
87
+ 13,106,72,54,0,36.6,0.178,45,0
88
+ 2,100,68,25,71,38.5,0.324,26,0
89
+ 15,136,70,32,110,37.1,0.153,43,1
90
+ 1,107,68,19,0,26.5,0.165,24,0
91
+ 1,80,55,0,0,19.1,0.258,21,0
92
+ 4,123,80,15,176,32.0,0.443,34,0
93
+ 7,81,78,40,48,46.7,0.261,42,0
94
+ 4,134,72,0,0,23.8,0.277,60,1
95
+ 2,142,82,18,64,24.7,0.761,21,0
96
+ 6,144,72,27,228,33.9,0.255,40,0
97
+ 2,92,62,28,0,31.6,0.130,24,0
98
+ 1,71,48,18,76,20.4,0.323,22,0
99
+ 6,93,50,30,64,28.7,0.356,23,0
100
+ 1,122,90,51,220,49.7,0.325,31,1
101
+ 1,163,72,0,0,39.0,1.222,33,1
102
+ 1,151,60,0,0,26.1,0.179,22,0
103
+ 0,125,96,0,0,22.5,0.262,21,0
104
+ 1,81,72,18,40,26.6,0.283,24,0
105
+ 2,85,65,0,0,39.6,0.930,27,0
106
+ 1,126,56,29,152,28.7,0.801,21,0
107
+ 1,96,122,0,0,22.4,0.207,27,0
108
+ 4,144,58,28,140,29.5,0.287,37,0
109
+ 3,83,58,31,18,34.3,0.336,25,0
110
+ 0,95,85,25,36,37.4,0.247,24,1
111
+ 3,171,72,33,135,33.3,0.199,24,1
112
+ 8,155,62,26,495,34.0,0.543,46,1
113
+ 1,89,76,34,37,31.2,0.192,23,0
114
+ 4,76,62,0,0,34.0,0.391,25,0
115
+ 7,160,54,32,175,30.5,0.588,39,1
116
+ 4,146,92,0,0,31.2,0.539,61,1
117
+ 5,124,74,0,0,34.0,0.220,38,1
118
+ 5,78,48,0,0,33.7,0.654,25,0
119
+ 4,97,60,23,0,28.2,0.443,22,0
120
+ 4,99,76,15,51,23.2,0.223,21,0
121
+ 0,162,76,56,100,53.2,0.759,25,1
122
+ 6,111,64,39,0,34.2,0.260,24,0
123
+ 2,107,74,30,100,33.6,0.404,23,0
124
+ 5,132,80,0,0,26.8,0.186,69,0
125
+ 0,113,76,0,0,33.3,0.278,23,1
126
+ 1,88,30,42,99,55.0,0.496,26,1
127
+ 3,120,70,30,135,42.9,0.452,30,0
128
+ 1,118,58,36,94,33.3,0.261,23,0
129
+ 1,117,88,24,145,34.5,0.403,40,1
130
+ 0,105,84,0,0,27.9,0.741,62,1
131
+ 4,173,70,14,168,29.7,0.361,33,1
132
+ 9,122,56,0,0,33.3,1.114,33,1
133
+ 3,170,64,37,225,34.5,0.356,30,1
134
+ 8,84,74,31,0,38.3,0.457,39,0
135
+ 2,96,68,13,49,21.1,0.647,26,0
136
+ 2,125,60,20,140,33.8,0.088,31,0
137
+ 0,100,70,26,50,30.8,0.597,21,0
138
+ 0,93,60,25,92,28.7,0.532,22,0
139
+ 0,129,80,0,0,31.2,0.703,29,0
140
+ 5,105,72,29,325,36.9,0.159,28,0
141
+ 3,128,78,0,0,21.1,0.268,55,0
142
+ 5,106,82,30,0,39.5,0.286,38,0
143
+ 2,108,52,26,63,32.5,0.318,22,0
144
+ 10,108,66,0,0,32.4,0.272,42,1
145
+ 4,154,62,31,284,32.8,0.237,23,0
146
+ 0,102,75,23,0,0.0,0.572,21,0
147
+ 9,57,80,37,0,32.8,0.096,41,0
148
+ 2,106,64,35,119,30.5,1.400,34,0
149
+ 5,147,78,0,0,33.7,0.218,65,0
150
+ 2,90,70,17,0,27.3,0.085,22,0
151
+ 1,136,74,50,204,37.4,0.399,24,0
152
+ 4,114,65,0,0,21.9,0.432,37,0
153
+ 9,156,86,28,155,34.3,1.189,42,1
154
+ 1,153,82,42,485,40.6,0.687,23,0
155
+ 8,188,78,0,0,47.9,0.137,43,1
156
+ 7,152,88,44,0,50.0,0.337,36,1
157
+ 2,99,52,15,94,24.6,0.637,21,0
158
+ 1,109,56,21,135,25.2,0.833,23,0
159
+ 2,88,74,19,53,29.0,0.229,22,0
160
+ 17,163,72,41,114,40.9,0.817,47,1
161
+ 4,151,90,38,0,29.7,0.294,36,0
162
+ 7,102,74,40,105,37.2,0.204,45,0
163
+ 0,114,80,34,285,44.2,0.167,27,0
164
+ 2,100,64,23,0,29.7,0.368,21,0
165
+ 0,131,88,0,0,31.6,0.743,32,1
166
+ 6,104,74,18,156,29.9,0.722,41,1
167
+ 3,148,66,25,0,32.5,0.256,22,0
168
+ 4,120,68,0,0,29.6,0.709,34,0
169
+ 4,110,66,0,0,31.9,0.471,29,0
170
+ 3,111,90,12,78,28.4,0.495,29,0
171
+ 6,102,82,0,0,30.8,0.180,36,1
172
+ 6,134,70,23,130,35.4,0.542,29,1
173
+ 2,87,0,23,0,28.9,0.773,25,0
174
+ 1,79,60,42,48,43.5,0.678,23,0
175
+ 2,75,64,24,55,29.7,0.370,33,0
176
+ 8,179,72,42,130,32.7,0.719,36,1
177
+ 6,85,78,0,0,31.2,0.382,42,0
178
+ 0,129,110,46,130,67.1,0.319,26,1
179
+ 5,143,78,0,0,45.0,0.190,47,0
180
+ 5,130,82,0,0,39.1,0.956,37,1
181
+ 6,87,80,0,0,23.2,0.084,32,0
182
+ 0,119,64,18,92,34.9,0.725,23,0
183
+ 1,0,74,20,23,27.7,0.299,21,0
184
+ 5,73,60,0,0,26.8,0.268,27,0
185
+ 4,141,74,0,0,27.6,0.244,40,0
186
+ 7,194,68,28,0,35.9,0.745,41,1
187
+ 8,181,68,36,495,30.1,0.615,60,1
188
+ 1,128,98,41,58,32.0,1.321,33,1
189
+ 8,109,76,39,114,27.9,0.640,31,1
190
+ 5,139,80,35,160,31.6,0.361,25,1
191
+ 3,111,62,0,0,22.6,0.142,21,0
192
+ 9,123,70,44,94,33.1,0.374,40,0
193
+ 7,159,66,0,0,30.4,0.383,36,1
194
+ 11,135,0,0,0,52.3,0.578,40,1
195
+ 8,85,55,20,0,24.4,0.136,42,0
196
+ 5,158,84,41,210,39.4,0.395,29,1
197
+ 1,105,58,0,0,24.3,0.187,21,0
198
+ 3,107,62,13,48,22.9,0.678,23,1
199
+ 4,109,64,44,99,34.8,0.905,26,1
200
+ 4,148,60,27,318,30.9,0.150,29,1
201
+ 0,113,80,16,0,31.0,0.874,21,0
202
+ 1,138,82,0,0,40.1,0.236,28,0
203
+ 0,108,68,20,0,27.3,0.787,32,0
204
+ 2,99,70,16,44,20.4,0.235,27,0
205
+ 6,103,72,32,190,37.7,0.324,55,0
206
+ 5,111,72,28,0,23.9,0.407,27,0
207
+ 8,196,76,29,280,37.5,0.605,57,1
208
+ 5,162,104,0,0,37.7,0.151,52,1
209
+ 1,96,64,27,87,33.2,0.289,21,0
210
+ 7,184,84,33,0,35.5,0.355,41,1
211
+ 2,81,60,22,0,27.7,0.290,25,0
212
+ 0,147,85,54,0,42.8,0.375,24,0
213
+ 7,179,95,31,0,34.2,0.164,60,0
214
+ 0,140,65,26,130,42.6,0.431,24,1
215
+ 9,112,82,32,175,34.2,0.260,36,1
216
+ 12,151,70,40,271,41.8,0.742,38,1
217
+ 5,109,62,41,129,35.8,0.514,25,1
218
+ 6,125,68,30,120,30.0,0.464,32,0
219
+ 5,85,74,22,0,29.0,1.224,32,1
220
+ 5,112,66,0,0,37.8,0.261,41,1
221
+ 0,177,60,29,478,34.6,1.072,21,1
222
+ 2,158,90,0,0,31.6,0.805,66,1
223
+ 7,119,0,0,0,25.2,0.209,37,0
224
+ 7,142,60,33,190,28.8,0.687,61,0
225
+ 1,100,66,15,56,23.6,0.666,26,0
226
+ 1,87,78,27,32,34.6,0.101,22,0
227
+ 0,101,76,0,0,35.7,0.198,26,0
228
+ 3,162,52,38,0,37.2,0.652,24,1
229
+ 4,197,70,39,744,36.7,2.329,31,0
230
+ 0,117,80,31,53,45.2,0.089,24,0
231
+ 4,142,86,0,0,44.0,0.645,22,1
232
+ 6,134,80,37,370,46.2,0.238,46,1
233
+ 1,79,80,25,37,25.4,0.583,22,0
234
+ 4,122,68,0,0,35.0,0.394,29,0
235
+ 3,74,68,28,45,29.7,0.293,23,0
236
+ 4,171,72,0,0,43.6,0.479,26,1
237
+ 7,181,84,21,192,35.9,0.586,51,1
238
+ 0,179,90,27,0,44.1,0.686,23,1
239
+ 9,164,84,21,0,30.8,0.831,32,1
240
+ 0,104,76,0,0,18.4,0.582,27,0
241
+ 1,91,64,24,0,29.2,0.192,21,0
242
+ 4,91,70,32,88,33.1,0.446,22,0
243
+ 3,139,54,0,0,25.6,0.402,22,1
244
+ 6,119,50,22,176,27.1,1.318,33,1
245
+ 2,146,76,35,194,38.2,0.329,29,0
246
+ 9,184,85,15,0,30.0,1.213,49,1
247
+ 10,122,68,0,0,31.2,0.258,41,0
248
+ 0,165,90,33,680,52.3,0.427,23,0
249
+ 9,124,70,33,402,35.4,0.282,34,0
250
+ 1,111,86,19,0,30.1,0.143,23,0
251
+ 9,106,52,0,0,31.2,0.380,42,0
252
+ 2,129,84,0,0,28.0,0.284,27,0
253
+ 2,90,80,14,55,24.4,0.249,24,0
254
+ 0,86,68,32,0,35.8,0.238,25,0
255
+ 12,92,62,7,258,27.6,0.926,44,1
256
+ 1,113,64,35,0,33.6,0.543,21,1
257
+ 3,111,56,39,0,30.1,0.557,30,0
258
+ 2,114,68,22,0,28.7,0.092,25,0
259
+ 1,193,50,16,375,25.9,0.655,24,0
260
+ 11,155,76,28,150,33.3,1.353,51,1
261
+ 3,191,68,15,130,30.9,0.299,34,0
262
+ 3,141,0,0,0,30.0,0.761,27,1
263
+ 4,95,70,32,0,32.1,0.612,24,0
264
+ 3,142,80,15,0,32.4,0.200,63,0
265
+ 4,123,62,0,0,32.0,0.226,35,1
266
+ 5,96,74,18,67,33.6,0.997,43,0
267
+ 0,138,0,0,0,36.3,0.933,25,1
268
+ 2,128,64,42,0,40.0,1.101,24,0
269
+ 0,102,52,0,0,25.1,0.078,21,0
270
+ 2,146,0,0,0,27.5,0.240,28,1
271
+ 10,101,86,37,0,45.6,1.136,38,1
272
+ 2,108,62,32,56,25.2,0.128,21,0
273
+ 3,122,78,0,0,23.0,0.254,40,0
274
+ 1,71,78,50,45,33.2,0.422,21,0
275
+ 13,106,70,0,0,34.2,0.251,52,0
276
+ 2,100,70,52,57,40.5,0.677,25,0
277
+ 7,106,60,24,0,26.5,0.296,29,1
278
+ 0,104,64,23,116,27.8,0.454,23,0
279
+ 5,114,74,0,0,24.9,0.744,57,0
280
+ 2,108,62,10,278,25.3,0.881,22,0
281
+ 0,146,70,0,0,37.9,0.334,28,1
282
+ 10,129,76,28,122,35.9,0.280,39,0
283
+ 7,133,88,15,155,32.4,0.262,37,0
284
+ 7,161,86,0,0,30.4,0.165,47,1
285
+ 2,108,80,0,0,27.0,0.259,52,1
286
+ 7,136,74,26,135,26.0,0.647,51,0
287
+ 5,155,84,44,545,38.7,0.619,34,0
288
+ 1,119,86,39,220,45.6,0.808,29,1
289
+ 4,96,56,17,49,20.8,0.340,26,0
290
+ 5,108,72,43,75,36.1,0.263,33,0
291
+ 0,78,88,29,40,36.9,0.434,21,0
292
+ 0,107,62,30,74,36.6,0.757,25,1
293
+ 2,128,78,37,182,43.3,1.224,31,1
294
+ 1,128,48,45,194,40.5,0.613,24,1
295
+ 0,161,50,0,0,21.9,0.254,65,0
296
+ 6,151,62,31,120,35.5,0.692,28,0
297
+ 2,146,70,38,360,28.0,0.337,29,1
298
+ 0,126,84,29,215,30.7,0.520,24,0
299
+ 14,100,78,25,184,36.6,0.412,46,1
300
+ 8,112,72,0,0,23.6,0.840,58,0
301
+ 0,167,0,0,0,32.3,0.839,30,1
302
+ 2,144,58,33,135,31.6,0.422,25,1
303
+ 5,77,82,41,42,35.8,0.156,35,0
304
+ 5,115,98,0,0,52.9,0.209,28,1
305
+ 3,150,76,0,0,21.0,0.207,37,0
306
+ 2,120,76,37,105,39.7,0.215,29,0
307
+ 10,161,68,23,132,25.5,0.326,47,1
308
+ 0,137,68,14,148,24.8,0.143,21,0
309
+ 0,128,68,19,180,30.5,1.391,25,1
310
+ 2,124,68,28,205,32.9,0.875,30,1
311
+ 6,80,66,30,0,26.2,0.313,41,0
312
+ 0,106,70,37,148,39.4,0.605,22,0
313
+ 2,155,74,17,96,26.6,0.433,27,1
314
+ 3,113,50,10,85,29.5,0.626,25,0
315
+ 7,109,80,31,0,35.9,1.127,43,1
316
+ 2,112,68,22,94,34.1,0.315,26,0
317
+ 3,99,80,11,64,19.3,0.284,30,0
318
+ 3,182,74,0,0,30.5,0.345,29,1
319
+ 3,115,66,39,140,38.1,0.150,28,0
320
+ 6,194,78,0,0,23.5,0.129,59,1
321
+ 4,129,60,12,231,27.5,0.527,31,0
322
+ 3,112,74,30,0,31.6,0.197,25,1
323
+ 0,124,70,20,0,27.4,0.254,36,1
324
+ 13,152,90,33,29,26.8,0.731,43,1
325
+ 2,112,75,32,0,35.7,0.148,21,0
326
+ 1,157,72,21,168,25.6,0.123,24,0
327
+ 1,122,64,32,156,35.1,0.692,30,1
328
+ 10,179,70,0,0,35.1,0.200,37,0
329
+ 2,102,86,36,120,45.5,0.127,23,1
330
+ 6,105,70,32,68,30.8,0.122,37,0
331
+ 8,118,72,19,0,23.1,1.476,46,0
332
+ 2,87,58,16,52,32.7,0.166,25,0
333
+ 1,180,0,0,0,43.3,0.282,41,1
334
+ 12,106,80,0,0,23.6,0.137,44,0
335
+ 1,95,60,18,58,23.9,0.260,22,0
336
+ 0,165,76,43,255,47.9,0.259,26,0
337
+ 0,117,0,0,0,33.8,0.932,44,0
338
+ 5,115,76,0,0,31.2,0.343,44,1
339
+ 9,152,78,34,171,34.2,0.893,33,1
340
+ 7,178,84,0,0,39.9,0.331,41,1
341
+ 1,130,70,13,105,25.9,0.472,22,0
342
+ 1,95,74,21,73,25.9,0.673,36,0
343
+ 1,0,68,35,0,32.0,0.389,22,0
344
+ 5,122,86,0,0,34.7,0.290,33,0
345
+ 8,95,72,0,0,36.8,0.485,57,0
346
+ 8,126,88,36,108,38.5,0.349,49,0
347
+ 1,139,46,19,83,28.7,0.654,22,0
348
+ 3,116,0,0,0,23.5,0.187,23,0
349
+ 3,99,62,19,74,21.8,0.279,26,0
350
+ 5,0,80,32,0,41.0,0.346,37,1
351
+ 4,92,80,0,0,42.2,0.237,29,0
352
+ 4,137,84,0,0,31.2,0.252,30,0
353
+ 3,61,82,28,0,34.4,0.243,46,0
354
+ 1,90,62,12,43,27.2,0.580,24,0
355
+ 3,90,78,0,0,42.7,0.559,21,0
356
+ 9,165,88,0,0,30.4,0.302,49,1
357
+ 1,125,50,40,167,33.3,0.962,28,1
358
+ 13,129,0,30,0,39.9,0.569,44,1
359
+ 12,88,74,40,54,35.3,0.378,48,0
360
+ 1,196,76,36,249,36.5,0.875,29,1
361
+ 5,189,64,33,325,31.2,0.583,29,1
362
+ 5,158,70,0,0,29.8,0.207,63,0
363
+ 5,103,108,37,0,39.2,0.305,65,0
364
+ 4,146,78,0,0,38.5,0.520,67,1
365
+ 4,147,74,25,293,34.9,0.385,30,0
366
+ 5,99,54,28,83,34.0,0.499,30,0
367
+ 6,124,72,0,0,27.6,0.368,29,1
368
+ 0,101,64,17,0,21.0,0.252,21,0
369
+ 3,81,86,16,66,27.5,0.306,22,0
370
+ 1,133,102,28,140,32.8,0.234,45,1
371
+ 3,173,82,48,465,38.4,2.137,25,1
372
+ 0,118,64,23,89,0.0,1.731,21,0
373
+ 0,84,64,22,66,35.8,0.545,21,0
374
+ 2,105,58,40,94,34.9,0.225,25,0
375
+ 2,122,52,43,158,36.2,0.816,28,0
376
+ 12,140,82,43,325,39.2,0.528,58,1
377
+ 0,98,82,15,84,25.2,0.299,22,0
378
+ 1,87,60,37,75,37.2,0.509,22,0
379
+ 4,156,75,0,0,48.3,0.238,32,1
380
+ 0,93,100,39,72,43.4,1.021,35,0
381
+ 1,107,72,30,82,30.8,0.821,24,0
382
+ 0,105,68,22,0,20.0,0.236,22,0
383
+ 1,109,60,8,182,25.4,0.947,21,0
384
+ 1,90,62,18,59,25.1,1.268,25,0
385
+ 1,125,70,24,110,24.3,0.221,25,0
386
+ 1,119,54,13,50,22.3,0.205,24,0
387
+ 5,116,74,29,0,32.3,0.660,35,1
388
+ 8,105,100,36,0,43.3,0.239,45,1
389
+ 5,144,82,26,285,32.0,0.452,58,1
390
+ 3,100,68,23,81,31.6,0.949,28,0
391
+ 1,100,66,29,196,32.0,0.444,42,0
392
+ 5,166,76,0,0,45.7,0.340,27,1
393
+ 1,131,64,14,415,23.7,0.389,21,0
394
+ 4,116,72,12,87,22.1,0.463,37,0
395
+ 4,158,78,0,0,32.9,0.803,31,1
396
+ 2,127,58,24,275,27.7,1.600,25,0
397
+ 3,96,56,34,115,24.7,0.944,39,0
398
+ 0,131,66,40,0,34.3,0.196,22,1
399
+ 3,82,70,0,0,21.1,0.389,25,0
400
+ 3,193,70,31,0,34.9,0.241,25,1
401
+ 4,95,64,0,0,32.0,0.161,31,1
402
+ 6,137,61,0,0,24.2,0.151,55,0
403
+ 5,136,84,41,88,35.0,0.286,35,1
404
+ 9,72,78,25,0,31.6,0.280,38,0
405
+ 5,168,64,0,0,32.9,0.135,41,1
406
+ 2,123,48,32,165,42.1,0.520,26,0
407
+ 4,115,72,0,0,28.9,0.376,46,1
408
+ 0,101,62,0,0,21.9,0.336,25,0
409
+ 8,197,74,0,0,25.9,1.191,39,1
410
+ 1,172,68,49,579,42.4,0.702,28,1
411
+ 6,102,90,39,0,35.7,0.674,28,0
412
+ 1,112,72,30,176,34.4,0.528,25,0
413
+ 1,143,84,23,310,42.4,1.076,22,0
414
+ 1,143,74,22,61,26.2,0.256,21,0
415
+ 0,138,60,35,167,34.6,0.534,21,1
416
+ 3,173,84,33,474,35.7,0.258,22,1
417
+ 1,97,68,21,0,27.2,1.095,22,0
418
+ 4,144,82,32,0,38.5,0.554,37,1
419
+ 1,83,68,0,0,18.2,0.624,27,0
420
+ 3,129,64,29,115,26.4,0.219,28,1
421
+ 1,119,88,41,170,45.3,0.507,26,0
422
+ 2,94,68,18,76,26.0,0.561,21,0
423
+ 0,102,64,46,78,40.6,0.496,21,0
424
+ 2,115,64,22,0,30.8,0.421,21,0
425
+ 8,151,78,32,210,42.9,0.516,36,1
426
+ 4,184,78,39,277,37.0,0.264,31,1
427
+ 0,94,0,0,0,0.0,0.256,25,0
428
+ 1,181,64,30,180,34.1,0.328,38,1
429
+ 0,135,94,46,145,40.6,0.284,26,0
430
+ 1,95,82,25,180,35.0,0.233,43,1
431
+ 2,99,0,0,0,22.2,0.108,23,0
432
+ 3,89,74,16,85,30.4,0.551,38,0
433
+ 1,80,74,11,60,30.0,0.527,22,0
434
+ 2,139,75,0,0,25.6,0.167,29,0
435
+ 1,90,68,8,0,24.5,1.138,36,0
436
+ 0,141,0,0,0,42.4,0.205,29,1
437
+ 12,140,85,33,0,37.4,0.244,41,0
438
+ 5,147,75,0,0,29.9,0.434,28,0
439
+ 1,97,70,15,0,18.2,0.147,21,0
440
+ 6,107,88,0,0,36.8,0.727,31,0
441
+ 0,189,104,25,0,34.3,0.435,41,1
442
+ 2,83,66,23,50,32.2,0.497,22,0
443
+ 4,117,64,27,120,33.2,0.230,24,0
444
+ 8,108,70,0,0,30.5,0.955,33,1
445
+ 4,117,62,12,0,29.7,0.380,30,1
446
+ 0,180,78,63,14,59.4,2.420,25,1
447
+ 1,100,72,12,70,25.3,0.658,28,0
448
+ 0,95,80,45,92,36.5,0.330,26,0
449
+ 0,104,64,37,64,33.6,0.510,22,1
450
+ 0,120,74,18,63,30.5,0.285,26,0
451
+ 1,82,64,13,95,21.2,0.415,23,0
452
+ 2,134,70,0,0,28.9,0.542,23,1
453
+ 0,91,68,32,210,39.9,0.381,25,0
454
+ 2,119,0,0,0,19.6,0.832,72,0
455
+ 2,100,54,28,105,37.8,0.498,24,0
456
+ 14,175,62,30,0,33.6,0.212,38,1
457
+ 1,135,54,0,0,26.7,0.687,62,0
458
+ 5,86,68,28,71,30.2,0.364,24,0
459
+ 10,148,84,48,237,37.6,1.001,51,1
460
+ 9,134,74,33,60,25.9,0.460,81,0
461
+ 9,120,72,22,56,20.8,0.733,48,0
462
+ 1,71,62,0,0,21.8,0.416,26,0
463
+ 8,74,70,40,49,35.3,0.705,39,0
464
+ 5,88,78,30,0,27.6,0.258,37,0
465
+ 10,115,98,0,0,24.0,1.022,34,0
466
+ 0,124,56,13,105,21.8,0.452,21,0
467
+ 0,74,52,10,36,27.8,0.269,22,0
468
+ 0,97,64,36,100,36.8,0.600,25,0
469
+ 8,120,0,0,0,30.0,0.183,38,1
470
+ 6,154,78,41,140,46.1,0.571,27,0
471
+ 1,144,82,40,0,41.3,0.607,28,0
472
+ 0,137,70,38,0,33.2,0.170,22,0
473
+ 0,119,66,27,0,38.8,0.259,22,0
474
+ 7,136,90,0,0,29.9,0.210,50,0
475
+ 4,114,64,0,0,28.9,0.126,24,0
476
+ 0,137,84,27,0,27.3,0.231,59,0
477
+ 2,105,80,45,191,33.7,0.711,29,1
478
+ 7,114,76,17,110,23.8,0.466,31,0
479
+ 8,126,74,38,75,25.9,0.162,39,0
480
+ 4,132,86,31,0,28.0,0.419,63,0
481
+ 3,158,70,30,328,35.5,0.344,35,1
482
+ 0,123,88,37,0,35.2,0.197,29,0
483
+ 4,85,58,22,49,27.8,0.306,28,0
484
+ 0,84,82,31,125,38.2,0.233,23,0
485
+ 0,145,0,0,0,44.2,0.630,31,1
486
+ 0,135,68,42,250,42.3,0.365,24,1
487
+ 1,139,62,41,480,40.7,0.536,21,0
488
+ 0,173,78,32,265,46.5,1.159,58,0
489
+ 4,99,72,17,0,25.6,0.294,28,0
490
+ 8,194,80,0,0,26.1,0.551,67,0
491
+ 2,83,65,28,66,36.8,0.629,24,0
492
+ 2,89,90,30,0,33.5,0.292,42,0
493
+ 4,99,68,38,0,32.8,0.145,33,0
494
+ 4,125,70,18,122,28.9,1.144,45,1
495
+ 3,80,0,0,0,0.0,0.174,22,0
496
+ 6,166,74,0,0,26.6,0.304,66,0
497
+ 5,110,68,0,0,26.0,0.292,30,0
498
+ 2,81,72,15,76,30.1,0.547,25,0
499
+ 7,195,70,33,145,25.1,0.163,55,1
500
+ 6,154,74,32,193,29.3,0.839,39,0
501
+ 2,117,90,19,71,25.2,0.313,21,0
502
+ 3,84,72,32,0,37.2,0.267,28,0
503
+ 6,0,68,41,0,39.0,0.727,41,1
504
+ 7,94,64,25,79,33.3,0.738,41,0
505
+ 3,96,78,39,0,37.3,0.238,40,0
506
+ 10,75,82,0,0,33.3,0.263,38,0
507
+ 0,180,90,26,90,36.5,0.314,35,1
508
+ 1,130,60,23,170,28.6,0.692,21,0
509
+ 2,84,50,23,76,30.4,0.968,21,0
510
+ 8,120,78,0,0,25.0,0.409,64,0
511
+ 12,84,72,31,0,29.7,0.297,46,1
512
+ 0,139,62,17,210,22.1,0.207,21,0
513
+ 9,91,68,0,0,24.2,0.200,58,0
514
+ 2,91,62,0,0,27.3,0.525,22,0
515
+ 3,99,54,19,86,25.6,0.154,24,0
516
+ 3,163,70,18,105,31.6,0.268,28,1
517
+ 9,145,88,34,165,30.3,0.771,53,1
518
+ 7,125,86,0,0,37.6,0.304,51,0
519
+ 13,76,60,0,0,32.8,0.180,41,0
520
+ 6,129,90,7,326,19.6,0.582,60,0
521
+ 2,68,70,32,66,25.0,0.187,25,0
522
+ 3,124,80,33,130,33.2,0.305,26,0
523
+ 6,114,0,0,0,0.0,0.189,26,0
524
+ 9,130,70,0,0,34.2,0.652,45,1
525
+ 3,125,58,0,0,31.6,0.151,24,0
526
+ 3,87,60,18,0,21.8,0.444,21,0
527
+ 1,97,64,19,82,18.2,0.299,21,0
528
+ 3,116,74,15,105,26.3,0.107,24,0
529
+ 0,117,66,31,188,30.8,0.493,22,0
530
+ 0,111,65,0,0,24.6,0.660,31,0
531
+ 2,122,60,18,106,29.8,0.717,22,0
532
+ 0,107,76,0,0,45.3,0.686,24,0
533
+ 1,86,66,52,65,41.3,0.917,29,0
534
+ 6,91,0,0,0,29.8,0.501,31,0
535
+ 1,77,56,30,56,33.3,1.251,24,0
536
+ 4,132,0,0,0,32.9,0.302,23,1
537
+ 0,105,90,0,0,29.6,0.197,46,0
538
+ 0,57,60,0,0,21.7,0.735,67,0
539
+ 0,127,80,37,210,36.3,0.804,23,0
540
+ 3,129,92,49,155,36.4,0.968,32,1
541
+ 8,100,74,40,215,39.4,0.661,43,1
542
+ 3,128,72,25,190,32.4,0.549,27,1
543
+ 10,90,85,32,0,34.9,0.825,56,1
544
+ 4,84,90,23,56,39.5,0.159,25,0
545
+ 1,88,78,29,76,32.0,0.365,29,0
546
+ 8,186,90,35,225,34.5,0.423,37,1
547
+ 5,187,76,27,207,43.6,1.034,53,1
548
+ 4,131,68,21,166,33.1,0.160,28,0
549
+ 1,164,82,43,67,32.8,0.341,50,0
550
+ 4,189,110,31,0,28.5,0.680,37,0
551
+ 1,116,70,28,0,27.4,0.204,21,0
552
+ 3,84,68,30,106,31.9,0.591,25,0
553
+ 6,114,88,0,0,27.8,0.247,66,0
554
+ 1,88,62,24,44,29.9,0.422,23,0
555
+ 1,84,64,23,115,36.9,0.471,28,0
556
+ 7,124,70,33,215,25.5,0.161,37,0
557
+ 1,97,70,40,0,38.1,0.218,30,0
558
+ 8,110,76,0,0,27.8,0.237,58,0
559
+ 11,103,68,40,0,46.2,0.126,42,0
560
+ 11,85,74,0,0,30.1,0.300,35,0
561
+ 6,125,76,0,0,33.8,0.121,54,1
562
+ 0,198,66,32,274,41.3,0.502,28,1
563
+ 1,87,68,34,77,37.6,0.401,24,0
564
+ 6,99,60,19,54,26.9,0.497,32,0
565
+ 0,91,80,0,0,32.4,0.601,27,0
566
+ 2,95,54,14,88,26.1,0.748,22,0
567
+ 1,99,72,30,18,38.6,0.412,21,0
568
+ 6,92,62,32,126,32.0,0.085,46,0
569
+ 4,154,72,29,126,31.3,0.338,37,0
570
+ 0,121,66,30,165,34.3,0.203,33,1
571
+ 3,78,70,0,0,32.5,0.270,39,0
572
+ 2,130,96,0,0,22.6,0.268,21,0
573
+ 3,111,58,31,44,29.5,0.430,22,0
574
+ 2,98,60,17,120,34.7,0.198,22,0
575
+ 1,143,86,30,330,30.1,0.892,23,0
576
+ 1,119,44,47,63,35.5,0.280,25,0
577
+ 6,108,44,20,130,24.0,0.813,35,0
578
+ 2,118,80,0,0,42.9,0.693,21,1
579
+ 10,133,68,0,0,27.0,0.245,36,0
580
+ 2,197,70,99,0,34.7,0.575,62,1
581
+ 0,151,90,46,0,42.1,0.371,21,1
582
+ 6,109,60,27,0,25.0,0.206,27,0
583
+ 12,121,78,17,0,26.5,0.259,62,0
584
+ 8,100,76,0,0,38.7,0.190,42,0
585
+ 8,124,76,24,600,28.7,0.687,52,1
586
+ 1,93,56,11,0,22.5,0.417,22,0
587
+ 8,143,66,0,0,34.9,0.129,41,1
588
+ 6,103,66,0,0,24.3,0.249,29,0
589
+ 3,176,86,27,156,33.3,1.154,52,1
590
+ 0,73,0,0,0,21.1,0.342,25,0
591
+ 11,111,84,40,0,46.8,0.925,45,1
592
+ 2,112,78,50,140,39.4,0.175,24,0
593
+ 3,132,80,0,0,34.4,0.402,44,1
594
+ 2,82,52,22,115,28.5,1.699,25,0
595
+ 6,123,72,45,230,33.6,0.733,34,0
596
+ 0,188,82,14,185,32.0,0.682,22,1
597
+ 0,67,76,0,0,45.3,0.194,46,0
598
+ 1,89,24,19,25,27.8,0.559,21,0
599
+ 1,173,74,0,0,36.8,0.088,38,1
600
+ 1,109,38,18,120,23.1,0.407,26,0
601
+ 1,108,88,19,0,27.1,0.400,24,0
602
+ 6,96,0,0,0,23.7,0.190,28,0
603
+ 1,124,74,36,0,27.8,0.100,30,0
604
+ 7,150,78,29,126,35.2,0.692,54,1
605
+ 4,183,0,0,0,28.4,0.212,36,1
606
+ 1,124,60,32,0,35.8,0.514,21,0
607
+ 1,181,78,42,293,40.0,1.258,22,1
608
+ 1,92,62,25,41,19.5,0.482,25,0
609
+ 0,152,82,39,272,41.5,0.270,27,0
610
+ 1,111,62,13,182,24.0,0.138,23,0
611
+ 3,106,54,21,158,30.9,0.292,24,0
612
+ 3,174,58,22,194,32.9,0.593,36,1
613
+ 7,168,88,42,321,38.2,0.787,40,1
614
+ 6,105,80,28,0,32.5,0.878,26,0
615
+ 11,138,74,26,144,36.1,0.557,50,1
616
+ 3,106,72,0,0,25.8,0.207,27,0
617
+ 6,117,96,0,0,28.7,0.157,30,0
618
+ 2,68,62,13,15,20.1,0.257,23,0
619
+ 9,112,82,24,0,28.2,1.282,50,1
620
+ 0,119,0,0,0,32.4,0.141,24,1
621
+ 2,112,86,42,160,38.4,0.246,28,0
622
+ 2,92,76,20,0,24.2,1.698,28,0
623
+ 6,183,94,0,0,40.8,1.461,45,0
624
+ 0,94,70,27,115,43.5,0.347,21,0
625
+ 2,108,64,0,0,30.8,0.158,21,0
626
+ 4,90,88,47,54,37.7,0.362,29,0
627
+ 0,125,68,0,0,24.7,0.206,21,0
628
+ 0,132,78,0,0,32.4,0.393,21,0
629
+ 5,128,80,0,0,34.6,0.144,45,0
630
+ 4,94,65,22,0,24.7,0.148,21,0
631
+ 7,114,64,0,0,27.4,0.732,34,1
632
+ 0,102,78,40,90,34.5,0.238,24,0
633
+ 2,111,60,0,0,26.2,0.343,23,0
634
+ 1,128,82,17,183,27.5,0.115,22,0
635
+ 10,92,62,0,0,25.9,0.167,31,0
636
+ 13,104,72,0,0,31.2,0.465,38,1
637
+ 5,104,74,0,0,28.8,0.153,48,0
638
+ 2,94,76,18,66,31.6,0.649,23,0
639
+ 7,97,76,32,91,40.9,0.871,32,1
640
+ 1,100,74,12,46,19.5,0.149,28,0
641
+ 0,102,86,17,105,29.3,0.695,27,0
642
+ 4,128,70,0,0,34.3,0.303,24,0
643
+ 6,147,80,0,0,29.5,0.178,50,1
644
+ 4,90,0,0,0,28.0,0.610,31,0
645
+ 3,103,72,30,152,27.6,0.730,27,0
646
+ 2,157,74,35,440,39.4,0.134,30,0
647
+ 1,167,74,17,144,23.4,0.447,33,1
648
+ 0,179,50,36,159,37.8,0.455,22,1
649
+ 11,136,84,35,130,28.3,0.260,42,1
650
+ 0,107,60,25,0,26.4,0.133,23,0
651
+ 1,91,54,25,100,25.2,0.234,23,0
652
+ 1,117,60,23,106,33.8,0.466,27,0
653
+ 5,123,74,40,77,34.1,0.269,28,0
654
+ 2,120,54,0,0,26.8,0.455,27,0
655
+ 1,106,70,28,135,34.2,0.142,22,0
656
+ 2,155,52,27,540,38.7,0.240,25,1
657
+ 2,101,58,35,90,21.8,0.155,22,0
658
+ 1,120,80,48,200,38.9,1.162,41,0
659
+ 11,127,106,0,0,39.0,0.190,51,0
660
+ 3,80,82,31,70,34.2,1.292,27,1
661
+ 10,162,84,0,0,27.7,0.182,54,0
662
+ 1,199,76,43,0,42.9,1.394,22,1
663
+ 8,167,106,46,231,37.6,0.165,43,1
664
+ 9,145,80,46,130,37.9,0.637,40,1
665
+ 6,115,60,39,0,33.7,0.245,40,1
666
+ 1,112,80,45,132,34.8,0.217,24,0
667
+ 4,145,82,18,0,32.5,0.235,70,1
668
+ 10,111,70,27,0,27.5,0.141,40,1
669
+ 6,98,58,33,190,34.0,0.430,43,0
670
+ 9,154,78,30,100,30.9,0.164,45,0
671
+ 6,165,68,26,168,33.6,0.631,49,0
672
+ 1,99,58,10,0,25.4,0.551,21,0
673
+ 10,68,106,23,49,35.5,0.285,47,0
674
+ 3,123,100,35,240,57.3,0.880,22,0
675
+ 8,91,82,0,0,35.6,0.587,68,0
676
+ 6,195,70,0,0,30.9,0.328,31,1
677
+ 9,156,86,0,0,24.8,0.230,53,1
678
+ 0,93,60,0,0,35.3,0.263,25,0
679
+ 3,121,52,0,0,36.0,0.127,25,1
680
+ 2,101,58,17,265,24.2,0.614,23,0
681
+ 2,56,56,28,45,24.2,0.332,22,0
682
+ 0,162,76,36,0,49.6,0.364,26,1
683
+ 0,95,64,39,105,44.6,0.366,22,0
684
+ 4,125,80,0,0,32.3,0.536,27,1
685
+ 5,136,82,0,0,0.0,0.640,69,0
686
+ 2,129,74,26,205,33.2,0.591,25,0
687
+ 3,130,64,0,0,23.1,0.314,22,0
688
+ 1,107,50,19,0,28.3,0.181,29,0
689
+ 1,140,74,26,180,24.1,0.828,23,0
690
+ 1,144,82,46,180,46.1,0.335,46,1
691
+ 8,107,80,0,0,24.6,0.856,34,0
692
+ 13,158,114,0,0,42.3,0.257,44,1
693
+ 2,121,70,32,95,39.1,0.886,23,0
694
+ 7,129,68,49,125,38.5,0.439,43,1
695
+ 2,90,60,0,0,23.5,0.191,25,0
696
+ 7,142,90,24,480,30.4,0.128,43,1
697
+ 3,169,74,19,125,29.9,0.268,31,1
698
+ 0,99,0,0,0,25.0,0.253,22,0
699
+ 4,127,88,11,155,34.5,0.598,28,0
700
+ 4,118,70,0,0,44.5,0.904,26,0
701
+ 2,122,76,27,200,35.9,0.483,26,0
702
+ 6,125,78,31,0,27.6,0.565,49,1
703
+ 1,168,88,29,0,35.0,0.905,52,1
704
+ 2,129,0,0,0,38.5,0.304,41,0
705
+ 4,110,76,20,100,28.4,0.118,27,0
706
+ 6,80,80,36,0,39.8,0.177,28,0
707
+ 10,115,0,0,0,0.0,0.261,30,1
708
+ 2,127,46,21,335,34.4,0.176,22,0
709
+ 9,164,78,0,0,32.8,0.148,45,1
710
+ 2,93,64,32,160,38.0,0.674,23,1
711
+ 3,158,64,13,387,31.2,0.295,24,0
712
+ 5,126,78,27,22,29.6,0.439,40,0
713
+ 10,129,62,36,0,41.2,0.441,38,1
714
+ 0,134,58,20,291,26.4,0.352,21,0
715
+ 3,102,74,0,0,29.5,0.121,32,0
716
+ 7,187,50,33,392,33.9,0.826,34,1
717
+ 3,173,78,39,185,33.8,0.970,31,1
718
+ 10,94,72,18,0,23.1,0.595,56,0
719
+ 1,108,60,46,178,35.5,0.415,24,0
720
+ 5,97,76,27,0,35.6,0.378,52,1
721
+ 4,83,86,19,0,29.3,0.317,34,0
722
+ 1,114,66,36,200,38.1,0.289,21,0
723
+ 1,149,68,29,127,29.3,0.349,42,1
724
+ 5,117,86,30,105,39.1,0.251,42,0
725
+ 1,111,94,0,0,32.8,0.265,45,0
726
+ 4,112,78,40,0,39.4,0.236,38,0
727
+ 1,116,78,29,180,36.1,0.496,25,0
728
+ 0,141,84,26,0,32.4,0.433,22,0
729
+ 2,175,88,0,0,22.9,0.326,22,0
730
+ 2,92,52,0,0,30.1,0.141,22,0
731
+ 3,130,78,23,79,28.4,0.323,34,1
732
+ 8,120,86,0,0,28.4,0.259,22,1
733
+ 2,174,88,37,120,44.5,0.646,24,1
734
+ 2,106,56,27,165,29.0,0.426,22,0
735
+ 2,105,75,0,0,23.3,0.560,53,0
736
+ 4,95,60,32,0,35.4,0.284,28,0
737
+ 0,126,86,27,120,27.4,0.515,21,0
738
+ 8,65,72,23,0,32.0,0.600,42,0
739
+ 2,99,60,17,160,36.6,0.453,21,0
740
+ 1,102,74,0,0,39.5,0.293,42,1
741
+ 11,120,80,37,150,42.3,0.785,48,1
742
+ 3,102,44,20,94,30.8,0.400,26,0
743
+ 1,109,58,18,116,28.5,0.219,22,0
744
+ 9,140,94,0,0,32.7,0.734,45,1
745
+ 13,153,88,37,140,40.6,1.174,39,0
746
+ 12,100,84,33,105,30.0,0.488,46,0
747
+ 1,147,94,41,0,49.3,0.358,27,1
748
+ 1,81,74,41,57,46.3,1.096,32,0
749
+ 3,187,70,22,200,36.4,0.408,36,1
750
+ 6,162,62,0,0,24.3,0.178,50,1
751
+ 4,136,70,0,0,31.2,1.182,22,1
752
+ 1,121,78,39,74,39.0,0.261,28,0
753
+ 3,108,62,24,0,26.0,0.223,25,0
754
+ 0,181,88,44,510,43.3,0.222,26,1
755
+ 8,154,78,32,0,32.4,0.443,45,1
756
+ 1,128,88,39,110,36.5,1.057,37,1
757
+ 7,137,90,41,0,32.0,0.391,39,0
758
+ 0,123,72,0,0,36.3,0.258,52,1
759
+ 1,106,76,0,0,37.5,0.197,26,0
760
+ 6,190,92,0,0,35.5,0.278,66,1
761
+ 2,88,58,26,16,28.4,0.766,22,0
762
+ 9,170,74,31,0,44.0,0.403,43,1
763
+ 9,89,62,0,0,22.5,0.142,33,0
764
+ 10,101,76,48,180,32.9,0.171,63,0
765
+ 2,122,70,27,0,36.8,0.340,27,0
766
+ 5,121,72,23,112,26.2,0.245,30,0
767
+ 1,126,60,0,0,30.1,0.349,47,1
768
+ 1,93,70,31,0,30.4,0.315,23,0
data_cache/hepatology_liver.csv ADDED
@@ -0,0 +1,583 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 65,Female,0.7,0.1,187,16,18,6.8,3.3,0.9,1
2
+ 62,Male,10.9,5.5,699,64,100,7.5,3.2,0.74,1
3
+ 62,Male,7.3,4.1,490,60,68,7,3.3,0.89,1
4
+ 58,Male,1,0.4,182,14,20,6.8,3.4,1,1
5
+ 72,Male,3.9,2,195,27,59,7.3,2.4,0.4,1
6
+ 46,Male,1.8,0.7,208,19,14,7.6,4.4,1.3,1
7
+ 26,Female,0.9,0.2,154,16,12,7,3.5,1,1
8
+ 29,Female,0.9,0.3,202,14,11,6.7,3.6,1.1,1
9
+ 17,Male,0.9,0.3,202,22,19,7.4,4.1,1.2,2
10
+ 55,Male,0.7,0.2,290,53,58,6.8,3.4,1,1
11
+ 57,Male,0.6,0.1,210,51,59,5.9,2.7,0.8,1
12
+ 72,Male,2.7,1.3,260,31,56,7.4,3,0.6,1
13
+ 64,Male,0.9,0.3,310,61,58,7,3.4,0.9,2
14
+ 74,Female,1.1,0.4,214,22,30,8.1,4.1,1,1
15
+ 61,Male,0.7,0.2,145,53,41,5.8,2.7,0.87,1
16
+ 25,Male,0.6,0.1,183,91,53,5.5,2.3,0.7,2
17
+ 38,Male,1.8,0.8,342,168,441,7.6,4.4,1.3,1
18
+ 33,Male,1.6,0.5,165,15,23,7.3,3.5,0.92,2
19
+ 40,Female,0.9,0.3,293,232,245,6.8,3.1,0.8,1
20
+ 40,Female,0.9,0.3,293,232,245,6.8,3.1,0.8,1
21
+ 51,Male,2.2,1,610,17,28,7.3,2.6,0.55,1
22
+ 51,Male,2.9,1.3,482,22,34,7,2.4,0.5,1
23
+ 62,Male,6.8,3,542,116,66,6.4,3.1,0.9,1
24
+ 40,Male,1.9,1,231,16,55,4.3,1.6,0.6,1
25
+ 63,Male,0.9,0.2,194,52,45,6,3.9,1.85,2
26
+ 34,Male,4.1,2,289,875,731,5,2.7,1.1,1
27
+ 34,Male,4.1,2,289,875,731,5,2.7,1.1,1
28
+ 34,Male,6.2,3,240,1680,850,7.2,4,1.2,1
29
+ 20,Male,1.1,0.5,128,20,30,3.9,1.9,0.95,2
30
+ 84,Female,0.7,0.2,188,13,21,6,3.2,1.1,2
31
+ 57,Male,4,1.9,190,45,111,5.2,1.5,0.4,1
32
+ 52,Male,0.9,0.2,156,35,44,4.9,2.9,1.4,1
33
+ 57,Male,1,0.3,187,19,23,5.2,2.9,1.2,2
34
+ 38,Female,2.6,1.2,410,59,57,5.6,3,0.8,2
35
+ 38,Female,2.6,1.2,410,59,57,5.6,3,0.8,2
36
+ 30,Male,1.3,0.4,482,102,80,6.9,3.3,0.9,1
37
+ 17,Female,0.7,0.2,145,18,36,7.2,3.9,1.18,2
38
+ 46,Female,14.2,7.8,374,38,77,4.3,2,0.8,1
39
+ 48,Male,1.4,0.6,263,38,66,5.8,2.2,0.61,1
40
+ 47,Male,2.7,1.3,275,123,73,6.2,3.3,1.1,1
41
+ 45,Male,2.4,1.1,168,33,50,5.1,2.6,1,1
42
+ 62,Male,0.6,0.1,160,42,110,4.9,2.6,1.1,2
43
+ 42,Male,6.8,3.2,630,25,47,6.1,2.3,0.6,2
44
+ 50,Male,2.6,1.2,415,407,576,6.4,3.2,1,1
45
+ 85,Female,1,0.3,208,17,15,7,3.6,1,2
46
+ 35,Male,1.8,0.6,275,48,178,6.5,3.2,0.9,2
47
+ 21,Male,3.9,1.8,150,36,27,6.8,3.9,1.34,1
48
+ 40,Male,1.1,0.3,230,1630,960,4.9,2.8,1.3,1
49
+ 32,Female,0.6,0.1,176,39,28,6,3,1,1
50
+ 55,Male,18.4,8.8,206,64,178,6.2,1.8,0.4,1
51
+ 45,Female,0.7,0.2,170,21,14,5.7,2.5,0.7,1
52
+ 34,Female,0.6,0.1,161,15,19,6.6,3.4,1,1
53
+ 38,Male,3.1,1.6,253,80,406,6.8,3.9,1.3,1
54
+ 38,Male,1.1,0.3,198,86,150,6.3,3.5,1.2,1
55
+ 42,Male,8.9,4.5,272,31,61,5.8,2,0.5,1
56
+ 42,Male,8.9,4.5,272,31,61,5.8,2,0.5,1
57
+ 33,Male,0.8,0.2,198,26,23,8,4,1,2
58
+ 48,Female,0.9,0.2,175,24,54,5.5,2.7,0.9,2
59
+ 51,Male,0.8,0.2,367,42,18,5.2,2,0.6,1
60
+ 64,Male,1.1,0.5,145,20,24,5.5,3.2,1.39,2
61
+ 31,Female,0.8,0.2,158,21,16,6,3,1,1
62
+ 58,Male,1,0.5,158,37,43,7.2,3.6,1,1
63
+ 58,Male,1,0.5,158,37,43,7.2,3.6,1,1
64
+ 57,Male,0.7,0.2,208,35,97,5.1,2.1,0.7,1
65
+ 57,Male,1.3,0.4,259,40,86,6.5,2.5,0.6,1
66
+ 57,Male,1.4,0.7,470,62,88,5.6,2.5,0.8,1
67
+ 54,Male,2.2,1.2,195,55,95,6,3.7,1.6,1
68
+ 37,Male,1.8,0.8,215,53,58,6.4,3.8,1.4,1
69
+ 66,Male,0.7,0.2,239,27,26,6.3,3.7,1.4,1
70
+ 60,Male,0.8,0.2,215,24,17,6.3,3,0.9,2
71
+ 19,Female,0.7,0.2,186,166,397,5.5,3,1.2,1
72
+ 75,Female,0.8,0.2,188,20,29,4.4,1.8,0.6,1
73
+ 75,Female,0.8,0.2,205,27,24,4.4,2,0.8,1
74
+ 52,Male,0.6,0.1,171,22,16,6.6,3.6,1.2,1
75
+ 68,Male,0.7,0.1,145,20,22,5.8,2.9,1,1
76
+ 29,Female,0.7,0.1,162,52,41,5.2,2.5,0.9,2
77
+ 31,Male,0.9,0.2,518,189,17,5.3,2.3,0.7,1
78
+ 68,Female,0.6,0.1,1620,95,127,4.6,2.1,0.8,1
79
+ 70,Male,1.4,0.6,146,12,24,6.2,3.8,1.58,2
80
+ 58,Female,2.8,1.3,670,48,79,4.7,1.6,0.5,1
81
+ 58,Female,2.4,1.1,915,60,142,4.7,1.8,0.6,1
82
+ 29,Male,1,0.3,75,25,26,5.1,2.9,1.3,1
83
+ 49,Male,0.7,0.1,148,14,12,5.4,2.8,1,2
84
+ 33,Male,2,1,258,194,152,5.4,3,1.25,1
85
+ 32,Male,0.6,0.1,237,45,31,7.5,4.3,1.34,1
86
+ 14,Male,1.4,0.5,269,58,45,6.7,3.9,1.4,1
87
+ 13,Male,0.6,0.1,320,28,56,7.2,3.6,1,2
88
+ 58,Male,0.8,0.2,298,33,59,6.2,3.1,1,1
89
+ 18,Male,0.6,0.2,538,33,34,7.5,3.2,0.7,1
90
+ 60,Male,4,1.9,238,119,350,7.1,3.3,0.8,1
91
+ 60,Male,5.7,2.8,214,412,850,7.3,3.2,0.78,1
92
+ 60,Male,6.8,3.2,308,404,794,6.8,3,0.7,1
93
+ 60,Male,8.6,4,298,412,850,7.4,3,0.6,1
94
+ 60,Male,5.8,2.7,204,220,400,7,3,0.7,1
95
+ 60,Male,5.2,2.4,168,126,202,6.8,2.9,0.7,1
96
+ 75,Male,0.9,0.2,282,25,23,4.4,2.2,1,1
97
+ 39,Male,3.8,1.5,298,102,630,7.1,3.3,0.8,1
98
+ 39,Male,6.6,3,215,190,950,4,1.7,0.7,1
99
+ 18,Male,0.6,0.1,265,97,161,5.9,3.1,1.1,1
100
+ 18,Male,0.7,0.1,312,308,405,6.9,3.7,1.1,1
101
+ 27,Male,0.6,0.2,161,27,28,3.7,1.6,0.76,2
102
+ 27,Male,0.7,0.2,243,21,23,5.3,2.3,0.7,2
103
+ 17,Male,0.9,0.2,224,36,45,6.9,4.2,1.55,1
104
+ 55,Female,0.8,0.2,225,14,23,6.1,3.3,1.2,2
105
+ 63,Male,0.5,0.1,170,21,28,5.5,2.5,0.8,1
106
+ 36,Male,5.3,2.3,145,32,92,5.1,2.6,1,2
107
+ 36,Male,5.3,2.3,145,32,92,5.1,2.6,1,2
108
+ 36,Male,0.8,0.2,158,29,39,6,2.2,0.5,2
109
+ 36,Male,0.8,0.2,158,29,39,6,2.2,0.5,2
110
+ 36,Male,0.9,0.1,486,25,34,5.9,2.8,0.9,2
111
+ 24,Female,0.7,0.2,188,11,10,5.5,2.3,0.71,2
112
+ 48,Male,3.2,1.6,257,33,116,5.7,2.2,0.62,1
113
+ 27,Male,1.2,0.4,179,63,39,6.1,3.3,1.1,2
114
+ 74,Male,0.6,0.1,272,24,98,5,2,0.6,1
115
+ 50,Male,5.8,3,661,181,285,5.7,2.3,0.67,2
116
+ 50,Male,7.3,3.6,1580,88,64,5.6,2.3,0.6,2
117
+ 48,Male,0.7,0.1,1630,74,149,5.3,2,0.6,1
118
+ 32,Male,12.7,6.2,194,2000,2946,5.7,3.3,1.3,1
119
+ 32,Male,15.9,7,280,1350,1600,5.6,2.8,1,1
120
+ 32,Male,18,8.2,298,1250,1050,5.4,2.6,0.9,1
121
+ 32,Male,23,11.3,300,482,275,7.1,3.5,0.9,1
122
+ 32,Male,22.7,10.2,290,322,113,6.6,2.8,0.7,1
123
+ 58,Male,1.7,0.8,188,60,84,5.9,3.5,1.4,2
124
+ 64,Female,0.8,0.2,178,17,18,6.3,3.1,0.9,1
125
+ 28,Male,0.6,0.1,177,36,29,6.9,4.1,1.4,2
126
+ 60,Male,1.8,0.5,201,45,25,3.9,1.7,0.7,2
127
+ 48,Male,5.8,2.5,802,133,88,6,2.8,0.8,1
128
+ 64,Male,3,1.4,248,46,40,6.5,3.2,0.9,1
129
+ 58,Female,1.7,0.8,1896,61,83,8,3.9,0.95,1
130
+ 45,Male,2.8,1.7,263,57,65,5.1,2.3,0.8,1
131
+ 45,Male,3.2,1.4,512,50,58,6,2.7,0.8,1
132
+ 70,Female,0.7,0.2,237,18,28,5.8,2.5,0.75,2
133
+ 18,Female,0.8,0.2,199,34,31,6.5,3.5,1.16,2
134
+ 53,Male,0.9,0.4,238,17,14,6.6,2.9,0.8,1
135
+ 18,Male,1.8,0.7,178,35,36,6.8,3.6,1.1,1
136
+ 66,Male,11.3,5.6,1110,1250,4929,7,2.4,0.5,1
137
+ 46,Female,4.7,2.2,310,62,90,6.4,2.5,0.6,1
138
+ 18,Male,0.8,0.2,282,72,140,5.5,2.5,0.8,1
139
+ 18,Male,0.8,0.2,282,72,140,5.5,2.5,0.8,1
140
+ 15,Male,0.8,0.2,380,25,66,6.1,3.7,1.5,1
141
+ 60,Male,0.6,0.1,186,20,21,6.2,3.3,1.1,2
142
+ 66,Female,4.2,2.1,159,15,30,7.1,2.2,0.4,1
143
+ 30,Male,1.6,0.4,332,84,139,5.6,2.7,0.9,1
144
+ 30,Male,1.6,0.4,332,84,139,5.6,2.7,0.9,1
145
+ 45,Female,3.5,1.5,189,63,87,5.6,2.9,1,1
146
+ 65,Male,0.8,0.2,201,18,22,5.4,2.9,1.1,2
147
+ 66,Female,2.9,1.3,168,21,38,5.5,1.8,0.4,1
148
+ 65,Male,0.7,0.1,392,20,30,5.3,2.8,1.1,1
149
+ 50,Male,0.9,0.2,202,20,26,7.2,4.5,1.66,1
150
+ 60,Male,0.8,0.2,286,21,27,7.1,4,1.2,1
151
+ 56,Male,1.1,0.5,180,30,42,6.9,3.8,1.2,2
152
+ 50,Male,1.6,0.8,218,18,20,5.9,2.9,0.96,1
153
+ 46,Female,0.8,0.2,182,20,40,6,2.9,0.9,1
154
+ 52,Male,0.6,0.1,178,26,27,6.5,3.6,1.2,2
155
+ 34,Male,5.9,2.5,290,45,233,5.6,2.7,0.9,1
156
+ 34,Male,8.7,4,298,58,138,5.8,2.4,0.7,1
157
+ 32,Male,0.9,0.3,462,70,82,6.2,3.1,1,1
158
+ 72,Male,0.7,0.1,196,20,35,5.8,2,0.5,1
159
+ 72,Male,0.7,0.1,196,20,35,5.8,2,0.5,1
160
+ 50,Male,1.2,0.4,282,36,32,7.2,3.9,1.1,1
161
+ 60,Male,11,4.9,750,140,350,5.5,2.1,0.6,1
162
+ 60,Male,11.5,5,1050,99,187,6.2,2.8,0.8,1
163
+ 60,Male,5.8,2.7,599,43,66,5.4,1.8,0.5,1
164
+ 39,Male,1.9,0.9,180,42,62,7.4,4.3,1.38,1
165
+ 39,Male,1.9,0.9,180,42,62,7.4,4.3,1.38,1
166
+ 48,Male,4.5,2.3,282,13,74,7,2.4,0.52,1
167
+ 55,Male,75,3.6,332,40,66,6.2,2.5,0.6,1
168
+ 47,Female,3,1.5,292,64,67,5.6,1.8,0.47,1
169
+ 60,Male,22.8,12.6,962,53,41,6.9,3.3,0.9,1
170
+ 60,Male,8.9,4,950,33,32,6.8,3.1,0.8,1
171
+ 72,Male,1.7,0.8,200,28,37,6.2,3,0.93,1
172
+ 44,Female,1.9,0.6,298,378,602,6.6,3.3,1,1
173
+ 55,Male,14.1,7.6,750,35,63,5,1.6,0.47,1
174
+ 31,Male,0.6,0.1,175,48,34,6,3.7,1.6,1
175
+ 31,Male,0.6,0.1,175,48,34,6,3.7,1.6,1
176
+ 31,Male,0.8,0.2,198,43,31,7.3,4,1.2,1
177
+ 55,Male,0.8,0.2,482,112,99,5.7,2.6,0.8,1
178
+ 75,Male,14.8,9,1020,71,42,5.3,2.2,0.7,1
179
+ 75,Male,10.6,5,562,37,29,5.1,1.8,0.5,1
180
+ 75,Male,8,4.6,386,30,25,5.5,1.8,0.48,1
181
+ 75,Male,2.8,1.3,250,23,29,2.7,0.9,0.5,1
182
+ 75,Male,2.9,1.3,218,33,37,3,1.5,1,1
183
+ 65,Male,1.9,0.8,170,36,43,3.8,1.4,0.58,2
184
+ 40,Male,0.6,0.1,171,20,17,5.4,2.5,0.8,1
185
+ 64,Male,1.1,0.4,201,18,19,6.9,4.1,1.4,1
186
+ 38,Male,1.5,0.4,298,60,103,6,3,1,2
187
+ 60,Male,3.2,1.8,750,79,145,7.8,3.2,0.69,1
188
+ 60,Male,2.1,1,191,114,247,4,1.6,0.6,1
189
+ 60,Male,1.9,0.8,614,42,38,4.5,1.8,0.6,1
190
+ 48,Female,0.8,0.2,218,32,28,5.2,2.5,0.9,2
191
+ 60,Male,6.3,3.2,314,118,114,6.6,3.7,1.27,1
192
+ 60,Male,5.8,3,257,107,104,6.6,3.5,1.12,1
193
+ 60,Male,2.3,0.6,272,79,51,6.6,3.5,1.1,1
194
+ 49,Male,1.3,0.4,206,30,25,6,3.1,1.06,2
195
+ 49,Male,2,0.6,209,48,32,5.7,3,1.1,2
196
+ 60,Male,2.4,1,1124,30,54,5.2,1.9,0.5,1
197
+ 60,Male,2,1.1,664,52,104,6,2.1,0.53,1
198
+ 26,Female,0.6,0.2,142,12,32,5.7,2.4,0.75,1
199
+ 41,Male,0.9,0.2,169,22,18,6.1,3,0.9,2
200
+ 7,Female,27.2,11.8,1420,790,1050,6.1,2,0.4,1
201
+ 49,Male,0.6,0.1,218,50,53,5,2.4,0.9,1
202
+ 49,Male,0.6,0.1,218,50,53,5,2.4,0.9,1
203
+ 38,Female,0.8,0.2,145,19,23,6.1,3.1,1.03,2
204
+ 21,Male,1,0.3,142,27,21,6.4,3.5,1.2,2
205
+ 21,Male,0.7,0.2,135,27,26,6.4,3.3,1,2
206
+ 45,Male,2.5,1.2,163,28,22,7.6,4,1.1,1
207
+ 40,Male,3.6,1.8,285,50,60,7,2.9,0.7,1
208
+ 40,Male,3.9,1.7,350,950,1500,6.7,3.8,1.3,1
209
+ 70,Female,0.9,0.3,220,53,95,6.1,2.8,0.68,1
210
+ 45,Female,0.9,0.3,189,23,33,6.6,3.9,,1
211
+ 28,Male,0.8,0.3,190,20,14,4.1,2.4,1.4,1
212
+ 42,Male,2.7,1.3,219,60,180,7,3.2,0.8,1
213
+ 22,Male,2.7,1,160,82,127,5.5,3.1,1.2,2
214
+ 8,Female,0.9,0.2,401,25,58,7.5,3.4,0.8,1
215
+ 38,Male,1.7,1,180,18,34,7.2,3.6,1,1
216
+ 66,Male,0.6,0.2,100,17,148,5,3.3,1.9,2
217
+ 55,Male,0.9,0.2,116,36,16,6.2,3.2,1,2
218
+ 49,Male,1.1,0.5,159,30,31,7,4.3,1.5,1
219
+ 6,Male,0.6,0.1,289,38,30,4.8,2,0.7,2
220
+ 37,Male,0.8,0.2,125,41,39,6.4,3.4,1.1,1
221
+ 37,Male,0.8,0.2,147,27,46,5,2.5,1,1
222
+ 47,Male,0.9,0.2,192,38,24,7.3,4.3,1.4,1
223
+ 47,Male,0.9,0.2,265,40,28,8,4,1,1
224
+ 50,Male,1.1,0.3,175,20,19,7.1,4.5,1.7,2
225
+ 70,Male,1.7,0.5,400,56,44,5.7,3.1,1.1,1
226
+ 26,Male,0.6,0.2,120,45,51,7.9,4,1,1
227
+ 26,Male,1.3,0.4,173,38,62,8,4,1,1
228
+ 68,Female,0.7,0.2,186,18,15,6.4,3.8,1.4,1
229
+ 65,Female,1,0.3,202,26,13,5.3,2.6,0.9,2
230
+ 46,Male,0.6,0.2,290,26,21,6,3,1,1
231
+ 61,Male,1.5,0.6,196,61,85,6.7,3.8,1.3,2
232
+ 61,Male,0.8,0.1,282,85,231,8.5,4.3,1,1
233
+ 50,Male,2.7,1.6,157,149,156,7.9,3.1,0.6,1
234
+ 33,Male,2,1.4,2110,48,89,6.2,3,0.9,1
235
+ 40,Female,0.9,0.2,285,32,27,7.7,3.5,0.8,1
236
+ 60,Male,1.5,0.6,360,230,298,4.5,2,0.8,1
237
+ 22,Male,0.8,0.2,300,57,40,7.9,3.8,0.9,2
238
+ 35,Female,0.9,0.3,158,20,16,8,4,1,1
239
+ 35,Female,0.9,0.2,190,40,35,7.3,4.7,1.8,2
240
+ 40,Male,0.9,0.3,196,69,48,6.8,3.1,0.8,1
241
+ 48,Male,0.7,0.2,165,32,30,8,4,1,2
242
+ 51,Male,0.8,0.2,230,24,46,6.5,3.1,,1
243
+ 29,Female,0.8,0.2,205,30,23,8.2,4.1,1,1
244
+ 28,Female,0.9,0.2,316,25,23,8.5,5.5,1.8,1
245
+ 54,Male,0.8,0.2,218,20,19,6.3,2.5,0.6,1
246
+ 54,Male,0.9,0.2,290,15,18,6.1,2.8,0.8,1
247
+ 55,Male,1.8,9,272,22,79,6.1,2.7,0.7,1
248
+ 55,Male,0.9,0.2,190,25,28,5.9,2.7,0.8,1
249
+ 40,Male,0.7,0.1,202,37,29,5,2.6,1,1
250
+ 33,Male,1.2,0.3,498,28,25,7,3,0.7,1
251
+ 33,Male,2.1,1.3,480,38,22,6.5,3,0.8,1
252
+ 33,Male,0.9,0.8,680,37,40,5.9,2.6,0.8,1
253
+ 65,Male,1.1,0.3,258,48,40,7,3.9,1.2,2
254
+ 35,Female,0.6,0.2,180,12,15,5.2,2.7,,2
255
+ 38,Female,0.7,0.1,152,90,21,7.1,4.2,1.4,2
256
+ 38,Male,1.7,0.7,859,89,48,6,3,1,1
257
+ 50,Male,0.9,0.3,901,23,17,6.2,3.5,1.2,1
258
+ 44,Male,0.8,0.2,335,148,86,5.6,3,1.1,1
259
+ 36,Male,0.8,0.2,182,31,34,6.4,3.8,1.4,2
260
+ 42,Male,30.5,14.2,285,65,130,5.2,2.1,0.6,1
261
+ 42,Male,16.4,8.9,245,56,87,5.4,2,0.5,1
262
+ 33,Male,1.5,7,505,205,140,7.5,3.9,1,1
263
+ 18,Male,0.8,0.2,228,55,54,6.9,4,1.3,1
264
+ 38,Female,0.8,0.2,185,25,21,7,3,0.7,1
265
+ 38,Male,0.8,0.2,247,55,92,7.4,4.3,1.38,2
266
+ 4,Male,0.9,0.2,348,30,34,8,4,1,2
267
+ 62,Male,1.2,0.4,195,38,54,6.3,3.8,1.5,1
268
+ 43,Female,0.9,0.3,140,12,29,7.4,3.5,1.8,1
269
+ 40,Male,14.5,6.4,358,50,75,5.7,2.1,0.5,1
270
+ 26,Male,0.6,0.1,110,15,20,2.8,1.6,1.3,1
271
+ 37,Male,0.7,0.2,235,96,54,9.5,4.9,1,1
272
+ 4,Male,0.8,0.2,460,152,231,6.5,3.2,0.9,2
273
+ 21,Male,18.5,9.5,380,390,500,8.2,4.1,1,1
274
+ 30,Male,0.7,0.2,262,15,18,9.6,4.7,1.2,1
275
+ 33,Male,1.8,0.8,196,25,22,8,4,1,1
276
+ 26,Male,1.9,0.8,180,22,19,8.2,4.1,1,2
277
+ 35,Male,0.9,0.2,190,25,20,6.4,3.6,1.2,2
278
+ 60,Male,2,0.8,190,45,40,6,2.8,0.8,1
279
+ 45,Male,2.2,0.8,209,25,20,8,4,1,1
280
+ 48,Female,1,1.4,144,18,14,8.3,4.2,1,1
281
+ 58,Male,0.8,0.2,123,56,48,6,3,1,1
282
+ 50,Male,0.7,0.2,192,18,15,7.4,4.2,1.3,2
283
+ 50,Male,0.7,0.2,188,12,14,7,3.4,0.9,1
284
+ 18,Male,1.3,0.7,316,10,21,6,2.1,0.5,2
285
+ 18,Male,0.9,0.3,300,30,48,8,4,1,1
286
+ 13,Male,1.5,0.5,575,29,24,7.9,3.9,0.9,1
287
+ 34,Female,0.8,0.2,192,15,12,8.6,4.7,1.2,1
288
+ 43,Male,1.3,0.6,155,15,20,8,4,1,2
289
+ 50,Female,1,0.5,239,16,39,7.5,3.7,0.9,1
290
+ 57,Male,4.5,2.3,315,120,105,7,4,1.3,1
291
+ 45,Female,1,0.3,250,48,44,8.6,4.3,1,1
292
+ 60,Male,0.7,0.2,174,32,14,7.8,4.2,1.1,2
293
+ 45,Male,0.6,0.2,245,22,24,7.1,3.4,0.9,1
294
+ 23,Male,1.1,0.5,191,37,41,7.7,4.3,1.2,2
295
+ 22,Male,2.4,1,340,25,21,8.3,4.5,1.1,1
296
+ 22,Male,0.6,0.2,202,78,41,8,3.9,0.9,1
297
+ 74,Female,0.9,0.3,234,16,19,7.9,4,1,1
298
+ 25,Female,0.9,0.3,159,24,25,6.9,4.4,1.7,2
299
+ 31,Female,1.1,0.3,190,26,15,7.9,3.8,0.9,1
300
+ 24,Female,0.9,0.2,195,40,35,7.4,4.1,1.2,2
301
+ 58,Male,0.8,0.2,180,32,25,8.2,4.4,1.1,2
302
+ 51,Female,0.9,0.2,280,21,30,6.7,3.2,0.8,1
303
+ 50,Female,1.7,0.6,430,28,32,6.8,3.5,1,1
304
+ 50,Male,0.7,0.2,206,18,17,8.4,4.2,1,2
305
+ 55,Female,0.8,0.2,155,21,17,6.9,3.8,1.4,1
306
+ 54,Female,1.4,0.7,195,36,16,7.9,3.7,0.9,2
307
+ 48,Male,1.6,1,588,74,113,7.3,2.4,0.4,1
308
+ 30,Male,0.8,0.2,174,21,47,4.6,2.3,1,1
309
+ 45,Female,0.8,0.2,165,22,18,8.2,4.1,1,1
310
+ 48,Female,1.1,0.7,527,178,250,8,4.2,1.1,1
311
+ 51,Male,0.8,0.2,175,48,22,8.1,4.6,1.3,1
312
+ 54,Female,23.2,12.6,574,43,47,7.2,3.5,0.9,1
313
+ 27,Male,1.3,0.6,106,25,54,8.5,4.8,,2
314
+ 30,Female,0.8,0.2,158,25,22,7.9,4.5,1.3,2
315
+ 26,Male,2,0.9,195,24,65,7.8,4.3,1.2,1
316
+ 22,Male,0.9,0.3,179,18,21,6.7,3.7,1.2,2
317
+ 44,Male,0.9,0.2,182,29,82,7.1,3.7,1,2
318
+ 35,Male,0.7,0.2,198,42,30,6.8,3.4,1,1
319
+ 38,Male,3.7,2.2,216,179,232,7.8,4.5,1.3,1
320
+ 14,Male,0.9,0.3,310,21,16,8.1,4.2,1,2
321
+ 30,Female,0.7,0.2,63,31,27,5.8,3.4,1.4,1
322
+ 30,Female,0.8,0.2,198,30,58,5.2,2.8,1.1,1
323
+ 36,Male,1.7,0.5,205,36,34,7.1,3.9,1.2,1
324
+ 12,Male,0.8,0.2,302,47,67,6.7,3.5,1.1,2
325
+ 60,Male,2.6,1.2,171,42,37,5.4,2.7,1,1
326
+ 42,Male,0.8,0.2,158,27,23,6.7,3.1,0.8,2
327
+ 36,Female,1.2,0.4,358,160,90,8.3,4.4,1.1,2
328
+ 24,Male,3.3,1.6,174,11,33,7.6,3.9,1,2
329
+ 43,Male,0.8,0.2,192,29,20,6,2.9,0.9,2
330
+ 21,Male,0.7,0.2,211,14,23,7.3,4.1,1.2,2
331
+ 26,Male,2,0.9,157,54,68,6.1,2.7,0.8,1
332
+ 26,Male,1.7,0.6,210,62,56,5.4,2.2,0.6,1
333
+ 26,Male,7.1,3.3,258,80,113,6.2,2.9,0.8,1
334
+ 36,Female,0.7,0.2,152,21,25,5.9,3.1,1.1,2
335
+ 13,Female,0.7,0.2,350,17,24,7.4,4,1.1,1
336
+ 13,Female,0.7,0.1,182,24,19,8.9,4.9,1.2,1
337
+ 75,Male,6.7,3.6,458,198,143,6.2,3.2,1,1
338
+ 75,Male,2.5,1.2,375,85,68,6.4,2.9,0.8,1
339
+ 75,Male,1.8,0.8,405,79,50,6.1,2.9,0.9,1
340
+ 75,Male,1.4,0.4,215,50,30,5.9,2.6,0.7,1
341
+ 75,Male,0.9,0.2,206,44,33,6.2,2.9,0.8,1
342
+ 36,Female,0.8,0.2,650,70,138,6.6,3.1,0.8,1
343
+ 35,Male,0.8,0.2,198,36,32,7,4,1.3,2
344
+ 70,Male,3.1,1.6,198,40,28,5.6,2,0.5,1
345
+ 37,Male,0.8,0.2,195,60,40,8.2,5,1.5,2
346
+ 60,Male,2.9,1.3,230,32,44,5.6,2,0.5,1
347
+ 46,Male,0.6,0.2,115,14,11,6.9,3.4,0.9,1
348
+ 38,Male,0.7,0.2,216,349,105,7,3.5,1,1
349
+ 70,Male,1.3,0.4,358,19,14,6.1,2.8,0.8,1
350
+ 49,Female,0.8,0.2,158,19,15,6.6,3.6,1.2,2
351
+ 37,Male,1.8,0.8,145,62,58,5.7,2.9,1,1
352
+ 37,Male,1.3,0.4,195,41,38,5.3,2.1,0.6,1
353
+ 26,Female,0.7,0.2,144,36,33,8.2,4.3,1.1,1
354
+ 48,Female,1.4,0.8,621,110,176,7.2,3.9,1.1,1
355
+ 48,Female,0.8,0.2,150,25,23,7.5,3.9,1,1
356
+ 19,Male,1.4,0.8,178,13,26,8,4.6,1.3,2
357
+ 33,Male,0.7,0.2,256,21,30,8.5,3.9,0.8,1
358
+ 33,Male,2.1,0.7,205,50,38,6.8,3,0.7,1
359
+ 37,Male,0.7,0.2,176,28,34,5.6,2.6,0.8,1
360
+ 69,Female,0.8,0.2,146,42,70,8.4,4.9,1.4,2
361
+ 24,Male,0.7,0.2,218,47,26,6.6,3.3,1,1
362
+ 65,Female,0.7,0.2,182,23,28,6.8,2.9,0.7,2
363
+ 55,Male,1.1,0.3,215,21,15,6.2,2.9,0.8,2
364
+ 42,Female,0.9,0.2,165,26,29,8.5,4.4,1,2
365
+ 21,Male,0.8,0.2,183,33,57,6.8,3.5,1,2
366
+ 40,Male,0.7,0.2,176,28,43,5.3,2.4,0.8,2
367
+ 16,Male,0.7,0.2,418,28,35,7.2,4.1,1.3,2
368
+ 60,Male,2.2,1,271,45,52,6.1,2.9,0.9,2
369
+ 42,Female,0.8,0.2,182,22,20,7.2,3.9,1.1,1
370
+ 58,Female,0.8,0.2,130,24,25,7,4,1.3,1
371
+ 54,Female,22.6,11.4,558,30,37,7.8,3.4,0.8,1
372
+ 33,Male,0.8,0.2,135,30,29,7.2,4.4,1.5,2
373
+ 48,Male,0.7,0.2,326,29,17,8.7,5.5,1.7,1
374
+ 25,Female,0.7,0.1,140,32,25,7.6,4.3,1.3,2
375
+ 56,Female,0.7,0.1,145,26,23,7,4,1.3,2
376
+ 47,Male,3.5,1.6,206,32,31,6.8,3.4,1,1
377
+ 33,Male,0.7,0.1,168,35,33,7,3.7,1.1,1
378
+ 20,Female,0.6,0.2,202,12,13,6.1,3,0.9,2
379
+ 50,Female,0.7,0.1,192,20,41,7.3,3.3,0.8,1
380
+ 72,Male,0.7,0.2,185,16,22,7.3,3.7,1,2
381
+ 50,Male,1.7,0.8,331,36,53,7.3,3.4,0.9,1
382
+ 39,Male,0.6,0.2,188,28,43,8.1,3.3,0.6,1
383
+ 58,Female,0.7,0.1,172,27,22,6.7,3.2,0.9,1
384
+ 60,Female,1.4,0.7,159,10,12,4.9,2.5,1,2
385
+ 34,Male,3.7,2.1,490,115,91,6.5,2.8,0.7,1
386
+ 50,Male,0.8,0.2,152,29,30,7.4,4.1,1.3,1
387
+ 38,Male,2.7,1.4,105,25,21,7.5,4.2,1.2,2
388
+ 51,Male,0.8,0.2,160,34,20,6.9,3.7,1.1,1
389
+ 46,Male,0.8,0.2,160,31,40,7.3,3.8,1.1,1
390
+ 72,Male,0.6,0.1,102,31,35,6.3,3.2,1,1
391
+ 72,Male,0.8,0.2,148,23,35,6,3,1,1
392
+ 75,Male,0.9,0.2,162,25,20,6.9,3.7,1.1,1
393
+ 41,Male,7.5,4.3,149,94,92,6.3,3.1,0.9,1
394
+ 41,Male,2.7,1.3,580,142,68,8,4,1,1
395
+ 48,Female,1,0.3,310,37,56,5.9,2.5,0.7,1
396
+ 45,Male,0.8,0.2,140,24,20,6.3,3.2,1,2
397
+ 74,Male,1,0.3,175,30,32,6.4,3.4,1.1,1
398
+ 78,Male,1,0.3,152,28,70,6.3,3.1,0.9,1
399
+ 38,Male,0.8,0.2,208,25,50,7.1,3.7,1,1
400
+ 27,Male,1,0.2,205,137,145,6,3,1,1
401
+ 66,Female,0.7,0.2,162,24,20,6.4,3.2,1,2
402
+ 50,Male,7.3,3.7,92,44,236,6.8,1.6,0.3,1
403
+ 42,Female,0.5,0.1,162,155,108,8.1,4,0.9,1
404
+ 65,Male,0.7,0.2,199,19,22,6.3,3.6,1.3,2
405
+ 22,Male,0.8,0.2,198,20,26,6.8,3.9,1.3,1
406
+ 31,Female,0.8,0.2,215,15,21,7.6,4,1.1,1
407
+ 45,Male,0.7,0.2,180,18,58,6.7,3.7,1.2,2
408
+ 12,Male,1,0.2,719,157,108,7.2,3.7,1,1
409
+ 48,Male,2.4,1.1,554,141,73,7.5,3.6,0.9,1
410
+ 48,Male,5,2.6,555,284,190,6.5,3.3,1,1
411
+ 18,Male,1.4,0.6,215,440,850,5,1.9,0.6,1
412
+ 23,Female,2.3,0.8,509,28,44,6.9,2.9,0.7,2
413
+ 65,Male,4.9,2.7,190,33,71,7.1,2.9,0.7,1
414
+ 48,Male,0.7,0.2,208,15,30,4.6,2.1,0.8,2
415
+ 65,Male,1.4,0.6,260,28,24,5.2,2.2,0.7,2
416
+ 70,Male,1.3,0.3,690,93,40,3.6,2.7,0.7,1
417
+ 70,Male,0.6,0.1,862,76,180,6.3,2.7,0.75,1
418
+ 11,Male,0.7,0.1,592,26,29,7.1,4.2,1.4,2
419
+ 50,Male,4.2,2.3,450,69,50,7,3,0.7,1
420
+ 55,Female,8.2,3.9,1350,52,65,6.7,2.9,0.7,1
421
+ 55,Female,10.9,5.1,1350,48,57,6.4,2.3,0.5,1
422
+ 26,Male,1,0.3,163,48,71,7.1,3.7,1,2
423
+ 41,Male,1.2,0.5,246,34,42,6.9,3.4,0.97,1
424
+ 53,Male,1.6,0.9,178,44,59,6.5,3.9,1.5,2
425
+ 32,Female,0.7,0.1,240,12,15,7,3,0.7,1
426
+ 58,Male,0.4,0.1,100,59,126,4.3,2.5,1.4,1
427
+ 45,Male,1.3,0.6,166,49,42,5.6,2.5,0.8,2
428
+ 65,Male,0.9,0.2,170,33,66,7,3,0.75,1
429
+ 52,Female,0.6,0.1,194,10,12,6.9,3.3,0.9,2
430
+ 73,Male,1.9,0.7,1750,102,141,5.5,2,0.5,1
431
+ 53,Female,0.7,0.1,182,20,33,4.8,1.9,0.6,1
432
+ 47,Female,0.8,0.2,236,10,13,6.7,2.9,0.76,2
433
+ 29,Male,0.7,0.2,165,55,87,7.5,4.6,1.58,1
434
+ 41,Female,0.9,0.2,201,31,24,7.6,3.8,1,2
435
+ 30,Female,0.7,0.2,194,32,36,7.5,3.6,0.92,2
436
+ 17,Female,0.5,0.1,206,28,21,7.1,4.5,1.7,2
437
+ 23,Male,1,0.3,212,41,80,6.2,3.1,1,1
438
+ 35,Male,1.6,0.7,157,15,44,5.2,2.5,0.9,1
439
+ 65,Male,0.8,0.2,162,30,90,3.8,1.4,0.5,1
440
+ 42,Female,0.8,0.2,168,25,18,6.2,3.1,1,1
441
+ 49,Female,0.8,0.2,198,23,20,7,4.3,1.5,1
442
+ 42,Female,2.3,1.1,292,29,39,4.1,1.8,0.7,1
443
+ 42,Female,7.4,3.6,298,52,102,4.6,1.9,0.7,1
444
+ 42,Female,0.7,0.2,152,35,81,6.2,3.2,1.06,1
445
+ 61,Male,0.8,0.2,163,18,19,6.3,2.8,0.8,2
446
+ 17,Male,0.9,0.2,279,40,46,7.3,4,1.2,2
447
+ 54,Male,0.8,0.2,181,35,20,5.5,2.7,0.96,1
448
+ 45,Female,23.3,12.8,1550,425,511,7.7,3.5,0.8,1
449
+ 48,Female,0.8,0.2,142,26,25,6,2.6,0.7,1
450
+ 48,Female,0.9,0.2,173,26,27,6.2,3.1,1,1
451
+ 65,Male,7.9,4.3,282,50,72,6,3,1,1
452
+ 35,Male,0.8,0.2,279,20,25,7.2,3.2,0.8,1
453
+ 58,Male,0.9,0.2,1100,25,36,7.1,3.5,0.9,1
454
+ 46,Male,0.7,0.2,224,40,23,7.1,3,0.7,1
455
+ 28,Male,0.6,0.2,159,15,16,7,3.5,1,2
456
+ 21,Female,0.6,0.1,186,25,22,6.8,3.4,1,1
457
+ 32,Male,0.7,0.2,189,22,43,7.4,3.1,0.7,2
458
+ 61,Male,0.8,0.2,192,28,35,6.9,3.4,0.9,2
459
+ 26,Male,6.8,3.2,140,37,19,3.6,0.9,0.3,1
460
+ 65,Male,1.1,0.5,686,16,46,5.7,1.5,0.35,1
461
+ 22,Female,2.2,1,215,159,51,5.5,2.5,0.8,1
462
+ 28,Female,0.8,0.2,309,55,23,6.8,4.1,1.51,1
463
+ 38,Male,0.7,0.2,110,22,18,6.4,2.5,0.64,1
464
+ 25,Male,0.8,0.1,130,23,42,8,4,1,1
465
+ 45,Female,0.7,0.2,164,21,53,4.5,1.4,0.45,2
466
+ 45,Female,0.6,0.1,270,23,42,5.1,2,0.5,2
467
+ 28,Female,0.6,0.1,137,22,16,4.9,1.9,0.6,2
468
+ 28,Female,1,0.3,90,18,108,6.8,3.1,0.8,2
469
+ 66,Male,1,0.3,190,30,54,5.3,2.1,0.6,1
470
+ 66,Male,0.8,0.2,165,22,32,4.4,2,0.8,1
471
+ 66,Male,1.1,0.5,167,13,56,7.1,4.1,1.36,1
472
+ 49,Female,0.6,0.1,185,17,26,6.6,2.9,0.7,2
473
+ 42,Male,0.7,0.2,197,64,33,5.8,2.4,0.7,2
474
+ 42,Male,1,0.3,154,38,21,6.8,3.9,1.3,2
475
+ 35,Male,2,1.1,226,33,135,6,2.7,0.8,2
476
+ 38,Male,2.2,1,310,119,42,7.9,4.1,1,2
477
+ 38,Male,0.9,0.3,310,15,25,5.5,2.7,1,1
478
+ 55,Male,0.6,0.2,220,24,32,5.1,2.4,0.88,1
479
+ 33,Male,7.1,3.7,196,622,497,6.9,3.6,1.09,1
480
+ 33,Male,3.4,1.6,186,779,844,7.3,3.2,0.7,1
481
+ 7,Male,0.5,0.1,352,28,51,7.9,4.2,1.1,2
482
+ 45,Male,2.3,1.3,282,132,368,7.3,4,1.2,1
483
+ 45,Male,1.1,0.4,92,91,188,7.2,3.8,1.11,1
484
+ 30,Male,0.8,0.2,182,46,57,7.8,4.3,1.2,2
485
+ 62,Male,5,2.1,103,18,40,5,2.1,1.72,1
486
+ 22,Female,6.7,3.2,850,154,248,6.2,2.8,0.8,1
487
+ 42,Female,0.8,0.2,195,18,15,6.7,3,0.8,1
488
+ 32,Male,0.7,0.2,276,102,190,6,2.9,0.93,1
489
+ 60,Male,0.7,0.2,171,31,26,7,3.5,1,2
490
+ 65,Male,0.8,0.1,146,17,29,5.9,3.2,1.18,2
491
+ 53,Female,0.8,0.2,193,96,57,6.7,3.6,1.16,1
492
+ 27,Male,1,0.3,180,56,111,6.8,3.9,1.85,2
493
+ 35,Female,1,0.3,805,133,103,7.9,3.3,0.7,1
494
+ 65,Male,0.7,0.2,265,30,28,5.2,1.8,0.52,2
495
+ 25,Male,0.7,0.2,185,196,401,6.5,3.9,1.5,1
496
+ 32,Male,0.7,0.2,165,31,29,6.1,3,0.96,2
497
+ 24,Male,1,0.2,189,52,31,8,4.8,1.5,1
498
+ 67,Male,2.2,1.1,198,42,39,7.2,3,0.7,1
499
+ 68,Male,1.8,0.5,151,18,22,6.5,4,1.6,1
500
+ 55,Male,3.6,1.6,349,40,70,7.2,2.9,0.6,1
501
+ 70,Male,2.7,1.2,365,62,55,6,2.4,0.6,1
502
+ 36,Male,2.8,1.5,305,28,76,5.9,2.5,0.7,1
503
+ 42,Male,0.8,0.2,127,29,30,4.9,2.7,1.2,1
504
+ 53,Male,19.8,10.4,238,39,221,8.1,2.5,0.4,1
505
+ 32,Male,30.5,17.1,218,39,79,5.5,2.7,0.9,1
506
+ 32,Male,32.6,14.1,219,95,235,5.8,3.1,1.1,1
507
+ 56,Male,17.7,8.8,239,43,185,5.6,2.4,0.7,1
508
+ 50,Male,0.9,0.3,194,190,73,7.5,3.9,1,1
509
+ 46,Male,18.4,8.5,450,119,230,7.5,3.3,0.7,1
510
+ 46,Male,20,10,254,140,540,5.4,3,1.2,1
511
+ 37,Female,0.8,0.2,205,31,36,9.2,4.6,1,2
512
+ 45,Male,2.2,1.6,320,37,48,6.8,3.4,1,1
513
+ 56,Male,1,0.3,195,22,28,5.8,2.6,0.8,2
514
+ 69,Male,0.9,0.2,215,32,24,6.9,3,0.7,1
515
+ 49,Male,1,0.3,230,48,58,8.4,4.2,1,1
516
+ 49,Male,3.9,2.1,189,65,181,6.9,3,0.7,1
517
+ 60,Male,0.9,0.3,168,16,24,6.7,3,0.8,1
518
+ 28,Male,0.9,0.2,215,50,28,8,4,1,1
519
+ 45,Male,2.9,1.4,210,74,68,7.2,3.6,1,1
520
+ 35,Male,26.3,12.1,108,168,630,9.2,2,0.3,1
521
+ 62,Male,1.8,0.9,224,69,155,8.6,4,0.8,1
522
+ 55,Male,4.4,2.9,230,14,25,7.1,2.1,0.4,1
523
+ 46,Female,0.8,0.2,185,24,15,7.9,3.7,0.8,1
524
+ 50,Male,0.6,0.2,137,15,16,4.8,2.6,1.1,1
525
+ 29,Male,0.8,0.2,156,12,15,6.8,3.7,1.1,2
526
+ 53,Female,0.9,0.2,210,35,32,8,3.9,0.9,2
527
+ 46,Male,9.4,5.2,268,21,63,6.4,2.8,0.8,1
528
+ 40,Male,3.5,1.6,298,68,200,7.1,3.4,0.9,1
529
+ 45,Male,1.7,0.8,315,12,38,6.3,2.1,0.5,1
530
+ 55,Male,3.3,1.5,214,54,152,5.1,1.8,0.5,1
531
+ 22,Female,1.1,0.3,138,14,21,7,3.8,1.1,2
532
+ 40,Male,30.8,18.3,285,110,186,7.9,2.7,0.5,1
533
+ 62,Male,0.7,0.2,162,12,17,8.2,3.2,0.6,2
534
+ 46,Female,1.4,0.4,298,509,623,3.6,1,0.3,1
535
+ 39,Male,1.6,0.8,230,88,74,8,4,1,2
536
+ 60,Male,19.6,9.5,466,46,52,6.1,2,0.4,1
537
+ 46,Male,15.8,7.2,227,67,220,6.9,2.6,0.6,1
538
+ 10,Female,0.8,0.1,395,25,75,7.6,3.6,0.9,1
539
+ 52,Male,1.8,0.8,97,85,78,6.4,2.7,0.7,1
540
+ 65,Female,0.7,0.2,406,24,45,7.2,3.5,0.9,2
541
+ 42,Male,0.8,0.2,114,21,23,7,3,0.7,2
542
+ 42,Male,0.8,0.2,198,29,19,6.6,3,0.8,2
543
+ 62,Male,0.7,0.2,173,46,47,7.3,4.1,1.2,2
544
+ 40,Male,1.2,0.6,204,23,27,7.6,4,1.1,1
545
+ 54,Female,5.5,3.2,350,67,42,7,3.2,0.8,1
546
+ 45,Female,0.7,0.2,153,41,42,4.5,2.2,0.9,2
547
+ 45,Male,20.2,11.7,188,47,32,5.4,2.3,0.7,1
548
+ 50,Female,27.7,10.8,380,39,348,7.1,2.3,0.4,1
549
+ 42,Male,11.1,6.1,214,60,186,6.9,2.8,2.8,1
550
+ 40,Female,2.1,1,768,74,141,7.8,4.9,1.6,1
551
+ 46,Male,3.3,1.5,172,25,41,5.6,2.4,0.7,1
552
+ 29,Male,1.2,0.4,160,20,22,6.2,3,0.9,2
553
+ 45,Male,0.6,0.1,196,29,30,5.8,2.9,1,1
554
+ 46,Male,10.2,4.2,232,58,140,7,2.7,0.6,1
555
+ 73,Male,1.8,0.9,220,20,43,6.5,3,0.8,1
556
+ 55,Male,0.8,0.2,290,139,87,7,3,0.7,1
557
+ 51,Male,0.7,0.1,180,25,27,6.1,3.1,1,1
558
+ 51,Male,2.9,1.2,189,80,125,6.2,3.1,1,1
559
+ 51,Male,4,2.5,275,382,330,7.5,4,1.1,1
560
+ 26,Male,42.8,19.7,390,75,138,7.5,2.6,0.5,1
561
+ 66,Male,15.2,7.7,356,321,562,6.5,2.2,0.4,1
562
+ 66,Male,16.6,7.6,315,233,384,6.9,2,0.4,1
563
+ 66,Male,17.3,8.5,388,173,367,7.8,2.6,0.5,1
564
+ 64,Male,1.4,0.5,298,31,83,7.2,2.6,0.5,1
565
+ 38,Female,0.6,0.1,165,22,34,5.9,2.9,0.9,2
566
+ 43,Male,22.5,11.8,143,22,143,6.6,2.1,0.46,1
567
+ 50,Female,1,0.3,191,22,31,7.8,4,1,2
568
+ 52,Male,2.7,1.4,251,20,40,6,1.7,0.39,1
569
+ 20,Female,16.7,8.4,200,91,101,6.9,3.5,1.02,1
570
+ 16,Male,7.7,4.1,268,213,168,7.1,4,1.2,1
571
+ 16,Male,2.6,1.2,236,131,90,5.4,2.6,0.9,1
572
+ 90,Male,1.1,0.3,215,46,134,6.9,3,0.7,1
573
+ 32,Male,15.6,9.5,134,54,125,5.6,4,2.5,1
574
+ 32,Male,3.7,1.6,612,50,88,6.2,1.9,0.4,1
575
+ 32,Male,12.1,6,515,48,92,6.6,2.4,0.5,1
576
+ 32,Male,25,13.7,560,41,88,7.9,2.5,2.5,1
577
+ 32,Male,15,8.2,289,58,80,5.3,2.2,0.7,1
578
+ 32,Male,12.7,8.4,190,28,47,5.4,2.6,0.9,1
579
+ 60,Male,0.5,0.1,500,20,34,5.9,1.6,0.37,2
580
+ 40,Male,0.6,0.1,98,35,31,6,3.2,1.1,1
581
+ 52,Male,0.8,0.2,245,48,49,6.4,3.2,1,1
582
+ 31,Male,1.3,0.5,184,29,32,6.8,3.4,1,1
583
+ 38,Male,1,0.3,216,21,24,7.3,4.4,1.5,2
data_cache/icu_sepsis.csv ADDED
The diff for this file is too large to render. See raw diff
 
data_cache/nephrology_ckd.csv ADDED
@@ -0,0 +1,363 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,bu,sc,sod,pot,hemo,pcv,wbcc,rbcc,htn,dm,cad,appet,pe,ane,class
2
+ 48,80,1.02,1,0,?,normal,notpresent,notpresent,121,36,1.2,?,?,15.4,44,7800,5.2,yes,yes,no,good,no,no,ckd
3
+ 7,50,1.02,4,0,?,normal,notpresent,notpresent,?,18,0.8,?,?,11.3,38,6000,?,no,no,no,good,no,no,ckd
4
+ 62,80,1.01,2,3,normal,normal,notpresent,notpresent,423,53,1.8,?,?,9.6,31,7500,?,no,yes,no,poor,no,yes,ckd
5
+ 48,70,1.005,4,0,normal,abnormal,present,notpresent,117,56,3.8,111,2.5,11.2,32,6700,3.9,yes,no,no,poor,yes,yes,ckd
6
+ 51,80,1.01,2,0,normal,normal,notpresent,notpresent,106,26,1.4,?,?,11.6,35,7300,4.6,no,no,no,good,no,no,ckd
7
+ 60,90,1.015,3,0,?,?,notpresent,notpresent,74,25,1.1,142,3.2,12.2,39,7800,4.4,yes,yes,no,good,yes,no,ckd
8
+ 68,70,1.01,0,0,?,normal,notpresent,notpresent,100,54,24,104,4,12.4,36,?,?,no,no,no,good,no,no,ckd
9
+ 52,100,1.015,3,0,normal,abnormal,present,notpresent,138,60,1.9,?,?,10.8,33,9600,4,yes,yes,no,good,no,yes,ckd
10
+ 53,90,1.02,2,0,abnormal,abnormal,present,notpresent,70,107,7.2,114,3.7,9.5,29,12100,3.7,yes,yes,no,poor,no,yes,ckd
11
+ 50,60,1.01,2,4,?,abnormal,present,notpresent,490,55,4,?,?,9.4,28,?,?,yes,yes,no,good,no,yes,ckd
12
+ 63,70,1.01,3,0,abnormal,abnormal,present,notpresent,380,60,2.7,131,4.2,10.8,32,4500,3.8,yes,yes,no,poor,yes,no,ckd
13
+ 68,70,1.015,3,1,?,normal,present,notpresent,208,72,2.1,138,5.8,9.7,28,12200,3.4,yes,yes,yes,poor,yes,no,ckd
14
+ 68,70,?,?,?,?,?,notpresent,notpresent,98,86,4.6,135,3.4,9.8,?,?,?,yes,yes,yes,poor,yes,no,ckd
15
+ 68,80,1.01,3,2,normal,abnormal,present,present,157,90,4.1,130,6.4,5.6,16,11000,2.6,yes,yes,yes,poor,yes,no,ckd
16
+ 40,80,1.015,3,0,?,normal,notpresent,notpresent,76,162,9.6,141,4.9,7.6,24,3800,2.8,yes,no,no,good,no,yes,ckd
17
+ 47,70,1.015,2,0,?,normal,notpresent,notpresent,99,46,2.2,138,4.1,12.6,?,?,?,no,no,no,good,no,no,ckd
18
+ 47,80,?,?,?,?,?,notpresent,notpresent,114,87,5.2,139,3.7,12.1,?,?,?,yes,no,no,poor,no,no,ckd
19
+ 60,100,1.025,0,3,?,normal,notpresent,notpresent,263,27,1.3,135,4.3,12.7,37,11400,4.3,yes,yes,yes,good,no,no,ckd
20
+ 62,60,1.015,1,0,?,abnormal,present,notpresent,100,31,1.6,?,?,10.3,30,5300,3.7,yes,no,yes,good,no,no,ckd
21
+ 61,80,1.015,2,0,abnormal,abnormal,notpresent,notpresent,173,148,3.9,135,5.2,7.7,24,9200,3.2,yes,yes,yes,poor,yes,yes,ckd
22
+ 60,90,?,?,?,?,?,notpresent,notpresent,?,180,76,4.5,?,10.9,32,6200,3.6,yes,yes,yes,good,no,no,ckd
23
+ 48,80,1.025,4,0,normal,abnormal,notpresent,notpresent,95,163,7.7,136,3.8,9.8,32,6900,3.4,yes,no,no,good,no,yes,ckd
24
+ 42,100,1.015,4,0,normal,abnormal,notpresent,present,?,50,1.4,129,4,11.1,39,8300,4.6,yes,no,no,poor,no,no,ckd
25
+ 61,60,1.025,0,0,?,normal,notpresent,notpresent,108,75,1.9,141,5.2,9.9,29,8400,3.7,yes,yes,no,good,no,yes,ckd
26
+ 75,80,1.015,0,0,?,normal,notpresent,notpresent,156,45,2.4,140,3.4,11.6,35,10300,4,yes,yes,no,poor,no,no,ckd
27
+ 69,70,1.01,3,4,normal,abnormal,notpresent,notpresent,264,87,2.7,130,4,12.5,37,9600,4.1,yes,yes,yes,good,yes,no,ckd
28
+ 75,70,?,1,3,?,?,notpresent,notpresent,123,31,1.4,?,?,?,?,?,?,no,yes,no,good,no,no,ckd
29
+ 68,70,1.005,1,0,abnormal,abnormal,present,notpresent,?,28,1.4,?,?,12.9,38,?,?,no,no,yes,good,no,no,ckd
30
+ 73,90,1.015,3,0,?,abnormal,present,notpresent,107,33,1.5,141,4.6,10.1,30,7800,4,no,no,no,poor,no,no,ckd
31
+ 61,90,1.01,1,1,?,normal,notpresent,notpresent,159,39,1.5,133,4.9,11.3,34,9600,4,yes,yes,no,poor,no,no,ckd
32
+ 60,100,1.02,2,0,abnormal,abnormal,notpresent,notpresent,140,55,2.5,?,?,10.1,29,?,?,yes,no,no,poor,no,no,ckd
33
+ 70,70,1.01,1,0,normal,?,present,present,171,153,5.2,?,?,?,?,?,?,no,yes,no,poor,no,no,ckd
34
+ 65,90,1.02,2,1,abnormal,normal,notpresent,notpresent,270,39,2,?,?,12,36,9800,4.9,yes,yes,no,poor,no,yes,ckd
35
+ 76,70,1.015,1,0,normal,normal,notpresent,notpresent,92,29,1.8,133,3.9,10.3,32,?,?,yes,no,no,good,no,no,ckd
36
+ 72,80,?,?,?,?,?,notpresent,notpresent,137,65,3.4,141,4.7,9.7,28,6900,2.5,yes,yes,no,poor,no,yes,ckd
37
+ 69,80,1.02,3,0,abnormal,normal,notpresent,notpresent,?,103,4.1,132,5.9,12.5,?,?,?,yes,no,no,good,no,no,ckd
38
+ 82,80,1.01,2,2,normal,?,notpresent,notpresent,140,70,3.4,136,4.2,13,40,9800,4.2,yes,yes,no,good,no,no,ckd
39
+ 46,90,1.01,2,0,normal,abnormal,notpresent,notpresent,99,80,2.1,?,?,11.1,32,9100,4.1,yes,no,no,good,no,no,ckd
40
+ 45,70,1.01,0,0,?,normal,notpresent,notpresent,?,20,0.7,?,?,?,?,?,?,no,no,no,good,yes,no,ckd
41
+ 47,100,1.01,0,0,?,normal,notpresent,notpresent,204,29,1,139,4.2,9.7,33,9200,4.5,yes,no,no,good,no,yes,ckd
42
+ 35,80,1.01,1,0,abnormal,?,notpresent,notpresent,79,202,10.8,134,3.4,7.9,24,7900,3.1,no,yes,no,good,no,no,ckd
43
+ 54,80,1.01,3,0,abnormal,abnormal,notpresent,notpresent,207,77,6.3,134,4.8,9.7,28,?,?,yes,yes,no,poor,yes,no,ckd
44
+ 54,80,1.02,3,0,?,abnormal,notpresent,notpresent,208,89,5.9,130,4.9,9.3,?,?,?,yes,yes,no,poor,yes,no,ckd
45
+ 48,70,1.015,0,0,?,normal,notpresent,notpresent,124,24,1.2,142,4.2,12.4,37,6400,4.7,no,yes,no,good,no,no,ckd
46
+ 11,80,1.01,3,0,?,normal,notpresent,notpresent,?,17,0.8,?,?,15,45,8600,?,no,no,no,good,no,no,ckd
47
+ 73,70,1.005,0,0,normal,normal,notpresent,notpresent,70,32,0.9,125,4,10,29,18900,3.5,yes,yes,no,good,yes,no,ckd
48
+ 60,70,1.01,2,0,normal,abnormal,present,notpresent,144,72,3,?,?,9.7,29,21600,3.5,yes,yes,no,poor,no,yes,ckd
49
+ 53,60,?,?,?,?,?,notpresent,notpresent,91,114,3.25,142,4.3,8.6,28,11000,3.8,yes,yes,no,poor,yes,yes,ckd
50
+ 54,100,1.015,3,0,?,normal,present,notpresent,162,66,1.6,136,4.4,10.3,33,?,?,yes,yes,no,poor,yes,no,ckd
51
+ 53,90,1.015,0,0,?,normal,notpresent,notpresent,?,38,2.2,?,?,10.9,34,4300,3.7,no,no,no,poor,no,yes,ckd
52
+ 62,80,1.015,0,5,?,?,notpresent,notpresent,246,24,1,?,?,13.6,40,8500,4.7,yes,yes,no,good,no,no,ckd
53
+ 63,80,1.01,2,2,normal,?,notpresent,notpresent,?,?,3.4,136,4.2,13,40,9800,4.2,yes,no,yes,good,no,no,ckd
54
+ 76,70,1.015,3,4,normal,abnormal,present,notpresent,?,164,9.7,131,4.4,10.2,30,11300,3.4,yes,yes,yes,poor,yes,no,ckd
55
+ 76,90,?,?,?,?,normal,notpresent,notpresent,93,155,7.3,132,4.9,?,?,?,?,yes,yes,yes,poor,no,no,ckd
56
+ 73,80,1.02,2,0,abnormal,abnormal,notpresent,notpresent,253,142,4.6,138,5.8,10.5,33,7200,4.3,yes,yes,yes,good,no,no,ckd
57
+ 59,100,?,?,?,?,?,notpresent,notpresent,?,96,6.4,?,?,6.6,?,?,?,yes,yes,no,good,no,yes,ckd
58
+ 67,90,1.02,1,0,?,abnormal,present,notpresent,141,66,3.2,138,6.6,?,?,?,?,yes,no,no,good,no,no,ckd
59
+ 67,80,1.01,1,3,normal,abnormal,notpresent,notpresent,182,391,32,163,39,?,?,?,?,no,no,no,good,yes,no,ckd
60
+ 15,60,1.02,3,0,?,normal,notpresent,notpresent,86,15,0.6,138,4,11,33,7700,3.8,yes,yes,no,good,no,no,ckd
61
+ 46,70,1.015,1,0,abnormal,normal,notpresent,notpresent,150,111,6.1,131,3.7,7.5,27,?,?,no,no,no,good,no,yes,ckd
62
+ 44,90,1.01,1,0,?,normal,notpresent,notpresent,?,20,1.1,?,?,15,48,?,?,no,no,no,good,no,no,ckd
63
+ 67,70,1.02,2,0,abnormal,normal,notpresent,notpresent,150,55,1.6,131,4.8,?,?,?,?,yes,yes,no,good,yes,no,ckd
64
+ 65,70,1.01,2,0,?,normal,present,notpresent,112,73,3.3,?,?,10.9,37,?,?,no,no,no,good,no,no,ckd
65
+ 26,70,1.015,0,4,?,normal,notpresent,notpresent,250,20,1.1,?,?,15.6,52,6900,6,no,yes,no,good,no,no,ckd
66
+ 61,80,1.015,0,4,?,normal,notpresent,notpresent,360,19,0.7,137,4.4,15.2,44,8300,5.2,yes,yes,no,good,no,no,ckd
67
+ 46,60,1.01,1,0,normal,normal,notpresent,notpresent,163,92,3.3,141,4,9.8,28,14600,3.2,yes,yes,no,good,no,no,ckd
68
+ 64,90,1.01,3,3,?,abnormal,present,notpresent,?,35,1.3,?,?,10.3,?,?,?,yes,yes,no,good,yes,no,ckd
69
+ 56,90,1.015,2,0,abnormal,abnormal,notpresent,notpresent,129,107,6.7,131,4.8,9.1,29,6400,3.4,yes,no,no,good,no,no,ckd
70
+ 48,80,1.005,4,0,abnormal,abnormal,notpresent,present,133,139,8.5,132,5.5,10.3,36,6200,4,no,yes,no,good,yes,no,ckd
71
+ 67,70,1.01,1,0,?,normal,notpresent,notpresent,102,48,3.2,137,5,11.9,34,7100,3.7,yes,yes,no,good,yes,no,ckd
72
+ 70,80,?,?,?,?,?,notpresent,notpresent,158,85,3.2,141,3.5,10.1,30,?,?,yes,no,no,good,yes,no,ckd
73
+ 56,80,1.01,1,0,?,normal,notpresent,notpresent,165,55,1.8,?,?,13.5,40,11800,5,yes,yes,no,poor,yes,no,ckd
74
+ 74,80,1.01,0,0,?,normal,notpresent,notpresent,132,98,2.8,133,5,10.8,31,9400,3.8,yes,yes,no,good,no,no,ckd
75
+ 45,90,?,?,?,?,?,notpresent,notpresent,360,45,2.4,128,4.4,8.3,29,5500,3.7,yes,yes,no,good,no,no,ckd
76
+ 38,70,?,?,?,?,?,notpresent,notpresent,104,77,1.9,140,3.9,?,?,?,?,yes,no,no,poor,yes,no,ckd
77
+ 48,70,1.015,1,0,normal,normal,notpresent,notpresent,127,19,1,134,3.6,?,?,?,?,yes,yes,no,good,no,no,ckd
78
+ 59,70,1.01,3,0,normal,abnormal,notpresent,notpresent,76,186,15,135,7.6,7.1,22,3800,2.1,yes,no,no,poor,yes,yes,ckd
79
+ 70,70,1.015,2,?,?,?,notpresent,notpresent,?,46,1.5,?,?,9.9,?,?,?,no,yes,no,poor,yes,no,ckd
80
+ 56,80,?,?,?,?,?,notpresent,notpresent,415,37,1.9,?,?,?,?,?,?,no,yes,no,good,no,no,ckd
81
+ 70,100,1.005,1,0,normal,abnormal,present,notpresent,169,47,2.9,?,?,11.1,32,5800,5,yes,yes,no,poor,no,no,ckd
82
+ 58,110,1.01,4,0,?,normal,notpresent,notpresent,251,52,2.2,?,?,?,?,13200,4.7,yes,yes,no,good,no,no,ckd
83
+ 50,70,1.02,0,0,?,normal,notpresent,notpresent,109,32,1.4,139,4.7,?,?,?,?,no,no,no,poor,no,no,ckd
84
+ 63,100,1.01,2,2,normal,normal,notpresent,present,280,35,3.2,143,3.5,13,40,9800,4.2,yes,no,yes,good,no,no,ckd
85
+ 56,70,1.015,4,1,abnormal,normal,notpresent,notpresent,210,26,1.7,136,3.8,16.1,52,12500,5.6,no,no,no,good,no,no,ckd
86
+ 71,70,1.01,3,0,normal,abnormal,present,present,219,82,3.6,133,4.4,10.4,33,5600,3.6,yes,yes,yes,good,no,no,ckd
87
+ 73,100,1.01,3,2,abnormal,abnormal,present,notpresent,295,90,5.6,140,2.9,9.2,30,7000,3.2,yes,yes,yes,poor,no,no,ckd
88
+ 65,70,1.01,0,0,?,normal,notpresent,notpresent,93,66,1.6,137,4.5,11.6,36,11900,3.9,no,yes,no,good,no,no,ckd
89
+ 62,90,1.015,1,0,?,normal,notpresent,notpresent,94,25,1.1,131,3.7,?,?,?,?,yes,no,no,good,yes,yes,ckd
90
+ 60,80,1.01,1,1,?,normal,notpresent,notpresent,172,32,2.7,?,?,11.2,36,?,?,no,yes,yes,poor,no,no,ckd
91
+ 65,60,1.015,1,0,?,normal,notpresent,notpresent,91,51,2.2,132,3.8,10,32,9100,4,yes,yes,no,poor,yes,no,ckd
92
+ 50,140,?,?,?,?,?,notpresent,notpresent,101,106,6.5,135,4.3,6.2,18,5800,2.3,yes,yes,no,poor,no,yes,ckd
93
+ 56,180,?,0,4,?,abnormal,notpresent,notpresent,298,24,1.2,139,3.9,11.2,32,10400,4.2,yes,yes,no,poor,yes,no,ckd
94
+ 34,70,1.015,4,0,abnormal,abnormal,notpresent,notpresent,153,22,0.9,133,3.8,?,?,?,?,no,no,no,good,yes,no,ckd
95
+ 71,90,1.015,2,0,?,abnormal,present,present,88,80,4.4,139,5.7,11.3,33,10700,3.9,no,no,no,good,no,no,ckd
96
+ 17,60,1.01,0,0,?,normal,notpresent,notpresent,92,32,2.1,141,4.2,13.9,52,7000,?,no,no,no,good,no,no,ckd
97
+ 76,70,1.015,2,0,normal,abnormal,present,notpresent,226,217,10.2,?,?,10.2,36,12700,4.2,yes,no,no,poor,yes,yes,ckd
98
+ 55,90,?,?,?,?,?,notpresent,notpresent,143,88,2,?,?,?,?,?,?,yes,yes,no,poor,yes,no,ckd
99
+ 65,80,1.015,0,0,?,normal,notpresent,notpresent,115,32,11.5,139,4,14.1,42,6800,5.2,no,no,no,good,no,no,ckd
100
+ 50,90,?,?,?,?,?,notpresent,notpresent,89,118,6.1,127,4.4,6,17,6500,?,yes,yes,no,good,yes,yes,ckd
101
+ 55,100,1.015,1,4,normal,?,notpresent,notpresent,297,53,2.8,139,4.5,11.2,34,13600,4.4,yes,yes,no,good,no,no,ckd
102
+ 45,80,1.015,0,0,?,abnormal,notpresent,notpresent,107,15,1,141,4.2,11.8,37,10200,4.2,no,no,no,good,no,no,ckd
103
+ 54,70,?,?,?,?,?,notpresent,notpresent,233,50.1,1.9,?,?,11.7,?,?,?,no,yes,no,good,no,no,ckd
104
+ 63,90,1.015,0,0,?,normal,notpresent,notpresent,123,19,2,142,3.8,11.7,34,11400,4.7,no,no,no,good,no,no,ckd
105
+ 65,80,1.01,3,3,?,normal,notpresent,notpresent,294,71,4.4,128,5.4,10,32,9000,3.9,yes,yes,yes,good,no,no,ckd
106
+ 12,60,1.015,3,0,abnormal,abnormal,present,notpresent,?,51,1.8,?,?,12.1,?,10300,?,no,no,no,good,no,no,ckd
107
+ 47,80,1.01,0,0,?,abnormal,notpresent,notpresent,?,28,0.9,?,?,12.4,44,5600,4.3,no,no,no,good,no,yes,ckd
108
+ 55,70,1.01,3,0,?,normal,notpresent,notpresent,99,25,1.2,?,?,11.4,?,?,?,no,no,no,poor,yes,no,ckd
109
+ 60,70,1.01,0,0,?,normal,notpresent,notpresent,140,27,1.2,?,?,?,?,?,?,no,no,no,good,no,no,ckd
110
+ 72,90,1.025,1,3,?,normal,notpresent,notpresent,323,40,2.2,137,5.3,12.6,?,?,?,no,yes,yes,poor,no,no,ckd
111
+ 54,60,?,3,?,?,?,notpresent,notpresent,125,21,1.3,137,3.4,15,46,?,?,yes,yes,no,good,yes,no,ckd
112
+ 34,70,?,?,?,?,?,notpresent,notpresent,?,219,12.2,130,3.8,6,?,?,?,yes,no,no,good,no,yes,ckd
113
+ 43,80,1.015,2,3,?,abnormal,present,present,?,30,1.1,?,?,14,42,14900,?,no,no,no,good,no,no,ckd
114
+ 65,100,1.015,0,0,?,normal,notpresent,notpresent,90,98,2.5,?,?,9.1,28,5500,3.6,yes,no,no,good,no,no,ckd
115
+ 72,90,?,?,?,?,?,notpresent,notpresent,308,36,2.5,131,4.3,?,?,?,?,yes,yes,no,poor,no,no,ckd
116
+ 70,90,1.015,0,0,?,normal,notpresent,notpresent,144,125,4,136,4.6,12,37,8200,4.5,yes,yes,no,poor,yes,no,ckd
117
+ 71,60,1.015,4,0,normal,normal,notpresent,notpresent,118,125,5.3,136,4.9,11.4,35,15200,4.3,yes,yes,no,poor,yes,no,ckd
118
+ 52,90,1.015,4,3,normal,abnormal,notpresent,notpresent,224,166,5.6,133,47,8.1,23,5000,2.9,yes,yes,no,good,no,yes,ckd
119
+ 75,70,1.025,1,0,?,normal,notpresent,notpresent,158,49,1.4,135,4.7,11.1,?,?,?,yes,no,no,poor,yes,no,ckd
120
+ 50,90,1.01,2,0,normal,abnormal,present,present,128,208,9.2,134,4.8,8.2,22,16300,2.7,no,no,no,poor,yes,yes,ckd
121
+ 5,50,1.01,0,0,?,normal,notpresent,notpresent,?,25,0.6,?,?,11.8,36,12400,?,no,no,no,good,no,no,ckd
122
+ 70,100,1.015,4,0,normal,normal,notpresent,notpresent,118,125,5.3,136,4.9,12,37,8400,8,yes,no,no,good,no,no,ckd
123
+ 47,100,1.01,?,?,normal,?,notpresent,notpresent,122,?,16.9,138,5.2,10.8,33,10200,3.8,no,yes,no,good,no,no,ckd
124
+ 48,80,1.015,0,2,?,normal,notpresent,notpresent,214,24,1.3,140,4,13.2,39,?,?,no,yes,no,poor,no,no,ckd
125
+ 46,90,1.02,?,?,?,normal,notpresent,notpresent,213,68,2.8,146,6.3,9.3,?,?,?,yes,yes,no,good,no,no,ckd
126
+ 45,60,1.01,2,0,normal,abnormal,present,notpresent,268,86,4,134,5.1,10,29,9200,?,yes,yes,no,good,no,no,ckd
127
+ 41,70,1.015,2,0,?,abnormal,notpresent,present,?,68,2.8,132,4.1,11.1,33,?,?,yes,no,no,good,yes,yes,ckd
128
+ 69,70,1.01,0,4,?,normal,notpresent,notpresent,256,40,1.2,142,5.6,?,?,?,?,no,no,no,good,no,no,ckd
129
+ 67,70,1.01,1,0,normal,normal,notpresent,notpresent,?,106,6,137,4.9,6.1,19,6500,?,yes,no,no,good,no,yes,ckd
130
+ 72,90,?,?,?,?,?,notpresent,notpresent,84,145,7.1,135,5.3,?,?,?,?,no,yes,no,good,no,no,ckd
131
+ 41,80,1.015,1,4,abnormal,normal,notpresent,notpresent,210,165,18,135,4.7,?,?,?,?,no,yes,no,good,no,no,ckd
132
+ 60,90,1.01,2,0,abnormal,normal,notpresent,notpresent,105,53,2.3,136,5.2,11.1,33,10500,4.1,no,no,no,good,no,no,ckd
133
+ 57,90,1.015,5,0,abnormal,abnormal,notpresent,present,?,322,13,126,4.8,8,24,4200,3.3,yes,yes,yes,poor,yes,yes,ckd
134
+ 53,100,1.01,1,3,abnormal,normal,notpresent,notpresent,213,23,1,139,4,?,?,?,?,no,yes,no,good,no,no,ckd
135
+ 60,60,1.01,3,1,normal,abnormal,present,notpresent,288,36,1.7,130,3,7.9,25,15200,3,yes,no,no,poor,no,yes,ckd
136
+ 69,60,?,?,?,?,?,notpresent,notpresent,171,26,48.1,?,?,?,?,?,?,yes,no,no,poor,no,no,ckd
137
+ 65,70,1.02,1,0,abnormal,abnormal,notpresent,notpresent,139,29,1,?,?,10.5,32,?,?,yes,no,no,good,yes,no,ckd
138
+ 8,60,1.025,3,0,normal,normal,notpresent,notpresent,78,27,0.9,?,?,12.3,41,6700,?,no,no,no,poor,yes,no,ckd
139
+ 76,90,?,?,?,?,?,notpresent,notpresent,172,46,1.7,141,5.5,9.6,30,?,?,yes,yes,no,good,no,yes,ckd
140
+ 39,70,1.01,0,0,?,normal,notpresent,notpresent,121,20,0.8,133,3.5,10.9,32,?,?,no,yes,no,good,no,no,ckd
141
+ 55,90,1.01,2,1,abnormal,abnormal,notpresent,notpresent,273,235,14.2,132,3.4,8.3,22,14600,2.9,yes,yes,no,poor,yes,yes,ckd
142
+ 56,90,1.005,4,3,abnormal,abnormal,notpresent,notpresent,242,132,16.4,140,4.2,8.4,26,?,3,yes,yes,no,poor,yes,yes,ckd
143
+ 50,70,1.02,3,0,abnormal,normal,present,present,123,40,1.8,?,?,11.1,36,4700,?,no,no,no,good,no,no,ckd
144
+ 66,90,1.015,2,0,?,normal,notpresent,present,153,76,3.3,?,?,?,?,?,?,no,no,no,poor,no,no,ckd
145
+ 62,70,1.025,3,0,normal,abnormal,notpresent,notpresent,122,42,1.7,136,4.7,12.6,39,7900,3.9,yes,yes,no,good,no,no,ckd
146
+ 71,60,1.02,3,2,normal,normal,present,notpresent,424,48,1.5,132,4,10.9,31,?,?,yes,yes,yes,good,no,no,ckd
147
+ 59,80,1.01,1,0,abnormal,normal,notpresent,notpresent,303,35,1.3,122,3.5,10.4,35,10900,4.3,no,yes,no,poor,no,no,ckd
148
+ 81,60,?,?,?,?,?,notpresent,notpresent,148,39,2.1,147,4.2,10.9,35,9400,2.4,yes,yes,yes,poor,yes,no,ckd
149
+ 59,70,?,?,?,?,?,notpresent,notpresent,204,34,1.5,124,4.1,9.8,37,6000,?,no,yes,no,good,no,no,ckd
150
+ 46,80,1.01,0,0,?,normal,notpresent,notpresent,160,40,2,140,4.1,9,27,8100,3.2,yes,no,no,poor,no,yes,ckd
151
+ 27,60,?,?,?,?,?,notpresent,notpresent,76,44,3.9,127,4.3,?,?,?,?,no,no,no,poor,yes,yes,ckd
152
+ 34,70,1.02,0,0,abnormal,normal,notpresent,notpresent,139,19,0.9,?,?,12.7,42,2200,?,no,no,no,poor,no,no,ckd
153
+ 65,70,1.015,4,4,?,normal,present,notpresent,307,28,1.5,?,?,11,39,6700,?,yes,yes,no,good,no,no,ckd
154
+ 66,70,1.015,2,5,?,normal,notpresent,notpresent,447,41,1.7,131,3.9,12.5,33,9600,4.4,yes,yes,no,good,no,no,ckd
155
+ 83,70,1.02,3,0,normal,normal,notpresent,notpresent,102,60,2.6,115,5.7,8.7,26,12800,3.1,yes,no,no,poor,no,yes,ckd
156
+ 62,80,1.01,1,2,?,?,notpresent,notpresent,309,113,2.9,130,2.5,10.6,34,12800,4.9,no,no,no,good,no,no,ckd
157
+ 17,70,1.015,1,0,abnormal,normal,notpresent,notpresent,22,1.5,7.3,145,2.8,13.1,41,11200,?,no,no,no,good,no,no,ckd
158
+ 54,70,?,?,?,?,?,notpresent,notpresent,111,146,7.5,141,4.7,11,35,8600,4.6,no,no,no,good,no,no,ckd
159
+ 60,50,1.01,0,0,?,normal,notpresent,notpresent,261,58,2.2,113,3,?,?,4200,3.4,yes,no,no,good,no,no,ckd
160
+ 21,90,1.01,4,0,normal,abnormal,present,present,107,40,1.7,125,3.5,8.3,23,12400,3.9,no,no,no,good,no,yes,ckd
161
+ 65,80,1.015,2,1,normal,normal,present,notpresent,215,133,2.5,?,?,13.2,41,?,?,no,yes,no,good,no,no,ckd
162
+ 42,90,1.02,2,0,abnormal,abnormal,present,notpresent,93,153,2.7,139,4.3,9.8,34,9800,?,no,no,no,poor,yes,yes,ckd
163
+ 72,90,1.01,2,0,?,abnormal,present,notpresent,124,53,2.3,?,?,11.9,39,?,?,no,no,no,good,no,no,ckd
164
+ 73,90,1.01,1,4,abnormal,abnormal,present,notpresent,234,56,1.9,?,?,10.3,28,?,?,no,yes,no,good,no,no,ckd
165
+ 45,70,1.025,2,0,normal,abnormal,present,notpresent,117,52,2.2,136,3.8,10,30,19100,3.7,no,no,no,good,no,no,ckd
166
+ 61,80,1.02,0,0,?,normal,notpresent,notpresent,131,23,0.8,140,4.1,11.3,35,?,?,no,no,no,good,no,no,ckd
167
+ 30,70,1.015,0,0,?,normal,notpresent,notpresent,101,106,6.5,135,4.3,?,?,?,?,no,no,no,poor,no,no,ckd
168
+ 54,60,1.015,3,2,?,abnormal,notpresent,notpresent,352,137,3.3,133,4.5,11.3,31,5800,3.6,yes,yes,yes,poor,yes,no,ckd
169
+ 8,50,1.02,4,0,normal,normal,notpresent,notpresent,?,46,1,135,3.8,?,?,?,?,no,no,no,good,yes,no,ckd
170
+ 64,60,1.01,4,1,abnormal,abnormal,notpresent,present,239,58,4.3,137,5.4,9.5,29,7500,3.4,yes,yes,no,poor,yes,no,ckd
171
+ 6,60,1.01,4,0,abnormal,abnormal,notpresent,present,94,67,1,135,4.9,9.9,30,16700,4.8,no,no,no,poor,no,no,ckd
172
+ 46,110,1.015,0,0,?,normal,notpresent,notpresent,130,16,0.9,?,?,?,?,?,?,no,no,no,good,no,no,ckd
173
+ 32,90,1.025,1,0,abnormal,abnormal,notpresent,notpresent,?,223,18.1,113,6.5,5.5,15,2600,2.8,yes,yes,no,poor,yes,yes,ckd
174
+ 80,70,1.01,2,?,?,abnormal,notpresent,notpresent,?,49,1.2,?,?,?,?,?,?,yes,yes,no,good,no,no,ckd
175
+ 70,90,1.02,2,1,abnormal,abnormal,notpresent,present,184,98.6,3.3,138,3.9,5.8,?,?,?,yes,yes,yes,poor,no,no,ckd
176
+ 49,100,1.01,3,0,abnormal,abnormal,notpresent,notpresent,129,158,11.8,122,3.2,8.1,24,9600,3.5,yes,yes,no,poor,yes,yes,ckd
177
+ 57,80,?,?,?,?,?,notpresent,notpresent,?,111,9.3,124,5.3,6.8,?,4300,3,yes,yes,no,good,no,yes,ckd
178
+ 59,100,1.02,4,2,normal,normal,notpresent,notpresent,252,40,3.2,137,4.7,11.2,30,26400,3.9,yes,yes,no,poor,yes,no,ckd
179
+ 65,80,1.015,0,0,?,normal,notpresent,notpresent,92,37,1.5,140,5.2,8.8,25,10700,3.2,yes,no,yes,good,yes,no,ckd
180
+ 90,90,1.025,1,0,?,normal,notpresent,notpresent,139,89,3,140,4.1,12,37,7900,3.9,yes,yes,no,good,no,no,ckd
181
+ 64,70,?,?,?,?,?,notpresent,notpresent,113,94,7.3,137,4.3,7.9,21,?,?,yes,yes,yes,good,yes,yes,ckd
182
+ 78,60,?,?,?,?,?,notpresent,notpresent,114,74,2.9,135,5.9,8,24,?,?,no,yes,no,good,no,yes,ckd
183
+ 65,90,1.01,4,2,normal,normal,notpresent,notpresent,172,82,13.5,145,6.3,8.8,31,?,?,yes,yes,no,good,yes,yes,ckd
184
+ 61,70,?,?,?,?,?,notpresent,notpresent,100,28,2.1,?,?,12.6,43,?,?,yes,yes,no,good,no,no,ckd
185
+ 60,70,1.01,1,0,?,normal,notpresent,notpresent,109,96,3.9,135,4,13.8,41,?,?,yes,no,no,good,no,no,ckd
186
+ 50,70,1.01,0,0,?,normal,notpresent,notpresent,230,50,2.2,?,?,12,41,10400,4.6,yes,yes,no,good,no,no,ckd
187
+ 67,80,?,?,?,?,?,notpresent,notpresent,341,37,1.5,?,?,12.3,41,6900,4.9,yes,yes,no,good,no,yes,ckd
188
+ 59,100,1.015,4,2,normal,normal,notpresent,notpresent,255,132,12.8,135,5.7,7.3,20,9800,3.9,yes,yes,yes,good,no,yes,ckd
189
+ 54,120,1.015,0,0,?,normal,notpresent,notpresent,103,18,1.2,?,?,?,?,?,?,no,no,no,good,no,no,ckd
190
+ 40,70,1.015,3,4,normal,normal,notpresent,notpresent,253,150,11.9,132,5.6,10.9,31,8800,3.4,yes,yes,no,poor,yes,no,ckd
191
+ 55,80,1.01,3,1,normal,abnormal,present,present,214,73,3.9,137,4.9,10.9,34,7400,3.7,yes,yes,no,good,yes,no,ckd
192
+ 68,80,1.015,0,0,?,abnormal,notpresent,notpresent,171,30,1,?,?,13.7,43,4900,5.2,no,yes,no,good,no,no,ckd
193
+ 63,100,1.01,1,0,?,normal,notpresent,notpresent,78,61,1.8,141,4.4,12.2,36,10500,4.3,no,yes,no,good,no,no,ckd
194
+ 33,90,1.015,0,0,?,normal,notpresent,notpresent,92,19,0.8,?,?,11.8,34,7000,?,no,no,no,good,no,no,ckd
195
+ 68,90,1.01,0,0,?,normal,notpresent,notpresent,238,57,2.5,?,?,9.8,28,8000,3.3,yes,yes,no,poor,no,no,ckd
196
+ 66,70,1.02,1,0,normal,?,notpresent,notpresent,248,30,1.7,138,5.3,?,?,?,?,yes,yes,no,good,no,no,ckd
197
+ 74,60,?,?,?,?,?,notpresent,notpresent,108,68,1.8,?,?,?,?,?,?,yes,yes,no,good,no,no,ckd
198
+ 71,90,1.01,0,3,?,normal,notpresent,notpresent,303,30,1.3,136,4.1,13,38,9200,4.6,yes,yes,no,good,no,no,ckd
199
+ 34,60,1.02,0,0,?,normal,notpresent,notpresent,117,28,2.2,138,3.8,?,?,?,?,no,no,no,good,yes,no,ckd
200
+ 60,90,1.01,3,5,abnormal,normal,notpresent,present,490,95,2.7,131,3.8,11.5,35,12000,4.5,yes,yes,no,good,no,no,ckd
201
+ 64,100,1.015,4,2,abnormal,abnormal,notpresent,present,163,54,7.2,140,4.6,7.9,26,7500,3.4,yes,yes,no,good,yes,no,ckd
202
+ 57,80,1.015,0,0,?,normal,notpresent,notpresent,120,48,1.6,?,?,11.3,36,7200,3.8,yes,yes,no,good,no,no,ckd
203
+ 60,70,?,?,?,?,?,notpresent,notpresent,124,52,2.5,?,?,?,?,?,?,yes,no,no,good,no,no,ckd
204
+ 59,50,1.01,3,0,normal,abnormal,notpresent,notpresent,241,191,12,114,2.9,9.6,31,15700,3.8,no,yes,no,good,yes,no,ckd
205
+ 65,60,1.01,2,0,normal,abnormal,present,notpresent,192,17,1.7,130,4.3,?,?,9500,?,yes,yes,no,poor,no,no,ckd
206
+ 60,90,?,?,?,?,?,notpresent,notpresent,269,51,2.8,138,3.7,11.5,35,?,?,yes,yes,yes,good,yes,no,ckd
207
+ 51,100,1.015,2,0,normal,normal,notpresent,present,93,20,1.6,146,4.5,?,?,?,?,no,no,no,poor,no,no,ckd
208
+ 37,100,1.01,0,0,abnormal,normal,notpresent,notpresent,?,19,1.3,?,?,15,44,4100,5.2,yes,no,no,good,no,no,ckd
209
+ 45,70,1.01,2,0,?,normal,notpresent,notpresent,113,93,2.3,?,?,7.9,26,5700,?,no,no,yes,good,no,yes,ckd
210
+ 65,80,?,?,?,?,?,notpresent,notpresent,74,66,2,136,5.4,9.1,25,?,?,yes,yes,yes,good,yes,no,ckd
211
+ 80,70,1.015,2,2,?,normal,notpresent,notpresent,141,53,2.2,?,?,12.7,40,9600,?,yes,yes,no,poor,yes,no,ckd
212
+ 72,100,?,?,?,?,?,notpresent,notpresent,201,241,13.4,127,4.8,9.4,28,?,?,yes,yes,no,good,no,yes,ckd
213
+ 34,90,1.015,2,0,normal,normal,notpresent,notpresent,104,50,1.6,137,4.1,11.9,39,?,?,no,no,no,good,no,no,ckd
214
+ 65,70,1.015,1,0,?,normal,notpresent,notpresent,203,46,1.4,?,?,11.4,36,5000,4.1,yes,yes,no,poor,yes,no,ckd
215
+ 57,70,1.015,1,0,?,abnormal,notpresent,notpresent,165,45,1.5,140,3.3,10.4,31,4200,3.9,no,no,no,good,no,no,ckd
216
+ 69,70,1.01,4,3,normal,abnormal,present,present,214,96,6.3,120,3.9,9.4,28,11500,3.3,yes,yes,yes,good,yes,yes,ckd
217
+ 62,90,1.02,2,1,?,normal,notpresent,notpresent,169,48,2.4,138,2.9,13.4,47,11000,6.1,yes,no,no,good,no,no,ckd
218
+ 64,90,1.015,3,2,?,abnormal,present,notpresent,463,64,2.8,135,4.1,12.2,40,9800,4.6,yes,yes,no,good,no,yes,ckd
219
+ 48,100,?,?,?,?,?,notpresent,notpresent,103,79,5.3,135,6.3,6.3,19,7200,2.6,yes,no,yes,poor,no,no,ckd
220
+ 48,110,1.015,3,0,abnormal,normal,present,notpresent,106,215,15.2,120,5.7,8.6,26,5000,2.5,yes,no,yes,good,no,yes,ckd
221
+ 54,90,1.025,1,0,normal,abnormal,notpresent,notpresent,150,18,1.2,140,4.2,?,?,?,?,no,no,no,poor,yes,yes,ckd
222
+ 59,70,1.01,1,3,abnormal,abnormal,notpresent,notpresent,424,55,1.7,138,4.5,12.6,37,10200,4.1,yes,yes,yes,good,no,no,ckd
223
+ 56,90,1.01,4,1,normal,abnormal,present,notpresent,176,309,13.3,124,6.5,3.1,9,5400,2.1,yes,yes,no,poor,yes,yes,ckd
224
+ 40,80,1.025,0,0,normal,normal,notpresent,notpresent,140,10,1.2,135,5,15,48,10400,4.5,no,no,no,good,no,no,notckd
225
+ 23,80,1.025,0,0,normal,normal,notpresent,notpresent,70,36,1,150,4.6,17,52,9800,5,no,no,no,good,no,no,notckd
226
+ 45,80,1.025,0,0,normal,normal,notpresent,notpresent,82,49,0.6,147,4.4,15.9,46,9100,4.7,no,no,no,good,no,no,notckd
227
+ 57,80,1.025,0,0,normal,normal,notpresent,notpresent,119,17,1.2,135,4.7,15.4,42,6200,6.2,no,no,no,good,no,no,notckd
228
+ 51,60,1.025,0,0,normal,normal,notpresent,notpresent,99,38,0.8,135,3.7,13,49,8300,5.2,no,no,no,good,no,no,notckd
229
+ 34,80,1.025,0,0,normal,normal,notpresent,notpresent,121,27,1.2,144,3.9,13.6,52,9200,6.3,no,no,no,good,no,no,notckd
230
+ 60,80,1.025,0,0,normal,normal,notpresent,notpresent,131,10,0.5,146,5,14.5,41,10700,5.1,no,no,no,good,no,no,notckd
231
+ 38,60,1.02,0,0,normal,normal,notpresent,notpresent,91,36,0.7,135,3.7,14,46,9100,5.8,no,no,no,good,no,no,notckd
232
+ 42,80,1.02,0,0,normal,normal,notpresent,notpresent,98,20,0.5,140,3.5,13.9,44,8400,5.5,no,no,no,good,no,no,notckd
233
+ 35,80,1.02,0,0,normal,normal,notpresent,notpresent,104,31,1.2,135,5,16.1,45,4300,5.2,no,no,no,good,no,no,notckd
234
+ 30,80,1.02,0,0,normal,normal,notpresent,notpresent,131,38,1,147,3.8,14.1,45,9400,5.3,no,no,no,good,no,no,notckd
235
+ 49,80,1.02,0,0,normal,normal,notpresent,notpresent,122,32,1.2,139,3.9,17,41,5600,4.9,no,no,no,good,no,no,notckd
236
+ 55,80,1.02,0,0,normal,normal,notpresent,notpresent,118,18,0.9,135,3.6,15.5,43,7200,5.4,no,no,no,good,no,no,notckd
237
+ 45,80,1.02,0,0,normal,normal,notpresent,notpresent,117,46,1.2,137,5,16.2,45,8600,5.2,no,no,no,good,no,no,notckd
238
+ 42,80,1.02,0,0,normal,normal,notpresent,notpresent,132,24,0.7,140,4.1,14.4,50,5000,4.5,no,no,no,good,no,no,notckd
239
+ 50,80,1.02,0,0,normal,normal,notpresent,notpresent,97,40,0.6,150,4.5,14.2,48,10500,5,no,no,no,good,no,no,notckd
240
+ 55,80,1.02,0,0,normal,normal,notpresent,notpresent,133,17,1.2,135,4.8,13.2,41,6800,5.3,no,no,no,good,no,no,notckd
241
+ 48,80,1.025,0,0,normal,normal,notpresent,notpresent,122,33,0.9,146,3.9,13.9,48,9500,4.8,no,no,no,good,no,no,notckd
242
+ 25,80,1.025,0,0,normal,normal,notpresent,notpresent,121,19,1.2,142,4.9,15,48,6900,5.3,no,no,no,good,no,no,notckd
243
+ 23,80,1.025,0,0,normal,normal,notpresent,notpresent,111,34,1.1,145,4,14.3,41,7200,5,no,no,no,good,no,no,notckd
244
+ 30,80,1.025,0,0,normal,normal,notpresent,notpresent,96,25,0.5,144,4.8,13.8,42,9000,4.5,no,no,no,good,no,no,notckd
245
+ 56,80,1.025,0,0,normal,normal,notpresent,notpresent,139,15,1.2,135,5,14.8,42,5600,5.5,no,no,no,good,no,no,notckd
246
+ 47,80,1.02,0,0,normal,normal,notpresent,notpresent,95,35,0.9,140,4.1,?,?,?,?,no,no,no,good,no,no,notckd
247
+ 19,80,1.02,0,0,normal,normal,notpresent,notpresent,107,23,0.7,141,4.2,14.4,44,?,?,no,no,no,good,no,no,notckd
248
+ 52,80,1.02,0,0,normal,normal,notpresent,notpresent,125,22,1.2,139,4.6,16.5,43,4700,4.6,no,no,no,good,no,no,notckd
249
+ 46,60,1.025,0,0,normal,normal,notpresent,notpresent,123,46,1,135,5,15.7,50,6300,4.8,no,no,no,good,no,no,notckd
250
+ 48,60,1.02,0,0,normal,normal,notpresent,notpresent,112,44,1.2,142,4.9,14.5,44,9400,6.4,no,no,no,good,no,no,notckd
251
+ 24,70,1.025,0,0,normal,normal,notpresent,notpresent,140,23,0.6,140,4.7,16.3,48,5800,5.6,no,no,no,good,no,no,notckd
252
+ 47,80,?,?,?,?,?,notpresent,notpresent,93,33,0.9,144,4.5,13.3,52,8100,5.2,no,no,no,good,no,no,notckd
253
+ 55,80,1.025,0,0,normal,normal,notpresent,notpresent,130,50,1.2,147,5,15.5,41,9100,6,no,no,no,good,no,no,notckd
254
+ 20,70,1.02,0,0,normal,normal,notpresent,notpresent,123,44,1,135,3.8,14.6,44,5500,4.8,no,no,no,good,no,no,notckd
255
+ 33,80,1.025,0,0,normal,normal,notpresent,notpresent,100,37,1.2,142,4,16.9,52,6700,6,no,no,no,good,no,no,notckd
256
+ 66,70,1.02,0,0,normal,normal,notpresent,notpresent,94,19,0.7,135,3.9,16,41,5300,5.9,no,no,no,good,no,no,notckd
257
+ 71,70,1.02,0,0,normal,normal,notpresent,notpresent,81,18,0.8,145,5,14.7,44,9800,6,no,no,no,good,no,no,notckd
258
+ 39,70,1.025,0,0,normal,normal,notpresent,notpresent,124,22,0.6,137,3.8,13.4,43,?,?,no,no,no,good,no,no,notckd
259
+ 42,70,1.02,0,0,normal,normal,notpresent,notpresent,93,32,0.9,143,4.7,16.6,43,7100,5.3,no,no,no,good,no,no,notckd
260
+ 54,70,1.02,0,0,?,?,?,?,76,28,0.6,146,3.5,14.8,52,8400,5.9,no,no,no,good,no,no,notckd
261
+ 47,80,1.025,0,0,normal,normal,notpresent,notpresent,124,44,1,140,4.9,14.9,41,7000,5.7,no,no,no,good,no,no,notckd
262
+ 30,80,1.02,0,0,normal,normal,notpresent,notpresent,89,42,0.5,139,5,16.7,52,10200,5,no,no,no,good,no,no,notckd
263
+ 75,60,1.02,0,0,normal,normal,notpresent,notpresent,110,50,0.7,135,5,14.3,40,8300,5.8,no,no,no,?,?,?,notckd
264
+ 44,70,?,?,?,?,?,notpresent,notpresent,106,25,0.9,150,3.6,15,50,9600,6.5,no,no,no,good,no,no,notckd
265
+ 41,70,1.02,0,0,normal,normal,notpresent,notpresent,125,38,0.6,140,5,16.8,41,6300,5.9,no,no,no,good,no,no,notckd
266
+ 34,60,1.02,0,0,normal,normal,notpresent,notpresent,91,49,1.2,135,4.5,13.5,48,8600,4.9,no,no,no,good,no,no,notckd
267
+ 73,60,1.02,0,0,normal,normal,notpresent,notpresent,127,48,0.5,150,3.5,15.1,52,11000,4.7,no,no,no,good,no,no,notckd
268
+ 45,60,1.02,0,0,normal,normal,?,?,114,26,0.7,141,4.2,15,43,9200,5.8,no,no,no,good,no,no,notckd
269
+ 44,60,1.025,0,0,normal,normal,notpresent,notpresent,96,33,0.9,147,4.5,16.9,41,7200,5,no,no,no,good,no,no,notckd
270
+ 29,70,1.02,0,0,normal,normal,notpresent,notpresent,127,44,1.2,145,5,14.8,48,?,?,no,no,no,good,no,no,notckd
271
+ 55,70,1.02,0,0,normal,normal,notpresent,notpresent,107,26,1.1,?,?,17,50,6700,6.1,no,no,no,good,no,no,notckd
272
+ 33,80,1.025,0,0,normal,normal,notpresent,notpresent,128,38,0.6,135,3.9,13.1,45,6200,4.5,no,no,no,good,no,no,notckd
273
+ 41,80,1.02,0,0,normal,normal,notpresent,notpresent,122,25,0.8,138,5,17.1,41,9100,5.2,no,no,no,good,no,no,notckd
274
+ 52,80,1.02,0,0,normal,normal,notpresent,notpresent,128,30,1.2,140,4.5,15.2,52,4300,5.7,no,no,no,good,no,no,notckd
275
+ 47,60,1.02,0,0,normal,normal,notpresent,notpresent,137,17,0.5,150,3.5,13.6,44,7900,4.5,no,no,no,good,no,no,notckd
276
+ 43,80,1.025,0,0,normal,normal,notpresent,notpresent,81,46,0.6,135,4.9,13.9,48,6900,4.9,no,no,no,good,no,no,notckd
277
+ 51,60,1.02,0,0,?,?,notpresent,notpresent,129,25,1.2,139,5,17.2,40,8100,5.9,no,no,no,good,no,no,notckd
278
+ 46,60,1.02,0,0,normal,normal,notpresent,notpresent,102,27,0.7,142,4.9,13.2,44,11000,5.4,no,no,no,good,no,no,notckd
279
+ 56,60,1.025,0,0,normal,normal,notpresent,notpresent,132,18,1.1,147,4.7,13.7,45,7500,5.6,no,no,no,good,no,no,notckd
280
+ 55,80,1.02,0,0,normal,normal,notpresent,notpresent,104,28,0.9,142,4.8,17.3,52,8200,4.8,no,no,no,good,no,no,notckd
281
+ 39,70,1.025,0,0,normal,normal,notpresent,notpresent,131,46,0.6,145,5,15.6,41,9400,4.7,no,no,no,good,no,no,notckd
282
+ 58,70,1.02,0,0,normal,normal,notpresent,notpresent,102,48,1.2,139,4.3,15,40,8100,4.9,no,no,no,good,no,no,notckd
283
+ 61,70,1.025,0,0,normal,normal,notpresent,notpresent,120,29,0.7,137,3.5,17.4,52,7000,5.3,no,no,no,good,no,no,notckd
284
+ 30,60,1.02,0,0,normal,normal,notpresent,notpresent,138,15,1.1,135,4.4,?,?,?,?,no,no,no,good,no,no,notckd
285
+ 57,60,1.02,0,0,normal,normal,notpresent,notpresent,105,49,1.2,150,4.7,15.7,44,10400,6.2,no,no,no,good,no,no,notckd
286
+ 65,60,1.02,0,0,normal,normal,notpresent,notpresent,109,39,1,144,3.5,13.9,48,9600,4.8,no,no,no,good,no,no,notckd
287
+ 70,60,?,?,?,?,?,notpresent,notpresent,120,40,0.5,140,4.6,16,43,4500,4.9,no,no,no,good,no,no,notckd
288
+ 43,80,1.025,0,0,normal,normal,notpresent,notpresent,130,30,1.1,143,5,15.9,45,7800,4.5,no,no,no,good,no,no,notckd
289
+ 40,80,1.02,0,0,normal,normal,notpresent,notpresent,119,15,0.7,150,4.9,?,?,?,?,no,no,no,good,no,no,notckd
290
+ 58,80,1.02,0,0,normal,normal,notpresent,notpresent,100,50,1.2,140,3.5,14,50,6700,6.5,no,no,no,good,no,no,notckd
291
+ 47,60,1.02,0,0,normal,normal,notpresent,notpresent,109,25,1.1,141,4.7,15.8,41,8300,5.2,no,no,no,good,no,no,notckd
292
+ 30,60,1.025,0,0,normal,normal,notpresent,notpresent,120,31,0.8,150,4.6,13.4,44,10700,5.8,no,no,no,good,no,no,notckd
293
+ 28,70,1.02,0,0,normal,normal,?,?,131,29,0.6,145,4.9,?,45,8600,6.5,no,no,no,good,no,no,notckd
294
+ 33,60,1.025,0,0,normal,normal,notpresent,notpresent,80,25,0.9,146,3.5,14.1,48,7800,5.1,no,no,no,good,no,no,notckd
295
+ 43,80,1.02,0,0,normal,normal,notpresent,notpresent,114,32,1.1,135,3.9,?,42,?,?,no,no,no,good,no,no,notckd
296
+ 59,70,1.025,0,0,normal,normal,notpresent,notpresent,130,39,0.7,147,4.7,13.5,46,6700,4.5,no,no,no,good,no,no,notckd
297
+ 34,70,1.025,0,0,normal,normal,notpresent,notpresent,?,33,1,150,5,15.3,44,10500,6.1,no,no,no,good,no,no,notckd
298
+ 23,80,1.02,0,0,normal,normal,notpresent,notpresent,99,46,1.2,142,4,17.7,46,4300,5.5,no,no,no,good,no,no,notckd
299
+ 60,60,1.02,0,0,normal,normal,notpresent,notpresent,134,45,0.5,139,4.8,14.2,48,10700,5.6,no,no,no,good,no,no,notckd
300
+ 25,60,1.02,0,0,normal,normal,notpresent,notpresent,119,27,0.5,?,?,15.2,40,9200,5.2,no,no,no,good,no,no,notckd
301
+ 44,70,1.025,0,0,normal,normal,notpresent,notpresent,92,40,0.9,141,4.9,14,52,7500,6.2,no,no,no,good,no,no,notckd
302
+ 62,80,1.02,0,0,normal,normal,notpresent,notpresent,132,34,0.8,147,3.5,17.8,44,4700,4.5,no,no,no,good,no,no,notckd
303
+ 25,70,1.02,0,0,normal,normal,notpresent,notpresent,88,42,0.5,136,3.5,13.3,48,7000,4.9,no,no,no,good,no,no,notckd
304
+ 32,70,1.025,0,0,normal,normal,notpresent,notpresent,100,29,1.1,142,4.5,14.3,43,6700,5.9,no,no,no,good,no,no,notckd
305
+ 63,70,1.025,0,0,normal,normal,notpresent,notpresent,130,37,0.9,150,5,13.4,41,7300,4.7,no,no,no,good,no,no,notckd
306
+ 44,60,1.02,0,0,normal,normal,notpresent,notpresent,95,46,0.5,138,4.2,15,50,7700,6.3,no,no,no,good,no,no,notckd
307
+ 37,60,1.025,0,0,normal,normal,notpresent,notpresent,111,35,0.8,135,4.1,16.2,50,5500,5.7,no,no,no,good,no,no,notckd
308
+ 64,60,1.02,0,0,normal,normal,notpresent,notpresent,106,27,0.7,150,3.3,14.4,42,8100,4.7,no,no,no,good,no,no,notckd
309
+ 22,60,1.025,0,0,normal,normal,notpresent,notpresent,97,18,1.2,138,4.3,13.5,42,7900,6.4,no,no,no,good,no,no,notckd
310
+ 33,60,?,?,?,normal,normal,notpresent,notpresent,130,41,0.9,141,4.4,15.5,52,4300,5.8,no,no,no,good,no,no,notckd
311
+ 43,60,1.025,0,0,normal,normal,notpresent,notpresent,108,25,1,144,5,17.8,43,7200,5.5,no,no,no,good,no,no,notckd
312
+ 38,80,1.02,0,0,normal,normal,notpresent,notpresent,99,19,0.5,147,3.5,13.6,44,7300,6.4,no,no,no,good,no,no,notckd
313
+ 35,70,1.025,0,0,?,?,notpresent,notpresent,82,36,1.1,150,3.5,14.5,52,9400,6.1,no,no,no,good,no,no,notckd
314
+ 65,70,1.025,0,0,?,?,notpresent,notpresent,85,20,1,142,4.8,16.1,43,9600,4.5,no,no,no,good,no,no,notckd
315
+ 29,80,1.02,0,0,normal,normal,notpresent,notpresent,83,49,0.9,139,3.3,17.5,40,9900,4.7,no,no,no,good,no,no,notckd
316
+ 37,60,1.02,0,0,normal,normal,notpresent,notpresent,109,47,1.1,141,4.9,15,48,7000,5.2,no,no,no,good,no,no,notckd
317
+ 39,60,1.02,0,0,normal,normal,notpresent,notpresent,86,37,0.6,150,5,13.6,51,5800,4.5,no,no,no,good,no,no,notckd
318
+ 32,60,1.025,0,0,normal,normal,notpresent,notpresent,102,17,0.4,147,4.7,14.6,41,6800,5.1,no,no,no,good,no,no,notckd
319
+ 23,60,1.02,0,0,normal,normal,notpresent,notpresent,95,24,0.8,145,5,15,52,6300,4.6,no,no,no,good,no,no,notckd
320
+ 34,70,1.025,0,0,normal,normal,notpresent,notpresent,87,38,0.5,144,4.8,17.1,47,7400,6.1,no,no,no,good,no,no,notckd
321
+ 66,70,1.025,0,0,normal,normal,notpresent,notpresent,107,16,1.1,140,3.6,13.6,42,11000,4.9,no,no,no,good,no,no,notckd
322
+ 47,60,1.02,0,0,normal,normal,notpresent,notpresent,117,22,1.2,138,3.5,13,45,5200,5.6,no,no,no,good,no,no,notckd
323
+ 74,60,1.02,0,0,normal,normal,notpresent,notpresent,88,50,0.6,147,3.7,17.2,53,6000,4.5,no,no,no,good,no,no,notckd
324
+ 35,60,1.025,0,0,normal,normal,notpresent,notpresent,105,39,0.5,135,3.9,14.7,43,5800,6.2,no,no,no,good,no,no,notckd
325
+ 29,80,1.02,0,0,normal,normal,notpresent,notpresent,70,16,0.7,138,3.5,13.7,54,5400,5.8,no,no,no,good,no,no,notckd
326
+ 33,80,1.025,0,0,normal,normal,notpresent,notpresent,89,19,1.1,144,5,15,40,10300,4.8,no,no,no,good,no,no,notckd
327
+ 67,80,1.025,0,0,normal,normal,notpresent,notpresent,99,40,0.5,?,?,17.8,44,5900,5.2,no,no,no,good,no,no,notckd
328
+ 73,80,1.025,0,0,normal,normal,notpresent,notpresent,118,44,0.7,137,3.5,14.8,45,9300,4.7,no,no,no,good,no,no,notckd
329
+ 24,80,1.02,0,0,normal,normal,notpresent,notpresent,93,46,1,145,3.5,?,?,10700,6.3,no,no,no,good,no,no,notckd
330
+ 60,80,1.025,0,0,normal,normal,notpresent,notpresent,81,15,0.5,141,3.6,15,46,10500,5.3,no,no,no,good,no,no,notckd
331
+ 68,60,1.025,0,0,normal,normal,notpresent,notpresent,125,41,1.1,139,3.8,17.4,50,6700,6.1,no,no,no,good,no,no,notckd
332
+ 30,80,1.025,0,0,normal,normal,notpresent,notpresent,82,42,0.7,146,5,14.9,45,9400,5.9,no,no,no,good,no,no,notckd
333
+ 75,70,1.02,0,0,normal,normal,notpresent,notpresent,107,48,0.8,144,3.5,13.6,46,10300,4.8,no,no,no,good,no,no,notckd
334
+ 69,70,1.02,0,0,normal,normal,notpresent,notpresent,83,42,1.2,139,3.7,16.2,50,9300,5.4,no,no,no,good,no,no,notckd
335
+ 28,60,1.025,0,0,normal,normal,notpresent,notpresent,79,50,0.5,145,5,17.6,51,6500,5,no,no,no,good,no,no,notckd
336
+ 72,60,1.02,0,0,normal,normal,notpresent,notpresent,109,26,0.9,150,4.9,15,52,10500,5.5,no,no,no,good,no,no,notckd
337
+ 61,70,1.025,0,0,normal,normal,notpresent,notpresent,133,38,1,142,3.6,13.7,47,9200,4.9,no,no,no,good,no,no,notckd
338
+ 79,80,1.025,0,0,normal,normal,notpresent,notpresent,111,44,1.2,146,3.6,16.3,40,8000,6.4,no,no,no,good,no,no,notckd
339
+ 70,80,1.02,0,0,normal,normal,notpresent,notpresent,74,41,0.5,143,4.5,15.1,48,9700,5.6,no,no,no,good,no,no,notckd
340
+ 58,70,1.025,0,0,normal,normal,notpresent,notpresent,88,16,1.1,147,3.5,16.4,53,9100,5.2,no,no,no,good,no,no,notckd
341
+ 64,70,1.02,0,0,normal,normal,notpresent,notpresent,97,27,0.7,145,4.8,13.8,49,6400,4.8,no,no,no,good,no,no,notckd
342
+ 71,60,1.025,0,0,normal,normal,notpresent,notpresent,?,?,0.9,140,4.8,15.2,42,7700,5.5,no,no,no,good,no,no,notckd
343
+ 62,80,1.025,0,0,normal,normal,notpresent,notpresent,78,45,0.6,138,3.5,16.1,50,5400,5.7,no,no,no,good,no,no,notckd
344
+ 59,60,1.02,0,0,normal,normal,notpresent,notpresent,113,23,1.1,139,3.5,15.3,54,6500,4.9,no,no,no,good,no,no,notckd
345
+ 71,70,1.025,0,0,?,?,notpresent,notpresent,79,47,0.5,142,4.8,16.6,40,5800,5.9,no,no,no,good,no,no,notckd
346
+ 48,80,1.025,0,0,normal,normal,notpresent,notpresent,75,22,0.8,137,5,16.8,51,6000,6.5,no,no,no,good,no,no,notckd
347
+ 80,80,1.025,0,0,normal,normal,notpresent,notpresent,119,46,0.7,141,4.9,13.9,49,5100,5,no,no,no,good,no,no,notckd
348
+ 57,60,1.02,0,0,normal,normal,notpresent,notpresent,132,18,1.1,150,4.7,15.4,42,11000,4.5,no,no,no,good,no,no,notckd
349
+ 63,70,1.02,0,0,normal,normal,notpresent,notpresent,113,25,0.6,146,4.9,16.5,52,8000,5.1,no,no,no,good,no,no,notckd
350
+ 46,70,1.025,0,0,normal,normal,notpresent,notpresent,100,47,0.5,142,3.5,16.4,43,5700,6.5,no,no,no,good,no,no,notckd
351
+ 15,80,1.025,0,0,normal,normal,notpresent,notpresent,93,17,0.9,136,3.9,16.7,50,6200,5.2,no,no,no,good,no,no,notckd
352
+ 51,80,1.02,0,0,normal,normal,notpresent,notpresent,94,15,1.2,144,3.7,15.5,46,9500,6.4,no,no,no,good,no,no,notckd
353
+ 41,80,1.025,0,0,normal,normal,notpresent,notpresent,112,48,0.7,140,5,17,52,7200,5.8,no,no,no,good,no,no,notckd
354
+ 52,80,1.025,0,0,normal,normal,notpresent,notpresent,99,25,0.8,135,3.7,15,52,6300,5.3,no,no,no,good,no,no,notckd
355
+ 36,80,1.025,0,0,normal,normal,notpresent,notpresent,85,16,1.1,142,4.1,15.6,44,5800,6.3,no,no,no,good,no,no,notckd
356
+ 57,80,1.02,0,0,normal,normal,notpresent,notpresent,133,48,1.2,147,4.3,14.8,46,6600,5.5,no,no,no,good,no,no,notckd
357
+ 43,60,1.025,0,0,normal,normal,notpresent,notpresent,117,45,0.7,141,4.4,13,54,7400,5.4,no,no,no,good,no,no,notckd
358
+ 50,80,1.02,0,0,normal,normal,notpresent,notpresent,137,46,0.8,139,5,14.1,45,9500,4.6,no,no,no,good,no,no,notckd
359
+ 55,80,1.02,0,0,normal,normal,notpresent,notpresent,140,49,0.5,150,4.9,15.7,47,6700,4.9,no,no,no,good,no,no,notckd
360
+ 42,70,1.025,0,0,normal,normal,notpresent,notpresent,75,31,1.2,141,3.5,16.5,54,7800,6.2,no,no,no,good,no,no,notckd
361
+ 12,80,1.02,0,0,normal,normal,notpresent,notpresent,100,26,0.6,137,4.4,15.8,49,6600,5.4,no,no,no,good,no,no,notckd
362
+ 17,60,1.025,0,0,normal,normal,notpresent,notpresent,114,50,1,135,4.9,14.2,51,7200,5.9,no,no,no,good,no,no,notckd
363
+ 58,80,1.025,0,0,normal,normal,notpresent,notpresent,131,18,1.1,141,3.5,15.8,53,6800,6.1,no,no,no,good,no,no,notckd
data_cache/neurology_parkinsons.csv ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,MDVP:Shimmer(dB),Shimmer:APQ3,Shimmer:APQ5,MDVP:APQ,Shimmer:DDA,NHR,HNR,status,RPDE,DFA,spread1,spread2,D2,PPE
2
+ phon_R01_S01_1,119.99200,157.30200,74.99700,0.00784,0.00007,0.00370,0.00554,0.01109,0.04374,0.42600,0.02182,0.03130,0.02971,0.06545,0.02211,21.03300,1,0.414783,0.815285,-4.813031,0.266482,2.301442,0.284654
3
+ phon_R01_S01_2,122.40000,148.65000,113.81900,0.00968,0.00008,0.00465,0.00696,0.01394,0.06134,0.62600,0.03134,0.04518,0.04368,0.09403,0.01929,19.08500,1,0.458359,0.819521,-4.075192,0.335590,2.486855,0.368674
4
+ phon_R01_S01_3,116.68200,131.11100,111.55500,0.01050,0.00009,0.00544,0.00781,0.01633,0.05233,0.48200,0.02757,0.03858,0.03590,0.08270,0.01309,20.65100,1,0.429895,0.825288,-4.443179,0.311173,2.342259,0.332634
5
+ phon_R01_S01_4,116.67600,137.87100,111.36600,0.00997,0.00009,0.00502,0.00698,0.01505,0.05492,0.51700,0.02924,0.04005,0.03772,0.08771,0.01353,20.64400,1,0.434969,0.819235,-4.117501,0.334147,2.405554,0.368975
6
+ phon_R01_S01_5,116.01400,141.78100,110.65500,0.01284,0.00011,0.00655,0.00908,0.01966,0.06425,0.58400,0.03490,0.04825,0.04465,0.10470,0.01767,19.64900,1,0.417356,0.823484,-3.747787,0.234513,2.332180,0.410335
7
+ phon_R01_S01_6,120.55200,131.16200,113.78700,0.00968,0.00008,0.00463,0.00750,0.01388,0.04701,0.45600,0.02328,0.03526,0.03243,0.06985,0.01222,21.37800,1,0.415564,0.825069,-4.242867,0.299111,2.187560,0.357775
8
+ phon_R01_S02_1,120.26700,137.24400,114.82000,0.00333,0.00003,0.00155,0.00202,0.00466,0.01608,0.14000,0.00779,0.00937,0.01351,0.02337,0.00607,24.88600,1,0.596040,0.764112,-5.634322,0.257682,1.854785,0.211756
9
+ phon_R01_S02_2,107.33200,113.84000,104.31500,0.00290,0.00003,0.00144,0.00182,0.00431,0.01567,0.13400,0.00829,0.00946,0.01256,0.02487,0.00344,26.89200,1,0.637420,0.763262,-6.167603,0.183721,2.064693,0.163755
10
+ phon_R01_S02_3,95.73000,132.06800,91.75400,0.00551,0.00006,0.00293,0.00332,0.00880,0.02093,0.19100,0.01073,0.01277,0.01717,0.03218,0.01070,21.81200,1,0.615551,0.773587,-5.498678,0.327769,2.322511,0.231571
11
+ phon_R01_S02_4,95.05600,120.10300,91.22600,0.00532,0.00006,0.00268,0.00332,0.00803,0.02838,0.25500,0.01441,0.01725,0.02444,0.04324,0.01022,21.86200,1,0.547037,0.798463,-5.011879,0.325996,2.432792,0.271362
12
+ phon_R01_S02_5,88.33300,112.24000,84.07200,0.00505,0.00006,0.00254,0.00330,0.00763,0.02143,0.19700,0.01079,0.01342,0.01892,0.03237,0.01166,21.11800,1,0.611137,0.776156,-5.249770,0.391002,2.407313,0.249740
13
+ phon_R01_S02_6,91.90400,115.87100,86.29200,0.00540,0.00006,0.00281,0.00336,0.00844,0.02752,0.24900,0.01424,0.01641,0.02214,0.04272,0.01141,21.41400,1,0.583390,0.792520,-4.960234,0.363566,2.642476,0.275931
14
+ phon_R01_S04_1,136.92600,159.86600,131.27600,0.00293,0.00002,0.00118,0.00153,0.00355,0.01259,0.11200,0.00656,0.00717,0.01140,0.01968,0.00581,25.70300,1,0.460600,0.646846,-6.547148,0.152813,2.041277,0.138512
15
+ phon_R01_S04_2,139.17300,179.13900,76.55600,0.00390,0.00003,0.00165,0.00208,0.00496,0.01642,0.15400,0.00728,0.00932,0.01797,0.02184,0.01041,24.88900,1,0.430166,0.665833,-5.660217,0.254989,2.519422,0.199889
16
+ phon_R01_S04_3,152.84500,163.30500,75.83600,0.00294,0.00002,0.00121,0.00149,0.00364,0.01828,0.15800,0.01064,0.00972,0.01246,0.03191,0.00609,24.92200,1,0.474791,0.654027,-6.105098,0.203653,2.125618,0.170100
17
+ phon_R01_S04_4,142.16700,217.45500,83.15900,0.00369,0.00003,0.00157,0.00203,0.00471,0.01503,0.12600,0.00772,0.00888,0.01359,0.02316,0.00839,25.17500,1,0.565924,0.658245,-5.340115,0.210185,2.205546,0.234589
18
+ phon_R01_S04_5,144.18800,349.25900,82.76400,0.00544,0.00004,0.00211,0.00292,0.00632,0.02047,0.19200,0.00969,0.01200,0.02074,0.02908,0.01859,22.33300,1,0.567380,0.644692,-5.440040,0.239764,2.264501,0.218164
19
+ phon_R01_S04_6,168.77800,232.18100,75.60300,0.00718,0.00004,0.00284,0.00387,0.00853,0.03327,0.34800,0.01441,0.01893,0.03430,0.04322,0.02919,20.37600,1,0.631099,0.605417,-2.931070,0.434326,3.007463,0.430788
20
+ phon_R01_S05_1,153.04600,175.82900,68.62300,0.00742,0.00005,0.00364,0.00432,0.01092,0.05517,0.54200,0.02471,0.03572,0.05767,0.07413,0.03160,17.28000,1,0.665318,0.719467,-3.949079,0.357870,3.109010,0.377429
21
+ phon_R01_S05_2,156.40500,189.39800,142.82200,0.00768,0.00005,0.00372,0.00399,0.01116,0.03995,0.34800,0.01721,0.02374,0.04310,0.05164,0.03365,17.15300,1,0.649554,0.686080,-4.554466,0.340176,2.856676,0.322111
22
+ phon_R01_S05_3,153.84800,165.73800,65.78200,0.00840,0.00005,0.00428,0.00450,0.01285,0.03810,0.32800,0.01667,0.02383,0.04055,0.05000,0.03871,17.53600,1,0.660125,0.704087,-4.095442,0.262564,2.739710,0.365391
23
+ phon_R01_S05_4,153.88000,172.86000,78.12800,0.00480,0.00003,0.00232,0.00267,0.00696,0.04137,0.37000,0.02021,0.02591,0.04525,0.06062,0.01849,19.49300,1,0.629017,0.698951,-5.186960,0.237622,2.557536,0.259765
24
+ phon_R01_S05_5,167.93000,193.22100,79.06800,0.00442,0.00003,0.00220,0.00247,0.00661,0.04351,0.37700,0.02228,0.02540,0.04246,0.06685,0.01280,22.46800,1,0.619060,0.679834,-4.330956,0.262384,2.916777,0.285695
25
+ phon_R01_S05_6,173.91700,192.73500,86.18000,0.00476,0.00003,0.00221,0.00258,0.00663,0.04192,0.36400,0.02187,0.02470,0.03772,0.06562,0.01840,20.42200,1,0.537264,0.686894,-5.248776,0.210279,2.547508,0.253556
26
+ phon_R01_S06_1,163.65600,200.84100,76.77900,0.00742,0.00005,0.00380,0.00390,0.01140,0.01659,0.16400,0.00738,0.00948,0.01497,0.02214,0.01778,23.83100,1,0.397937,0.732479,-5.557447,0.220890,2.692176,0.215961
27
+ phon_R01_S06_2,104.40000,206.00200,77.96800,0.00633,0.00006,0.00316,0.00375,0.00948,0.03767,0.38100,0.01732,0.02245,0.03780,0.05197,0.02887,22.06600,1,0.522746,0.737948,-5.571843,0.236853,2.846369,0.219514
28
+ phon_R01_S06_3,171.04100,208.31300,75.50100,0.00455,0.00003,0.00250,0.00234,0.00750,0.01966,0.18600,0.00889,0.01169,0.01872,0.02666,0.01095,25.90800,1,0.418622,0.720916,-6.183590,0.226278,2.589702,0.147403
29
+ phon_R01_S06_4,146.84500,208.70100,81.73700,0.00496,0.00003,0.00250,0.00275,0.00749,0.01919,0.19800,0.00883,0.01144,0.01826,0.02650,0.01328,25.11900,1,0.358773,0.726652,-6.271690,0.196102,2.314209,0.162999
30
+ phon_R01_S06_5,155.35800,227.38300,80.05500,0.00310,0.00002,0.00159,0.00176,0.00476,0.01718,0.16100,0.00769,0.01012,0.01661,0.02307,0.00677,25.97000,1,0.470478,0.676258,-7.120925,0.279789,2.241742,0.108514
31
+ phon_R01_S06_6,162.56800,198.34600,77.63000,0.00502,0.00003,0.00280,0.00253,0.00841,0.01791,0.16800,0.00793,0.01057,0.01799,0.02380,0.01170,25.67800,1,0.427785,0.723797,-6.635729,0.209866,1.957961,0.135242
32
+ phon_R01_S07_1,197.07600,206.89600,192.05500,0.00289,0.00001,0.00166,0.00168,0.00498,0.01098,0.09700,0.00563,0.00680,0.00802,0.01689,0.00339,26.77500,0,0.422229,0.741367,-7.348300,0.177551,1.743867,0.085569
33
+ phon_R01_S07_2,199.22800,209.51200,192.09100,0.00241,0.00001,0.00134,0.00138,0.00402,0.01015,0.08900,0.00504,0.00641,0.00762,0.01513,0.00167,30.94000,0,0.432439,0.742055,-7.682587,0.173319,2.103106,0.068501
34
+ phon_R01_S07_3,198.38300,215.20300,193.10400,0.00212,0.00001,0.00113,0.00135,0.00339,0.01263,0.11100,0.00640,0.00825,0.00951,0.01919,0.00119,30.77500,0,0.465946,0.738703,-7.067931,0.175181,1.512275,0.096320
35
+ phon_R01_S07_4,202.26600,211.60400,197.07900,0.00180,0.000009,0.00093,0.00107,0.00278,0.00954,0.08500,0.00469,0.00606,0.00719,0.01407,0.00072,32.68400,0,0.368535,0.742133,-7.695734,0.178540,1.544609,0.056141
36
+ phon_R01_S07_5,203.18400,211.52600,196.16000,0.00178,0.000009,0.00094,0.00106,0.00283,0.00958,0.08500,0.00468,0.00610,0.00726,0.01403,0.00065,33.04700,0,0.340068,0.741899,-7.964984,0.163519,1.423287,0.044539
37
+ phon_R01_S07_6,201.46400,210.56500,195.70800,0.00198,0.000010,0.00105,0.00115,0.00314,0.01194,0.10700,0.00586,0.00760,0.00957,0.01758,0.00135,31.73200,0,0.344252,0.742737,-7.777685,0.170183,2.447064,0.057610
38
+ phon_R01_S08_1,177.87600,192.92100,168.01300,0.00411,0.00002,0.00233,0.00241,0.00700,0.02126,0.18900,0.01154,0.01347,0.01612,0.03463,0.00586,23.21600,1,0.360148,0.778834,-6.149653,0.218037,2.477082,0.165827
39
+ phon_R01_S08_2,176.17000,185.60400,163.56400,0.00369,0.00002,0.00205,0.00218,0.00616,0.01851,0.16800,0.00938,0.01160,0.01491,0.02814,0.00340,24.95100,1,0.341435,0.783626,-6.006414,0.196371,2.536527,0.173218
40
+ phon_R01_S08_3,180.19800,201.24900,175.45600,0.00284,0.00002,0.00153,0.00166,0.00459,0.01444,0.13100,0.00726,0.00885,0.01190,0.02177,0.00231,26.73800,1,0.403884,0.766209,-6.452058,0.212294,2.269398,0.141929
41
+ phon_R01_S08_4,187.73300,202.32400,173.01500,0.00316,0.00002,0.00168,0.00182,0.00504,0.01663,0.15100,0.00829,0.01003,0.01366,0.02488,0.00265,26.31000,1,0.396793,0.758324,-6.006647,0.266892,2.382544,0.160691
42
+ phon_R01_S08_5,186.16300,197.72400,177.58400,0.00298,0.00002,0.00165,0.00175,0.00496,0.01495,0.13500,0.00774,0.00941,0.01233,0.02321,0.00231,26.82200,1,0.326480,0.765623,-6.647379,0.201095,2.374073,0.130554
43
+ phon_R01_S08_6,184.05500,196.53700,166.97700,0.00258,0.00001,0.00134,0.00147,0.00403,0.01463,0.13200,0.00742,0.00901,0.01234,0.02226,0.00257,26.45300,1,0.306443,0.759203,-7.044105,0.063412,2.361532,0.115730
44
+ phon_R01_S10_1,237.22600,247.32600,225.22700,0.00298,0.00001,0.00169,0.00182,0.00507,0.01752,0.16400,0.01035,0.01024,0.01133,0.03104,0.00740,22.73600,0,0.305062,0.654172,-7.310550,0.098648,2.416838,0.095032
45
+ phon_R01_S10_2,241.40400,248.83400,232.48300,0.00281,0.00001,0.00157,0.00173,0.00470,0.01760,0.15400,0.01006,0.01038,0.01251,0.03017,0.00675,23.14500,0,0.457702,0.634267,-6.793547,0.158266,2.256699,0.117399
46
+ phon_R01_S10_3,243.43900,250.91200,232.43500,0.00210,0.000009,0.00109,0.00137,0.00327,0.01419,0.12600,0.00777,0.00898,0.01033,0.02330,0.00454,25.36800,0,0.438296,0.635285,-7.057869,0.091608,2.330716,0.091470
47
+ phon_R01_S10_4,242.85200,255.03400,227.91100,0.00225,0.000009,0.00117,0.00139,0.00350,0.01494,0.13400,0.00847,0.00879,0.01014,0.02542,0.00476,25.03200,0,0.431285,0.638928,-6.995820,0.102083,2.365800,0.102706
48
+ phon_R01_S10_5,245.51000,262.09000,231.84800,0.00235,0.000010,0.00127,0.00148,0.00380,0.01608,0.14100,0.00906,0.00977,0.01149,0.02719,0.00476,24.60200,0,0.467489,0.631653,-7.156076,0.127642,2.392122,0.097336
49
+ phon_R01_S10_6,252.45500,261.48700,182.78600,0.00185,0.000007,0.00092,0.00113,0.00276,0.01152,0.10300,0.00614,0.00730,0.00860,0.01841,0.00432,26.80500,0,0.610367,0.635204,-7.319510,0.200873,2.028612,0.086398
50
+ phon_R01_S13_1,122.18800,128.61100,115.76500,0.00524,0.00004,0.00169,0.00203,0.00507,0.01613,0.14300,0.00855,0.00776,0.01433,0.02566,0.00839,23.16200,0,0.579597,0.733659,-6.439398,0.266392,2.079922,0.133867
51
+ phon_R01_S13_2,122.96400,130.04900,114.67600,0.00428,0.00003,0.00124,0.00155,0.00373,0.01681,0.15400,0.00930,0.00802,0.01400,0.02789,0.00462,24.97100,0,0.538688,0.754073,-6.482096,0.264967,2.054419,0.128872
52
+ phon_R01_S13_3,124.44500,135.06900,117.49500,0.00431,0.00003,0.00141,0.00167,0.00422,0.02184,0.19700,0.01241,0.01024,0.01685,0.03724,0.00479,25.13500,0,0.553134,0.775933,-6.650471,0.254498,1.840198,0.103561
53
+ phon_R01_S13_4,126.34400,134.23100,112.77300,0.00448,0.00004,0.00131,0.00169,0.00393,0.02033,0.18500,0.01143,0.00959,0.01614,0.03429,0.00474,25.03000,0,0.507504,0.760361,-6.689151,0.291954,2.431854,0.105993
54
+ phon_R01_S13_5,128.00100,138.05200,122.08000,0.00436,0.00003,0.00137,0.00166,0.00411,0.02297,0.21000,0.01323,0.01072,0.01677,0.03969,0.00481,24.69200,0,0.459766,0.766204,-7.072419,0.220434,1.972297,0.119308
55
+ phon_R01_S13_6,129.33600,139.86700,118.60400,0.00490,0.00004,0.00165,0.00183,0.00495,0.02498,0.22800,0.01396,0.01219,0.01947,0.04188,0.00484,25.42900,0,0.420383,0.785714,-6.836811,0.269866,2.223719,0.147491
56
+ phon_R01_S16_1,108.80700,134.65600,102.87400,0.00761,0.00007,0.00349,0.00486,0.01046,0.02719,0.25500,0.01483,0.01609,0.02067,0.04450,0.01036,21.02800,1,0.536009,0.819032,-4.649573,0.205558,1.986899,0.316700
57
+ phon_R01_S16_2,109.86000,126.35800,104.43700,0.00874,0.00008,0.00398,0.00539,0.01193,0.03209,0.30700,0.01789,0.01992,0.02454,0.05368,0.01180,20.76700,1,0.558586,0.811843,-4.333543,0.221727,2.014606,0.344834
58
+ phon_R01_S16_3,110.41700,131.06700,103.37000,0.00784,0.00007,0.00352,0.00514,0.01056,0.03715,0.33400,0.02032,0.02302,0.02802,0.06097,0.00969,21.42200,1,0.541781,0.821364,-4.438453,0.238298,1.922940,0.335041
59
+ phon_R01_S16_4,117.27400,129.91600,110.40200,0.00752,0.00006,0.00299,0.00469,0.00898,0.02293,0.22100,0.01189,0.01459,0.01948,0.03568,0.00681,22.81700,1,0.530529,0.817756,-4.608260,0.290024,2.021591,0.314464
60
+ phon_R01_S16_5,116.87900,131.89700,108.15300,0.00788,0.00007,0.00334,0.00493,0.01003,0.02645,0.26500,0.01394,0.01625,0.02137,0.04183,0.00786,22.60300,1,0.540049,0.813432,-4.476755,0.262633,1.827012,0.326197
61
+ phon_R01_S16_6,114.84700,271.31400,104.68000,0.00867,0.00008,0.00373,0.00520,0.01120,0.03225,0.35000,0.01805,0.01974,0.02519,0.05414,0.01143,21.66000,1,0.547975,0.817396,-4.609161,0.221711,1.831691,0.316395
62
+ phon_R01_S17_1,209.14400,237.49400,109.37900,0.00282,0.00001,0.00147,0.00152,0.00442,0.01861,0.17000,0.00975,0.01258,0.01382,0.02925,0.00871,25.55400,0,0.341788,0.678874,-7.040508,0.066994,2.460791,0.101516
63
+ phon_R01_S17_2,223.36500,238.98700,98.66400,0.00264,0.00001,0.00154,0.00151,0.00461,0.01906,0.16500,0.01013,0.01296,0.01340,0.03039,0.00301,26.13800,0,0.447979,0.686264,-7.293801,0.086372,2.321560,0.098555
64
+ phon_R01_S17_3,222.23600,231.34500,205.49500,0.00266,0.00001,0.00152,0.00144,0.00457,0.01643,0.14500,0.00867,0.01108,0.01200,0.02602,0.00340,25.85600,0,0.364867,0.694399,-6.966321,0.095882,2.278687,0.103224
65
+ phon_R01_S17_4,228.83200,234.61900,223.63400,0.00296,0.00001,0.00175,0.00155,0.00526,0.01644,0.14500,0.00882,0.01075,0.01179,0.02647,0.00351,25.96400,0,0.256570,0.683296,-7.245620,0.018689,2.498224,0.093534
66
+ phon_R01_S17_5,229.40100,252.22100,221.15600,0.00205,0.000009,0.00114,0.00113,0.00342,0.01457,0.12900,0.00769,0.00957,0.01016,0.02308,0.00300,26.41500,0,0.276850,0.673636,-7.496264,0.056844,2.003032,0.073581
67
+ phon_R01_S17_6,228.96900,239.54100,113.20100,0.00238,0.00001,0.00136,0.00140,0.00408,0.01745,0.15400,0.00942,0.01160,0.01234,0.02827,0.00420,24.54700,0,0.305429,0.681811,-7.314237,0.006274,2.118596,0.091546
68
+ phon_R01_S18_1,140.34100,159.77400,67.02100,0.00817,0.00006,0.00430,0.00440,0.01289,0.03198,0.31300,0.01830,0.01810,0.02428,0.05490,0.02183,19.56000,1,0.460139,0.720908,-5.409423,0.226850,2.359973,0.226156
69
+ phon_R01_S18_2,136.96900,166.60700,66.00400,0.00923,0.00007,0.00507,0.00463,0.01520,0.03111,0.30800,0.01638,0.01759,0.02603,0.04914,0.02659,19.97900,1,0.498133,0.729067,-5.324574,0.205660,2.291558,0.226247
70
+ phon_R01_S18_3,143.53300,162.21500,65.80900,0.01101,0.00008,0.00647,0.00467,0.01941,0.05384,0.47800,0.03152,0.02422,0.03392,0.09455,0.04882,20.33800,1,0.513237,0.731444,-5.869750,0.151814,2.118496,0.185580
71
+ phon_R01_S18_4,148.09000,162.82400,67.34300,0.00762,0.00005,0.00467,0.00354,0.01400,0.05428,0.49700,0.03357,0.02494,0.03635,0.10070,0.02431,21.71800,1,0.487407,0.727313,-6.261141,0.120956,2.137075,0.141958
72
+ phon_R01_S18_5,142.72900,162.40800,65.47600,0.00831,0.00006,0.00469,0.00419,0.01407,0.03485,0.36500,0.01868,0.01906,0.02949,0.05605,0.02599,20.26400,1,0.489345,0.730387,-5.720868,0.158830,2.277927,0.180828
73
+ phon_R01_S18_6,136.35800,176.59500,65.75000,0.00971,0.00007,0.00534,0.00478,0.01601,0.04978,0.48300,0.02749,0.02466,0.03736,0.08247,0.03361,18.57000,1,0.543299,0.733232,-5.207985,0.224852,2.642276,0.242981
74
+ phon_R01_S19_1,120.08000,139.71000,111.20800,0.00405,0.00003,0.00180,0.00220,0.00540,0.01706,0.15200,0.00974,0.00925,0.01345,0.02921,0.00442,25.74200,1,0.495954,0.762959,-5.791820,0.329066,2.205024,0.188180
75
+ phon_R01_S19_2,112.01400,588.51800,107.02400,0.00533,0.00005,0.00268,0.00329,0.00805,0.02448,0.22600,0.01373,0.01375,0.01956,0.04120,0.00623,24.17800,1,0.509127,0.789532,-5.389129,0.306636,1.928708,0.225461
76
+ phon_R01_S19_3,110.79300,128.10100,107.31600,0.00494,0.00004,0.00260,0.00283,0.00780,0.02442,0.21600,0.01432,0.01325,0.01831,0.04295,0.00479,25.43800,1,0.437031,0.815908,-5.313360,0.201861,2.225815,0.244512
77
+ phon_R01_S19_4,110.70700,122.61100,105.00700,0.00516,0.00005,0.00277,0.00289,0.00831,0.02215,0.20600,0.01284,0.01219,0.01715,0.03851,0.00472,25.19700,1,0.463514,0.807217,-5.477592,0.315074,1.862092,0.228624
78
+ phon_R01_S19_5,112.87600,148.82600,106.98100,0.00500,0.00004,0.00270,0.00289,0.00810,0.03999,0.35000,0.02413,0.02231,0.02704,0.07238,0.00905,23.37000,1,0.489538,0.789977,-5.775966,0.341169,2.007923,0.193918
79
+ phon_R01_S19_6,110.56800,125.39400,106.82100,0.00462,0.00004,0.00226,0.00280,0.00677,0.02199,0.19700,0.01284,0.01199,0.01636,0.03852,0.00420,25.82000,1,0.429484,0.816340,-5.391029,0.250572,1.777901,0.232744
80
+ phon_R01_S20_1,95.38500,102.14500,90.26400,0.00608,0.00006,0.00331,0.00332,0.00994,0.03202,0.26300,0.01803,0.01886,0.02455,0.05408,0.01062,21.87500,1,0.644954,0.779612,-5.115212,0.249494,2.017753,0.260015
81
+ phon_R01_S20_2,100.77000,115.69700,85.54500,0.01038,0.00010,0.00622,0.00576,0.01865,0.03121,0.36100,0.01773,0.01783,0.02139,0.05320,0.02220,19.20000,1,0.594387,0.790117,-4.913885,0.265699,2.398422,0.277948
82
+ phon_R01_S20_3,96.10600,108.66400,84.51000,0.00694,0.00007,0.00389,0.00415,0.01168,0.04024,0.36400,0.02266,0.02451,0.02876,0.06799,0.01823,19.05500,1,0.544805,0.770466,-4.441519,0.155097,2.645959,0.327978
83
+ phon_R01_S20_4,95.60500,107.71500,87.54900,0.00702,0.00007,0.00428,0.00371,0.01283,0.03156,0.29600,0.01792,0.01841,0.02190,0.05377,0.01825,19.65900,1,0.576084,0.778747,-5.132032,0.210458,2.232576,0.260633
84
+ phon_R01_S20_5,100.96000,110.01900,95.62800,0.00606,0.00006,0.00351,0.00348,0.01053,0.02427,0.21600,0.01371,0.01421,0.01751,0.04114,0.01237,20.53600,1,0.554610,0.787896,-5.022288,0.146948,2.428306,0.264666
85
+ phon_R01_S20_6,98.80400,102.30500,87.80400,0.00432,0.00004,0.00247,0.00258,0.00742,0.02223,0.20200,0.01277,0.01343,0.01552,0.03831,0.00882,22.24400,1,0.576644,0.772416,-6.025367,0.078202,2.053601,0.177275
86
+ phon_R01_S21_1,176.85800,205.56000,75.34400,0.00747,0.00004,0.00418,0.00420,0.01254,0.04795,0.43500,0.02679,0.03022,0.03510,0.08037,0.05470,13.89300,1,0.556494,0.729586,-5.288912,0.343073,3.099301,0.242119
87
+ phon_R01_S21_2,180.97800,200.12500,155.49500,0.00406,0.00002,0.00220,0.00244,0.00659,0.03852,0.33100,0.02107,0.02493,0.02877,0.06321,0.02782,16.17600,1,0.583574,0.727747,-5.657899,0.315903,3.098256,0.200423
88
+ phon_R01_S21_3,178.22200,202.45000,141.04700,0.00321,0.00002,0.00163,0.00194,0.00488,0.03759,0.32700,0.02073,0.02415,0.02784,0.06219,0.03151,15.92400,1,0.598714,0.712199,-6.366916,0.335753,2.654271,0.144614
89
+ phon_R01_S21_4,176.28100,227.38100,125.61000,0.00520,0.00003,0.00287,0.00312,0.00862,0.06511,0.58000,0.03671,0.04159,0.04683,0.11012,0.04824,13.92200,1,0.602874,0.740837,-5.515071,0.299549,3.136550,0.220968
90
+ phon_R01_S21_5,173.89800,211.35000,74.67700,0.00448,0.00003,0.00237,0.00254,0.00710,0.06727,0.65000,0.03788,0.04254,0.04802,0.11363,0.04214,14.73900,1,0.599371,0.743937,-5.783272,0.299793,3.007096,0.194052
91
+ phon_R01_S21_6,179.71100,225.93000,144.87800,0.00709,0.00004,0.00391,0.00419,0.01172,0.04313,0.44200,0.02297,0.02768,0.03455,0.06892,0.07223,11.86600,1,0.590951,0.745526,-4.379411,0.375531,3.671155,0.332086
92
+ phon_R01_S21_7,166.60500,206.00800,78.03200,0.00742,0.00004,0.00387,0.00453,0.01161,0.06640,0.63400,0.03650,0.04282,0.05114,0.10949,0.08725,11.74400,1,0.653410,0.733165,-4.508984,0.389232,3.317586,0.301952
93
+ phon_R01_S22_1,151.95500,163.33500,147.22600,0.00419,0.00003,0.00224,0.00227,0.00672,0.07959,0.77200,0.04421,0.04962,0.05690,0.13262,0.01658,19.66400,1,0.501037,0.714360,-6.411497,0.207156,2.344876,0.134120
94
+ phon_R01_S22_2,148.27200,164.98900,142.29900,0.00459,0.00003,0.00250,0.00256,0.00750,0.04190,0.38300,0.02383,0.02521,0.03051,0.07150,0.01914,18.78000,1,0.454444,0.734504,-5.952058,0.087840,2.344336,0.186489
95
+ phon_R01_S22_3,152.12500,161.46900,76.59600,0.00382,0.00003,0.00191,0.00226,0.00574,0.05925,0.63700,0.03341,0.03794,0.04398,0.10024,0.01211,20.96900,1,0.447456,0.697790,-6.152551,0.173520,2.080121,0.160809
96
+ phon_R01_S22_4,157.82100,172.97500,68.40100,0.00358,0.00002,0.00196,0.00196,0.00587,0.03716,0.30700,0.02062,0.02321,0.02764,0.06185,0.00850,22.21900,1,0.502380,0.712170,-6.251425,0.188056,2.143851,0.160812
97
+ phon_R01_S22_5,157.44700,163.26700,149.60500,0.00369,0.00002,0.00201,0.00197,0.00602,0.03272,0.28300,0.01813,0.01909,0.02571,0.05439,0.01018,21.69300,1,0.447285,0.705658,-6.247076,0.180528,2.344348,0.164916
98
+ phon_R01_S22_6,159.11600,168.91300,144.81100,0.00342,0.00002,0.00178,0.00184,0.00535,0.03381,0.30700,0.01806,0.02024,0.02809,0.05417,0.00852,22.66300,1,0.366329,0.693429,-6.417440,0.194627,2.473239,0.151709
99
+ phon_R01_S24_1,125.03600,143.94600,116.18700,0.01280,0.00010,0.00743,0.00623,0.02228,0.03886,0.34200,0.02135,0.02174,0.03088,0.06406,0.08151,15.33800,1,0.629574,0.714485,-4.020042,0.265315,2.671825,0.340623
100
+ phon_R01_S24_2,125.79100,140.55700,96.20600,0.01378,0.00011,0.00826,0.00655,0.02478,0.04689,0.42200,0.02542,0.02630,0.03908,0.07625,0.10323,15.43300,1,0.571010,0.690892,-5.159169,0.202146,2.441612,0.260375
101
+ phon_R01_S24_3,126.51200,141.75600,99.77000,0.01936,0.00015,0.01159,0.00990,0.03476,0.06734,0.65900,0.03611,0.03963,0.05783,0.10833,0.16744,12.43500,1,0.638545,0.674953,-3.760348,0.242861,2.634633,0.378483
102
+ phon_R01_S24_4,125.64100,141.06800,116.34600,0.03316,0.00026,0.02144,0.01522,0.06433,0.09178,0.89100,0.05358,0.04791,0.06196,0.16074,0.31482,8.86700,1,0.671299,0.656846,-3.700544,0.260481,2.991063,0.370961
103
+ phon_R01_S24_5,128.45100,150.44900,75.63200,0.01551,0.00012,0.00905,0.00909,0.02716,0.06170,0.58400,0.03223,0.03672,0.05174,0.09669,0.11843,15.06000,1,0.639808,0.643327,-4.202730,0.310163,2.638279,0.356881
104
+ phon_R01_S24_6,139.22400,586.56700,66.15700,0.03011,0.00022,0.01854,0.01628,0.05563,0.09419,0.93000,0.05551,0.05005,0.06023,0.16654,0.25930,10.48900,1,0.596362,0.641418,-3.269487,0.270641,2.690917,0.444774
105
+ phon_R01_S25_1,150.25800,154.60900,75.34900,0.00248,0.00002,0.00105,0.00136,0.00315,0.01131,0.10700,0.00522,0.00659,0.01009,0.01567,0.00495,26.75900,1,0.296888,0.722356,-6.878393,0.089267,2.004055,0.113942
106
+ phon_R01_S25_2,154.00300,160.26700,128.62100,0.00183,0.00001,0.00076,0.00100,0.00229,0.01030,0.09400,0.00469,0.00582,0.00871,0.01406,0.00243,28.40900,1,0.263654,0.691483,-7.111576,0.144780,2.065477,0.093193
107
+ phon_R01_S25_3,149.68900,160.36800,133.60800,0.00257,0.00002,0.00116,0.00134,0.00349,0.01346,0.12600,0.00660,0.00818,0.01059,0.01979,0.00578,27.42100,1,0.365488,0.719974,-6.997403,0.210279,1.994387,0.112878
108
+ phon_R01_S25_4,155.07800,163.73600,144.14800,0.00168,0.00001,0.00068,0.00092,0.00204,0.01064,0.09700,0.00522,0.00632,0.00928,0.01567,0.00233,29.74600,1,0.334171,0.677930,-6.981201,0.184550,2.129924,0.106802
109
+ phon_R01_S25_5,151.88400,157.76500,133.75100,0.00258,0.00002,0.00115,0.00122,0.00346,0.01450,0.13700,0.00633,0.00788,0.01267,0.01898,0.00659,26.83300,1,0.393563,0.700246,-6.600023,0.249172,2.499148,0.105306
110
+ phon_R01_S25_6,151.98900,157.33900,132.85700,0.00174,0.00001,0.00075,0.00096,0.00225,0.01024,0.09300,0.00455,0.00576,0.00993,0.01364,0.00238,29.92800,1,0.311369,0.676066,-6.739151,0.160686,2.296873,0.115130
111
+ phon_R01_S26_1,193.03000,208.90000,80.29700,0.00766,0.00004,0.00450,0.00389,0.01351,0.03044,0.27500,0.01771,0.01815,0.02084,0.05312,0.00947,21.93400,1,0.497554,0.740539,-5.845099,0.278679,2.608749,0.185668
112
+ phon_R01_S26_2,200.71400,223.98200,89.68600,0.00621,0.00003,0.00371,0.00337,0.01112,0.02286,0.20700,0.01192,0.01439,0.01852,0.03576,0.00704,23.23900,1,0.436084,0.727863,-5.258320,0.256454,2.550961,0.232520
113
+ phon_R01_S26_3,208.51900,220.31500,199.02000,0.00609,0.00003,0.00368,0.00339,0.01105,0.01761,0.15500,0.00952,0.01058,0.01307,0.02855,0.00830,22.40700,1,0.338097,0.712466,-6.471427,0.184378,2.502336,0.136390
114
+ phon_R01_S26_4,204.66400,221.30000,189.62100,0.00841,0.00004,0.00502,0.00485,0.01506,0.02378,0.21000,0.01277,0.01483,0.01767,0.03831,0.01316,21.30500,1,0.498877,0.722085,-4.876336,0.212054,2.376749,0.268144
115
+ phon_R01_S26_5,210.14100,232.70600,185.25800,0.00534,0.00003,0.00321,0.00280,0.00964,0.01680,0.14900,0.00861,0.01017,0.01301,0.02583,0.00620,23.67100,1,0.441097,0.722254,-5.963040,0.250283,2.489191,0.177807
116
+ phon_R01_S26_6,206.32700,226.35500,92.02000,0.00495,0.00002,0.00302,0.00246,0.00905,0.02105,0.20900,0.01107,0.01284,0.01604,0.03320,0.01048,21.86400,1,0.331508,0.715121,-6.729713,0.181701,2.938114,0.115515
117
+ phon_R01_S27_1,151.87200,492.89200,69.08500,0.00856,0.00006,0.00404,0.00385,0.01211,0.01843,0.23500,0.00796,0.00832,0.01271,0.02389,0.06051,23.69300,1,0.407701,0.662668,-4.673241,0.261549,2.702355,0.274407
118
+ phon_R01_S27_2,158.21900,442.55700,71.94800,0.00476,0.00003,0.00214,0.00207,0.00642,0.01458,0.14800,0.00606,0.00747,0.01312,0.01818,0.01554,26.35600,1,0.450798,0.653823,-6.051233,0.273280,2.640798,0.170106
119
+ phon_R01_S27_3,170.75600,450.24700,79.03200,0.00555,0.00003,0.00244,0.00261,0.00731,0.01725,0.17500,0.00757,0.00971,0.01652,0.02270,0.01802,25.69000,1,0.486738,0.676023,-4.597834,0.372114,2.975889,0.282780
120
+ phon_R01_S27_4,178.28500,442.82400,82.06300,0.00462,0.00003,0.00157,0.00194,0.00472,0.01279,0.12900,0.00617,0.00744,0.01151,0.01851,0.00856,25.02000,1,0.470422,0.655239,-4.913137,0.393056,2.816781,0.251972
121
+ phon_R01_S27_5,217.11600,233.48100,93.97800,0.00404,0.00002,0.00127,0.00128,0.00381,0.01299,0.12400,0.00679,0.00631,0.01075,0.02038,0.00681,24.58100,1,0.462516,0.582710,-5.517173,0.389295,2.925862,0.220657
122
+ phon_R01_S27_6,128.94000,479.69700,88.25100,0.00581,0.00005,0.00241,0.00314,0.00723,0.02008,0.22100,0.00849,0.01117,0.01734,0.02548,0.02350,24.74300,1,0.487756,0.684130,-6.186128,0.279933,2.686240,0.152428
123
+ phon_R01_S27_7,176.82400,215.29300,83.96100,0.00460,0.00003,0.00209,0.00221,0.00628,0.01169,0.11700,0.00534,0.00630,0.01104,0.01603,0.01161,27.16600,1,0.400088,0.656182,-4.711007,0.281618,2.655744,0.234809
124
+ phon_R01_S31_1,138.19000,203.52200,83.34000,0.00704,0.00005,0.00406,0.00398,0.01218,0.04479,0.44100,0.02587,0.02567,0.03220,0.07761,0.01968,18.30500,1,0.538016,0.741480,-5.418787,0.160267,2.090438,0.229892
125
+ phon_R01_S31_2,182.01800,197.17300,79.18700,0.00842,0.00005,0.00506,0.00449,0.01517,0.02503,0.23100,0.01372,0.01580,0.01931,0.04115,0.01813,18.78400,1,0.589956,0.732903,-5.445140,0.142466,2.174306,0.215558
126
+ phon_R01_S31_3,156.23900,195.10700,79.82000,0.00694,0.00004,0.00403,0.00395,0.01209,0.02343,0.22400,0.01289,0.01420,0.01720,0.03867,0.02020,19.19600,1,0.618663,0.728421,-5.944191,0.143359,1.929715,0.181988
127
+ phon_R01_S31_4,145.17400,198.10900,80.63700,0.00733,0.00005,0.00414,0.00422,0.01242,0.02362,0.23300,0.01235,0.01495,0.01944,0.03706,0.01874,18.85700,1,0.637518,0.735546,-5.594275,0.127950,1.765957,0.222716
128
+ phon_R01_S31_5,138.14500,197.23800,81.11400,0.00544,0.00004,0.00294,0.00327,0.00883,0.02791,0.24600,0.01484,0.01805,0.02259,0.04451,0.01794,18.17800,1,0.623209,0.738245,-5.540351,0.087165,1.821297,0.214075
129
+ phon_R01_S31_6,166.88800,198.96600,79.51200,0.00638,0.00004,0.00368,0.00351,0.01104,0.02857,0.25700,0.01547,0.01859,0.02301,0.04641,0.01796,18.33000,1,0.585169,0.736964,-5.825257,0.115697,1.996146,0.196535
130
+ phon_R01_S32_1,119.03100,127.53300,109.21600,0.00440,0.00004,0.00214,0.00192,0.00641,0.01033,0.09800,0.00538,0.00570,0.00811,0.01614,0.01724,26.84200,1,0.457541,0.699787,-6.890021,0.152941,2.328513,0.112856
131
+ phon_R01_S32_2,120.07800,126.63200,105.66700,0.00270,0.00002,0.00116,0.00135,0.00349,0.01022,0.09000,0.00476,0.00588,0.00903,0.01428,0.00487,26.36900,1,0.491345,0.718839,-5.892061,0.195976,2.108873,0.183572
132
+ phon_R01_S32_3,120.28900,128.14300,100.20900,0.00492,0.00004,0.00269,0.00238,0.00808,0.01412,0.12500,0.00703,0.00820,0.01194,0.02110,0.01610,23.94900,1,0.467160,0.724045,-6.135296,0.203630,2.539724,0.169923
133
+ phon_R01_S32_4,120.25600,125.30600,104.77300,0.00407,0.00003,0.00224,0.00205,0.00671,0.01516,0.13800,0.00721,0.00815,0.01310,0.02164,0.01015,26.01700,1,0.468621,0.735136,-6.112667,0.217013,2.527742,0.170633
134
+ phon_R01_S32_5,119.05600,125.21300,86.79500,0.00346,0.00003,0.00169,0.00170,0.00508,0.01201,0.10600,0.00633,0.00701,0.00915,0.01898,0.00903,23.38900,1,0.470972,0.721308,-5.436135,0.254909,2.516320,0.232209
135
+ phon_R01_S32_6,118.74700,123.72300,109.83600,0.00331,0.00003,0.00168,0.00171,0.00504,0.01043,0.09900,0.00490,0.00621,0.00903,0.01471,0.00504,25.61900,1,0.482296,0.723096,-6.448134,0.178713,2.034827,0.141422
136
+ phon_R01_S33_1,106.51600,112.77700,93.10500,0.00589,0.00006,0.00291,0.00319,0.00873,0.04932,0.44100,0.02683,0.03112,0.03651,0.08050,0.03031,17.06000,1,0.637814,0.744064,-5.301321,0.320385,2.375138,0.243080
137
+ phon_R01_S33_2,110.45300,127.61100,105.55400,0.00494,0.00004,0.00244,0.00315,0.00731,0.04128,0.37900,0.02229,0.02592,0.03316,0.06688,0.02529,17.70700,1,0.653427,0.706687,-5.333619,0.322044,2.631793,0.228319
138
+ phon_R01_S33_3,113.40000,133.34400,107.81600,0.00451,0.00004,0.00219,0.00283,0.00658,0.04879,0.43100,0.02385,0.02973,0.04370,0.07154,0.02278,19.01300,1,0.647900,0.708144,-4.378916,0.300067,2.445502,0.259451
139
+ phon_R01_S33_4,113.16600,130.27000,100.67300,0.00502,0.00004,0.00257,0.00312,0.00772,0.05279,0.47600,0.02896,0.03347,0.04134,0.08689,0.03690,16.74700,1,0.625362,0.708617,-4.654894,0.304107,2.672362,0.274387
140
+ phon_R01_S33_5,112.23900,126.60900,104.09500,0.00472,0.00004,0.00238,0.00290,0.00715,0.05643,0.51700,0.03070,0.03530,0.04451,0.09211,0.02629,17.36600,1,0.640945,0.701404,-5.634576,0.306014,2.419253,0.209191
141
+ phon_R01_S33_6,116.15000,131.73100,109.81500,0.00381,0.00003,0.00181,0.00232,0.00542,0.03026,0.26700,0.01514,0.01812,0.02770,0.04543,0.01827,18.80100,1,0.624811,0.696049,-5.866357,0.233070,2.445646,0.184985
142
+ phon_R01_S34_1,170.36800,268.79600,79.54300,0.00571,0.00003,0.00232,0.00269,0.00696,0.03273,0.28100,0.01713,0.01964,0.02824,0.05139,0.02485,18.54000,1,0.677131,0.685057,-4.796845,0.397749,2.963799,0.277227
143
+ phon_R01_S34_2,208.08300,253.79200,91.80200,0.00757,0.00004,0.00428,0.00428,0.01285,0.06725,0.57100,0.04016,0.04003,0.04464,0.12047,0.04238,15.64800,1,0.606344,0.665945,-5.410336,0.288917,2.665133,0.231723
144
+ phon_R01_S34_3,198.45800,219.29000,148.69100,0.00376,0.00002,0.00182,0.00215,0.00546,0.03527,0.29700,0.02055,0.02076,0.02530,0.06165,0.01728,18.70200,1,0.606273,0.661735,-5.585259,0.310746,2.465528,0.209863
145
+ phon_R01_S34_4,202.80500,231.50800,86.23200,0.00370,0.00002,0.00189,0.00211,0.00568,0.01997,0.18000,0.01117,0.01177,0.01506,0.03350,0.02010,18.68700,1,0.536102,0.632631,-5.898673,0.213353,2.470746,0.189032
146
+ phon_R01_S34_5,202.54400,241.35000,164.16800,0.00254,0.00001,0.00100,0.00133,0.00301,0.02662,0.22800,0.01475,0.01558,0.02006,0.04426,0.01049,20.68000,1,0.497480,0.630409,-6.132663,0.220617,2.576563,0.159777
147
+ phon_R01_S34_6,223.36100,263.87200,87.63800,0.00352,0.00002,0.00169,0.00188,0.00506,0.02536,0.22500,0.01379,0.01478,0.01909,0.04137,0.01493,20.36600,1,0.566849,0.574282,-5.456811,0.345238,2.840556,0.232861
148
+ phon_R01_S35_1,169.77400,191.75900,151.45100,0.01568,0.00009,0.00863,0.00946,0.02589,0.08143,0.82100,0.03804,0.05426,0.08808,0.11411,0.07530,12.35900,1,0.561610,0.793509,-3.297668,0.414758,3.413649,0.457533
149
+ phon_R01_S35_2,183.52000,216.81400,161.34000,0.01466,0.00008,0.00849,0.00819,0.02546,0.06050,0.61800,0.02865,0.04101,0.06359,0.08595,0.06057,14.36700,1,0.478024,0.768974,-4.276605,0.355736,3.142364,0.336085
150
+ phon_R01_S35_3,188.62000,216.30200,165.98200,0.01719,0.00009,0.00996,0.01027,0.02987,0.07118,0.72200,0.03474,0.04580,0.06824,0.10422,0.08069,12.29800,1,0.552870,0.764036,-3.377325,0.335357,3.274865,0.418646
151
+ phon_R01_S35_4,202.63200,565.74000,177.25800,0.01627,0.00008,0.00919,0.00963,0.02756,0.07170,0.83300,0.03515,0.04265,0.06460,0.10546,0.07889,14.98900,1,0.427627,0.775708,-4.892495,0.262281,2.910213,0.270173
152
+ phon_R01_S35_5,186.69500,211.96100,149.44200,0.01872,0.00010,0.01075,0.01154,0.03225,0.05830,0.78400,0.02699,0.03714,0.06259,0.08096,0.10952,12.52900,1,0.507826,0.762726,-4.484303,0.340256,2.958815,0.301487
153
+ phon_R01_S35_6,192.81800,224.42900,168.79300,0.03107,0.00016,0.01800,0.01958,0.05401,0.11908,1.30200,0.05647,0.07940,0.13778,0.16942,0.21713,8.44100,1,0.625866,0.768320,-2.434031,0.450493,3.079221,0.527367
154
+ phon_R01_S35_7,198.11600,233.09900,174.47800,0.02714,0.00014,0.01568,0.01699,0.04705,0.08684,1.01800,0.04284,0.05556,0.08318,0.12851,0.16265,9.44900,1,0.584164,0.754449,-2.839756,0.356224,3.184027,0.454721
155
+ phon_R01_S37_1,121.34500,139.64400,98.25000,0.00684,0.00006,0.00388,0.00332,0.01164,0.02534,0.24100,0.01340,0.01399,0.02056,0.04019,0.04179,21.52000,1,0.566867,0.670475,-4.865194,0.246404,2.013530,0.168581
156
+ phon_R01_S37_2,119.10000,128.44200,88.83300,0.00692,0.00006,0.00393,0.00300,0.01179,0.02682,0.23600,0.01484,0.01405,0.02018,0.04451,0.04611,21.82400,1,0.651680,0.659333,-4.239028,0.175691,2.451130,0.247455
157
+ phon_R01_S37_3,117.87000,127.34900,95.65400,0.00647,0.00005,0.00356,0.00300,0.01067,0.03087,0.27600,0.01659,0.01804,0.02402,0.04977,0.02631,22.43100,1,0.628300,0.652025,-3.583722,0.207914,2.439597,0.206256
158
+ phon_R01_S37_4,122.33600,142.36900,94.79400,0.00727,0.00006,0.00415,0.00339,0.01246,0.02293,0.22300,0.01205,0.01289,0.01771,0.03615,0.03191,22.95300,1,0.611679,0.623731,-5.435100,0.230532,2.699645,0.220546
159
+ phon_R01_S37_5,117.96300,134.20900,100.75700,0.01813,0.00015,0.01117,0.00718,0.03351,0.04912,0.43800,0.02610,0.02161,0.02916,0.07830,0.10748,19.07500,1,0.630547,0.646786,-3.444478,0.303214,2.964568,0.261305
160
+ phon_R01_S37_6,126.14400,154.28400,97.54300,0.00975,0.00008,0.00593,0.00454,0.01778,0.02852,0.26600,0.01500,0.01581,0.02157,0.04499,0.03828,21.53400,1,0.635015,0.627337,-5.070096,0.280091,2.892300,0.249703
161
+ phon_R01_S39_1,127.93000,138.75200,112.17300,0.00605,0.00005,0.00321,0.00318,0.00962,0.03235,0.33900,0.01360,0.01650,0.03105,0.04079,0.02663,19.65100,1,0.654945,0.675865,-5.498456,0.234196,2.103014,0.216638
162
+ phon_R01_S39_2,114.23800,124.39300,77.02200,0.00581,0.00005,0.00299,0.00316,0.00896,0.04009,0.40600,0.01579,0.01994,0.04114,0.04736,0.02073,20.43700,1,0.653139,0.694571,-5.185987,0.259229,2.151121,0.244948
163
+ phon_R01_S39_3,115.32200,135.73800,107.80200,0.00619,0.00005,0.00352,0.00329,0.01057,0.03273,0.32500,0.01644,0.01722,0.02931,0.04933,0.02810,19.38800,1,0.577802,0.684373,-5.283009,0.226528,2.442906,0.238281
164
+ phon_R01_S39_4,114.55400,126.77800,91.12100,0.00651,0.00006,0.00366,0.00340,0.01097,0.03658,0.36900,0.01864,0.01940,0.03091,0.05592,0.02707,18.95400,1,0.685151,0.719576,-5.529833,0.242750,2.408689,0.220520
165
+ phon_R01_S39_5,112.15000,131.66900,97.52700,0.00519,0.00005,0.00291,0.00284,0.00873,0.01756,0.15500,0.00967,0.01033,0.01363,0.02902,0.01435,21.21900,1,0.557045,0.673086,-5.617124,0.184896,1.871871,0.212386
166
+ phon_R01_S39_6,102.27300,142.83000,85.90200,0.00907,0.00009,0.00493,0.00461,0.01480,0.02814,0.27200,0.01579,0.01553,0.02073,0.04736,0.03882,18.44700,1,0.671378,0.674562,-2.929379,0.396746,2.560422,0.367233
167
+ phon_R01_S42_1,236.20000,244.66300,102.13700,0.00277,0.00001,0.00154,0.00153,0.00462,0.02448,0.21700,0.01410,0.01426,0.01621,0.04231,0.00620,24.07800,0,0.469928,0.628232,-6.816086,0.172270,2.235197,0.119652
168
+ phon_R01_S42_2,237.32300,243.70900,229.25600,0.00303,0.00001,0.00173,0.00159,0.00519,0.01242,0.11600,0.00696,0.00747,0.00882,0.02089,0.00533,24.67900,0,0.384868,0.626710,-7.018057,0.176316,1.852402,0.091604
169
+ phon_R01_S42_3,260.10500,264.91900,237.30300,0.00339,0.00001,0.00205,0.00186,0.00616,0.02030,0.19700,0.01186,0.01230,0.01367,0.03557,0.00910,21.08300,0,0.440988,0.628058,-7.517934,0.160414,1.881767,0.075587
170
+ phon_R01_S42_4,197.56900,217.62700,90.79400,0.00803,0.00004,0.00490,0.00448,0.01470,0.02177,0.18900,0.01279,0.01272,0.01439,0.03836,0.01337,19.26900,0,0.372222,0.725216,-5.736781,0.164529,2.882450,0.202879
171
+ phon_R01_S42_5,240.30100,245.13500,219.78300,0.00517,0.00002,0.00316,0.00283,0.00949,0.02018,0.21200,0.01176,0.01191,0.01344,0.03529,0.00965,21.02000,0,0.371837,0.646167,-7.169701,0.073298,2.266432,0.100881
172
+ phon_R01_S42_6,244.99000,272.21000,239.17000,0.00451,0.00002,0.00279,0.00237,0.00837,0.01897,0.18100,0.01084,0.01121,0.01255,0.03253,0.01049,21.52800,0,0.522812,0.646818,-7.304500,0.171088,2.095237,0.096220
173
+ phon_R01_S43_1,112.54700,133.37400,105.71500,0.00355,0.00003,0.00166,0.00190,0.00499,0.01358,0.12900,0.00664,0.00786,0.01140,0.01992,0.00435,26.43600,0,0.413295,0.756700,-6.323531,0.218885,2.193412,0.160376
174
+ phon_R01_S43_2,110.73900,113.59700,100.13900,0.00356,0.00003,0.00170,0.00200,0.00510,0.01484,0.13300,0.00754,0.00950,0.01285,0.02261,0.00430,26.55000,0,0.369090,0.776158,-6.085567,0.192375,1.889002,0.174152
175
+ phon_R01_S43_3,113.71500,116.44300,96.91300,0.00349,0.00003,0.00171,0.00203,0.00514,0.01472,0.13300,0.00748,0.00905,0.01148,0.02245,0.00478,26.54700,0,0.380253,0.766700,-5.943501,0.192150,1.852542,0.179677
176
+ phon_R01_S43_4,117.00400,144.46600,99.92300,0.00353,0.00003,0.00176,0.00218,0.00528,0.01657,0.14500,0.00881,0.01062,0.01318,0.02643,0.00590,25.44500,0,0.387482,0.756482,-6.012559,0.229298,1.872946,0.163118
177
+ phon_R01_S43_5,115.38000,123.10900,108.63400,0.00332,0.00003,0.00160,0.00199,0.00480,0.01503,0.13700,0.00812,0.00933,0.01133,0.02436,0.00401,26.00500,0,0.405991,0.761255,-5.966779,0.197938,1.974857,0.184067
178
+ phon_R01_S43_6,116.38800,129.03800,108.97000,0.00346,0.00003,0.00169,0.00213,0.00507,0.01725,0.15500,0.00874,0.01021,0.01331,0.02623,0.00415,26.14300,0,0.361232,0.763242,-6.016891,0.109256,2.004719,0.174429
179
+ phon_R01_S44_1,151.73700,190.20400,129.85900,0.00314,0.00002,0.00135,0.00162,0.00406,0.01469,0.13200,0.00728,0.00886,0.01230,0.02184,0.00570,24.15100,1,0.396610,0.745957,-6.486822,0.197919,2.449763,0.132703
180
+ phon_R01_S44_2,148.79000,158.35900,138.99000,0.00309,0.00002,0.00152,0.00186,0.00456,0.01574,0.14200,0.00839,0.00956,0.01309,0.02518,0.00488,24.41200,1,0.402591,0.762508,-6.311987,0.182459,2.251553,0.160306
181
+ phon_R01_S44_3,148.14300,155.98200,135.04100,0.00392,0.00003,0.00204,0.00231,0.00612,0.01450,0.13100,0.00725,0.00876,0.01263,0.02175,0.00540,23.68300,1,0.398499,0.778349,-5.711205,0.240875,2.845109,0.192730
182
+ phon_R01_S44_4,150.44000,163.44100,144.73600,0.00396,0.00003,0.00206,0.00233,0.00619,0.02551,0.23700,0.01321,0.01574,0.02148,0.03964,0.00611,23.13300,1,0.352396,0.759320,-6.261446,0.183218,2.264226,0.144105
183
+ phon_R01_S44_5,148.46200,161.07800,141.99800,0.00397,0.00003,0.00202,0.00235,0.00605,0.01831,0.16300,0.00950,0.01103,0.01559,0.02849,0.00639,22.86600,1,0.408598,0.768845,-5.704053,0.216204,2.679185,0.197710
184
+ phon_R01_S44_6,149.81800,163.41700,144.78600,0.00336,0.00002,0.00174,0.00198,0.00521,0.02145,0.19800,0.01155,0.01341,0.01666,0.03464,0.00595,23.00800,1,0.329577,0.757180,-6.277170,0.109397,2.209021,0.156368
185
+ phon_R01_S49_1,117.22600,123.92500,106.65600,0.00417,0.00004,0.00186,0.00270,0.00558,0.01909,0.17100,0.00864,0.01223,0.01949,0.02592,0.00955,23.07900,0,0.603515,0.669565,-5.619070,0.191576,2.027228,0.215724
186
+ phon_R01_S49_2,116.84800,217.55200,99.50300,0.00531,0.00005,0.00260,0.00346,0.00780,0.01795,0.16300,0.00810,0.01144,0.01756,0.02429,0.01179,22.08500,0,0.663842,0.656516,-5.198864,0.206768,2.120412,0.252404
187
+ phon_R01_S49_3,116.28600,177.29100,96.98300,0.00314,0.00003,0.00134,0.00192,0.00403,0.01564,0.13600,0.00667,0.00990,0.01691,0.02001,0.00737,24.19900,0,0.598515,0.654331,-5.592584,0.133917,2.058658,0.214346
188
+ phon_R01_S49_4,116.55600,592.03000,86.22800,0.00496,0.00004,0.00254,0.00263,0.00762,0.01660,0.15400,0.00820,0.00972,0.01491,0.02460,0.01397,23.95800,0,0.566424,0.667654,-6.431119,0.153310,2.161936,0.120605
189
+ phon_R01_S49_5,116.34200,581.28900,94.24600,0.00267,0.00002,0.00115,0.00148,0.00345,0.01300,0.11700,0.00631,0.00789,0.01144,0.01892,0.00680,25.02300,0,0.528485,0.663884,-6.359018,0.116636,2.152083,0.138868
190
+ phon_R01_S49_6,114.56300,119.16700,86.64700,0.00327,0.00003,0.00146,0.00184,0.00439,0.01185,0.10600,0.00557,0.00721,0.01095,0.01672,0.00703,24.77500,0,0.555303,0.659132,-6.710219,0.149694,1.913990,0.121777
191
+ phon_R01_S50_1,201.77400,262.70700,78.22800,0.00694,0.00003,0.00412,0.00396,0.01235,0.02574,0.25500,0.01454,0.01582,0.01758,0.04363,0.04441,19.36800,0,0.508479,0.683761,-6.934474,0.159890,2.316346,0.112838
192
+ phon_R01_S50_2,174.18800,230.97800,94.26100,0.00459,0.00003,0.00263,0.00259,0.00790,0.04087,0.40500,0.02336,0.02498,0.02745,0.07008,0.02764,19.51700,0,0.448439,0.657899,-6.538586,0.121952,2.657476,0.133050
193
+ phon_R01_S50_3,209.51600,253.01700,89.48800,0.00564,0.00003,0.00331,0.00292,0.00994,0.02751,0.26300,0.01604,0.01657,0.01879,0.04812,0.01810,19.14700,0,0.431674,0.683244,-6.195325,0.129303,2.784312,0.168895
194
+ phon_R01_S50_4,174.68800,240.00500,74.28700,0.01360,0.00008,0.00624,0.00564,0.01873,0.02308,0.25600,0.01268,0.01365,0.01667,0.03804,0.10715,17.88300,0,0.407567,0.655683,-6.787197,0.158453,2.679772,0.131728
195
+ phon_R01_S50_5,198.76400,396.96100,74.90400,0.00740,0.00004,0.00370,0.00390,0.01109,0.02296,0.24100,0.01265,0.01321,0.01588,0.03794,0.07223,19.02000,0,0.451221,0.643956,-6.744577,0.207454,2.138608,0.123306
196
+ phon_R01_S50_6,214.28900,260.27700,77.97300,0.00567,0.00003,0.00295,0.00317,0.00885,0.01884,0.19000,0.01026,0.01161,0.01373,0.03078,0.04398,21.20900,0,0.462803,0.664357,-5.724056,0.190667,2.555477,0.148569
data_cache/obstetrics_fetal.csv ADDED
The diff for this file is too large to render. See raw diff
 
data_cache/oncology_cervical.csv ADDED
The diff for this file is too large to render. See raw diff
 
data_cache/ophthalmology.arff ADDED
The diff for this file is too large to render. See raw diff
 
data_cache/orthopaedics.arff ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @relation column_2C_weka
2
+
3
+ @attribute pelvic_incidence numeric
4
+ @attribute pelvic_tilt numeric
5
+ @attribute lumbar_lordosis_angle numeric
6
+ @attribute sacral_slope numeric
7
+ @attribute pelvic_radius numeric
8
+ @attribute degree_spondylolisthesis numeric
9
+
10
+ @attribute class {Abnormal, Normal}
11
+
12
+ @data
13
+ 63.0278175,22.55258597,39.60911701,40.47523153,98.67291675,-0.254399986,Abnormal
14
+ 39.05695098,10.06099147,25.01537822,28.99595951,114.4054254,4.564258645,Abnormal
15
+ 68.83202098,22.21848205,50.09219357,46.61353893,105.9851355,-3.530317314,Abnormal
16
+ 69.29700807,24.65287791,44.31123813,44.64413017,101.8684951,11.21152344,Abnormal
17
+ 49.71285934,9.652074879,28.317406,40.06078446,108.1687249,7.918500615,Abnormal
18
+ 40.25019968,13.92190658,25.1249496,26.32829311,130.3278713,2.230651729,Abnormal
19
+ 53.43292815,15.86433612,37.16593387,37.56859203,120.5675233,5.988550702,Abnormal
20
+ 45.36675362,10.75561143,29.03834896,34.61114218,117.2700675,-10.67587083,Abnormal
21
+ 43.79019026,13.5337531,42.69081398,30.25643716,125.0028927,13.28901817,Abnormal
22
+ 36.68635286,5.010884121,41.9487509,31.67546874,84.24141517,0.664437117,Abnormal
23
+ 49.70660953,13.04097405,31.33450009,36.66563548,108.6482654,-7.825985755,Abnormal
24
+ 31.23238734,17.71581923,15.5,13.51656811,120.0553988,0.499751446,Abnormal
25
+ 48.91555137,19.96455616,40.26379358,28.95099521,119.321358,8.028894629,Abnormal
26
+ 53.5721702,20.46082824,33.1,33.11134196,110.9666978,7.044802938,Abnormal
27
+ 57.30022656,24.1888846,46.99999999,33.11134196,116.8065868,5.766946943,Abnormal
28
+ 44.31890674,12.53799164,36.098763,31.78091509,124.1158358,5.415825143,Abnormal
29
+ 63.83498162,20.36250706,54.55243367,43.47247456,112.3094915,-0.622526643,Abnormal
30
+ 31.27601184,3.14466948,32.56299592,28.13134236,129.0114183,3.623020073,Abnormal
31
+ 38.69791243,13.44474904,31,25.25316339,123.1592507,1.429185758,Abnormal
32
+ 41.72996308,12.25407408,30.12258646,29.475889,116.5857056,-1.244402488,Abnormal
33
+ 43.92283983,14.17795853,37.8325467,29.7448813,134.4610156,6.451647637,Abnormal
34
+ 54.91944259,21.06233245,42.19999999,33.85711014,125.2127163,2.432561437,Abnormal
35
+ 63.07361096,24.41380271,53.99999999,38.65980825,106.4243295,15.77969683,Abnormal
36
+ 45.54078988,13.06959759,30.29832059,32.47119229,117.9808303,-4.987129618,Abnormal
37
+ 36.12568347,22.75875277,29,13.3669307,115.5771163,-3.237562489,Abnormal
38
+ 54.12492019,26.65048856,35.32974693,27.47443163,121.447011,1.571204816,Abnormal
39
+ 26.14792141,10.75945357,14,15.38846783,125.2032956,-10.09310817,Abnormal
40
+ 43.58096394,16.5088837,46.99999999,27.07208024,109.271634,8.992815727,Abnormal
41
+ 44.5510115,21.93114655,26.78591597,22.61986495,111.0729197,2.652320636,Abnormal
42
+ 66.87921138,24.89199889,49.27859673,41.9872125,113.4770183,-2.005891748,Abnormal
43
+ 50.81926781,15.40221253,42.52893886,35.41705528,112.192804,10.86956554,Abnormal
44
+ 46.39026008,11.07904664,32.13655345,35.31121344,98.77454633,6.386831648,Abnormal
45
+ 44.93667457,17.44383762,27.78057555,27.49283695,117.9803245,5.569619587,Abnormal
46
+ 38.66325708,12.98644139,39.99999999,25.67681568,124.914118,2.703008052,Abnormal
47
+ 59.59554032,31.99824445,46.56025198,27.59729587,119.3303537,1.474285836,Abnormal
48
+ 31.48421834,7.82622134,24.28481815,23.657997,113.8331446,4.393080498,Abnormal
49
+ 32.09098679,6.989378081,35.99819848,25.10160871,132.264735,6.413427708,Abnormal
50
+ 35.70345781,19.44325311,20.7,16.26020471,137.5406125,-0.263489651,Abnormal
51
+ 55.84328595,28.84744756,47.69054322,26.99583839,123.3118449,2.812426855,Abnormal
52
+ 52.41938511,19.01156052,35.87265953,33.40782459,116.5597709,1.694705102,Abnormal
53
+ 35.49244617,11.7016723,15.59036345,23.79077387,106.9388517,-3.460357991,Abnormal
54
+ 46.44207842,8.39503589,29.0372302,38.04704253,115.4814047,2.045475795,Abnormal
55
+ 53.85479842,19.23064334,32.77905978,34.62415508,121.6709148,5.329843204,Abnormal
56
+ 66.28539377,26.32784484,47.49999999,39.95754893,121.2196839,-0.799624469,Abnormal
57
+ 56.03021778,16.2979149,62.27527456,39.73230287,114.0231172,-2.325683841,Abnormal
58
+ 50.91244034,23.01516931,46.99999999,27.89727103,117.4222591,-2.526701511,Abnormal
59
+ 48.332638,22.22778399,36.18199318,26.10485401,117.3846251,6.481709096,Abnormal
60
+ 41.35250407,16.57736351,30.70619135,24.77514057,113.2666746,-4.497957556,Abnormal
61
+ 40.55735663,17.97778407,34,22.57957256,121.0462458,-1.537383074,Abnormal
62
+ 41.76773173,17.89940172,20.0308863,23.86833001,118.3633889,2.062962549,Abnormal
63
+ 55.28585178,20.44011836,34,34.84573342,115.8770174,3.558372358,Abnormal
64
+ 74.43359316,41.55733141,27.7,32.87626175,107.9493045,5.000088788,Abnormal
65
+ 50.20966979,29.76012218,36.10400731,20.44954761,128.2925148,5.740614083,Abnormal
66
+ 30.14993632,11.91744524,34,18.23249108,112.6841408,11.46322327,Abnormal
67
+ 41.17167989,17.32120599,33.46940277,23.85047391,116.3778894,-9.569249858,Abnormal
68
+ 47.65772963,13.27738491,36.67998541,34.38034472,98.24978071,6.273012173,Abnormal
69
+ 43.34960621,7.467468964,28.06548279,35.88213725,112.7761866,5.753277458,Abnormal
70
+ 46.85578065,15.35151393,38,31.50426672,116.2509174,1.662705589,Abnormal
71
+ 43.20318499,19.66314572,35,23.54003927,124.8461088,-2.919075955,Abnormal
72
+ 48.10923638,14.93072472,35.56468278,33.17851166,124.0564518,7.947904861,Abnormal
73
+ 74.37767772,32.05310438,78.77201304,42.32457334,143.5606905,56.12590603,Abnormal
74
+ 89.68056731,32.70443487,83.13073216,56.97613244,129.9554764,92.02727682,Abnormal
75
+ 44.529051,9.433234213,51.99999999,35.09581679,134.7117723,29.10657504,Abnormal
76
+ 77.69057712,21.38064464,64.42944191,56.30993248,114.818751,26.93184095,Abnormal
77
+ 76.1472121,21.93618556,82.96150249,54.21102654,123.9320096,10.43197194,Abnormal
78
+ 83.93300857,41.28630543,61.99999999,42.64670314,115.012334,26.58810016,Abnormal
79
+ 78.49173027,22.1817978,59.99999999,56.30993248,118.5303266,27.38321314,Abnormal
80
+ 75.64973136,19.33979889,64.14868477,56.30993248,95.9036288,69.55130292,Abnormal
81
+ 72.07627839,18.94617604,50.99999999,53.13010236,114.2130126,1.01004051,Abnormal
82
+ 58.59952852,-0.261499046,51.49999999,58.86102756,102.0428116,28.05969711,Abnormal
83
+ 72.56070163,17.38519079,51.99999999,55.17551084,119.1937238,32.10853735,Abnormal
84
+ 86.90079431,32.9281677,47.79434664,53.97262661,135.0753635,101.7190919,Abnormal
85
+ 84.97413208,33.02117462,60.85987263,51.95295747,125.6595336,74.33340864,Abnormal
86
+ 55.512212,20.09515673,43.99999999,35.41705528,122.648753,34.55294641,Abnormal
87
+ 72.2223343,23.07771056,90.99999999,49.14462374,137.7366546,56.80409277,Abnormal
88
+ 70.22145219,39.82272448,68.11840309,30.39872771,148.5255624,145.3781432,Abnormal
89
+ 86.75360946,36.04301632,69.22104479,50.71059314,139.414504,110.8607824,Abnormal
90
+ 58.78254775,7.667044186,53.33894082,51.11550357,98.50115697,51.58412476,Abnormal
91
+ 67.41253785,17.44279712,60.14464036,49.96974073,111.12397,33.15764573,Abnormal
92
+ 47.74467877,12.08935067,38.99999999,35.6553281,117.5120039,21.68240136,Abnormal
93
+ 77.10657122,30.46999418,69.48062839,46.63657704,112.1516,70.75908308,Abnormal
94
+ 74.00554124,21.12240192,57.37950226,52.88313932,120.2059626,74.55516588,Abnormal
95
+ 88.62390839,29.08945331,47.56426247,59.53445508,121.7647796,51.80589921,Abnormal
96
+ 81.10410039,24.79416792,77.88702048,56.30993247,151.8398566,65.21461611,Abnormal
97
+ 76.32600187,42.39620445,57.19999999,33.92979742,124.267007,50.12745689,Abnormal
98
+ 45.44374959,9.906071798,44.99999999,35.53767779,163.0710405,20.31531532,Abnormal
99
+ 59.78526526,17.87932332,59.20646143,41.90594194,119.3191109,22.12386874,Abnormal
100
+ 44.91414916,10.21899563,44.63091389,34.69515353,130.0756599,37.36453993,Abnormal
101
+ 56.60577127,16.80020017,41.99999999,39.80557109,127.2945222,24.0185747,Abnormal
102
+ 71.18681115,23.89620111,43.6966651,47.29061004,119.8649383,27.28398451,Abnormal
103
+ 81.65603206,28.74886935,58.23282055,52.9071627,114.7698556,30.60914842,Abnormal
104
+ 70.95272771,20.15993121,62.85910914,50.7927965,116.1779325,32.522331,Abnormal
105
+ 85.35231529,15.84491006,71.66865979,69.50740523,124.4197875,76.0206034,Abnormal
106
+ 58.10193455,14.83763914,79.64983825,43.26429541,113.5876551,50.23787808,Abnormal
107
+ 94.17482232,15.38076983,67.70572132,78.79405249,114.8901128,53.25522004,Abnormal
108
+ 57.52235608,33.64707522,50.90985841,23.87528085,140.9817119,148.7537109,Abnormal
109
+ 96.65731511,19.46158117,90.21149828,77.19573393,120.6730408,64.08099841,Abnormal
110
+ 74.72074622,19.75694203,82.73535954,54.96380419,109.3565941,33.30606685,Abnormal
111
+ 77.65511874,22.4329501,93.89277881,55.22216863,123.0557067,61.2111866,Abnormal
112
+ 58.52162283,13.92228609,41.46785522,44.59933674,115.514798,30.3879839,Abnormal
113
+ 84.5856071,30.36168482,65.47948563,54.22392228,108.0102185,25.11847846,Abnormal
114
+ 79.93857026,18.7740711,63.31183486,61.16449915,114.787107,38.53874133,Abnormal
115
+ 70.39930842,13.46998624,61.19999999,56.92932218,102.3375244,25.53842852,Abnormal
116
+ 49.78212054,6.46680486,52.99999999,43.31531568,110.8647831,25.33564729,Abnormal
117
+ 77.40933294,29.39654543,63.23230243,48.0127875,118.4507311,93.56373734,Abnormal
118
+ 65.00796426,27.60260762,50.94751899,37.40535663,116.5811088,7.015977884,Abnormal
119
+ 65.01377322,9.838262375,57.73583722,55.17551084,94.73852542,49.69695462,Abnormal
120
+ 78.42595126,33.42595126,76.27743927,45,138.5541111,77.15517241,Abnormal
121
+ 63.17298709,6.330910974,62.99999999,56.84207612,110.6440206,42.60807567,Abnormal
122
+ 68.61300092,15.0822353,63.01469619,53.53076561,123.4311742,39.49798659,Abnormal
123
+ 63.90063261,13.7062037,62.12433389,50.19442891,114.1292425,41.42282844,Abnormal
124
+ 84.99895554,29.61009772,83.35219438,55.38885782,126.9129899,71.32117542,Abnormal
125
+ 42.02138603,-6.554948347,67.89999999,48.57633437,111.5857819,27.33867086,Abnormal
126
+ 69.75666532,19.27929659,48.49999999,50.47736873,96.49136982,51.1696403,Abnormal
127
+ 80.98807441,36.84317181,86.96060151,44.1449026,141.0881494,85.87215224,Abnormal
128
+ 129.8340406,8.404475005,48.38405705,121.4295656,107.690466,418.5430821,Abnormal
129
+ 70.48410444,12.48948765,62.41714208,57.99461679,114.1900488,56.90244779,Abnormal
130
+ 86.04127982,38.75066978,47.87140494,47.29061004,122.0929536,61.98827709,Abnormal
131
+ 65.53600255,24.15748726,45.77516991,41.3785153,136.4403015,16.37808564,Abnormal
132
+ 60.7538935,15.7538935,43.19915768,45,113.0533309,31.69354839,Abnormal
133
+ 54.74177518,12.09507205,40.99999999,42.64670314,117.6432188,40.3823266,Abnormal
134
+ 83.87994081,23.07742686,87.14151223,60.80251395,124.6460723,80.55560527,Abnormal
135
+ 80.07491418,48.06953097,52.40343873,32.00538321,110.7099121,67.72731595,Abnormal
136
+ 65.66534698,10.54067533,56.48913545,55.12467166,109.1627768,53.93202006,Abnormal
137
+ 74.71722805,14.32167879,32.5,60.39554926,107.1822176,37.01708012,Abnormal
138
+ 48.06062649,5.687032126,57.05716117,42.37359436,95.44375749,32.83587702,Abnormal
139
+ 70.67689818,21.70440224,59.18116082,48.97249594,103.0083545,27.8101478,Abnormal
140
+ 80.43342782,16.998479,66.53601753,63.43494882,116.4389807,57.78125,Abnormal
141
+ 90.51396072,28.27250132,69.8139423,62.2414594,100.8921596,58.82364821,Abnormal
142
+ 77.23689752,16.73762214,49.77553438,60.49927538,110.6903772,39.7871542,Abnormal
143
+ 50.06678595,9.120340183,32.16846267,40.94644577,99.71245318,26.76669655,Abnormal
144
+ 69.78100617,13.77746531,57.99999999,56.00354085,118.9306656,17.91456046,Abnormal
145
+ 69.62628302,21.12275138,52.76659472,48.50353164,116.8030913,54.81686729,Abnormal
146
+ 81.75441933,20.12346562,70.56044038,61.63095371,119.4250857,55.50688907,Abnormal
147
+ 52.20469309,17.21267289,78.09496877,34.9920202,136.9725168,54.93913416,Abnormal
148
+ 77.12134424,30.3498745,77.48108264,46.77146974,110.6111484,82.09360704,Abnormal
149
+ 88.0244989,39.84466878,81.77447308,48.17983012,116.6015376,56.76608323,Abnormal
150
+ 83.39660609,34.31098931,78.42329287,49.08561678,110.4665164,49.67209559,Abnormal
151
+ 72.05403412,24.70073725,79.87401586,47.35329687,107.1723576,56.42615873,Abnormal
152
+ 85.09550254,21.06989651,91.73479193,64.02560604,109.062312,38.03283108,Abnormal
153
+ 69.56348614,15.4011391,74.43849743,54.16234705,105.0673556,29.70121083,Abnormal
154
+ 89.5049473,48.90365265,72.0034229,40.60129465,134.6342912,118.3533701,Abnormal
155
+ 85.29017283,18.27888963,100.7442198,67.0112832,110.6607005,58.88494802,Abnormal
156
+ 60.62621697,20.5959577,64.53526221,40.03025927,117.2255542,104.8592474,Abnormal
157
+ 60.04417717,14.30965614,58.03886519,45.73452103,105.1316639,30.40913315,Abnormal
158
+ 85.64378664,42.68919513,78.7506635,42.95459151,105.1440758,42.88742577,Abnormal
159
+ 85.58171024,30.45703858,78.23137949,55.12467166,114.8660487,68.37612182,Abnormal
160
+ 55.08076562,-3.759929872,55.99999999,58.84069549,109.9153669,31.77358318,Abnormal
161
+ 65.75567895,9.832874231,50.82289501,55.92280472,104.3949585,39.30721246,Abnormal
162
+ 79.24967118,23.94482471,40.79669829,55.30484647,98.62251165,36.7063954,Abnormal
163
+ 81.11260488,20.69044356,60.68700588,60.42216132,94.01878339,40.51098228,Abnormal
164
+ 48.0306238,3.969814743,58.34451924,44.06080905,125.3509625,35.00007784,Abnormal
165
+ 63.40448058,14.11532726,48.13680562,49.28915333,111.9160075,31.78449499,Abnormal
166
+ 57.28694488,15.1493501,63.99999999,42.13759477,116.7353868,30.34120327,Abnormal
167
+ 41.18776972,5.792973871,42.86739151,35.39479584,103.3488802,27.66027669,Abnormal
168
+ 66.80479632,14.55160171,72.08491177,52.25319461,82.45603817,41.6854736,Abnormal
169
+ 79.4769781,26.73226755,70.65098189,52.74471055,118.5886691,61.70059824,Abnormal
170
+ 44.21646446,1.507074501,46.11033909,42.70938996,108.6295666,42.81048066,Abnormal
171
+ 57.03509717,0.34572799,49.19800263,56.68936918,103.0486975,52.16514503,Abnormal
172
+ 64.27481758,12.50864276,68.70237672,51.76617482,95.25245421,39.40982612,Abnormal
173
+ 92.02630795,35.39267395,77.41696348,56.633634,115.72353,58.05754155,Abnormal
174
+ 67.26314926,7.194661096,51.69688681,60.06848816,97.8010854,42.13694325,Abnormal
175
+ 118.1446548,38.44950127,50.83851954,79.69515353,81.0245406,74.04376736,Abnormal
176
+ 115.9232606,37.51543601,76.79999999,78.40782459,104.6986033,81.19892712,Abnormal
177
+ 53.94165809,9.306594428,43.10049819,44.63506366,124.3978211,25.0821266,Abnormal
178
+ 83.7031774,20.26822858,77.1105979,63.43494882,125.4801739,69.279571,Abnormal
179
+ 56.99140382,6.87408897,57.00900516,50.11731485,109.978045,36.81011057,Abnormal
180
+ 72.34359434,16.42078962,59.86901238,55.92280472,70.08257486,12.07264427,Abnormal
181
+ 95.38259648,24.82263131,95.15763273,70.55996517,89.3075466,57.66084135,Abnormal
182
+ 44.25347645,1.101086714,38,43.15238973,98.27410705,23.9106354,Abnormal
183
+ 64.80954139,15.17407796,58.83999352,49.63546343,111.679961,21.40719845,Abnormal
184
+ 78.40125389,14.04225971,79.69426258,64.35899418,104.7312342,12.39285327,Abnormal
185
+ 56.66829282,13.45820343,43.76970978,43.21008939,93.69220863,21.10812135,Abnormal
186
+ 50.82502875,9.064729049,56.29999999,41.7602997,78.99945411,23.04152435,Abnormal
187
+ 61.41173702,25.38436364,39.09686927,36.02737339,103.4045971,21.84340688,Abnormal
188
+ 56.56382381,8.961261611,52.57784639,47.6025622,98.77711506,50.70187326,Abnormal
189
+ 67.02766447,13.28150221,66.15040334,53.74616226,100.7154129,33.98913551,Abnormal
190
+ 80.81777144,19.23898066,61.64245116,61.57879078,89.47183446,44.167602,Abnormal
191
+ 80.65431956,26.34437939,60.89811835,54.30994017,120.1034928,52.46755185,Abnormal
192
+ 68.72190982,49.4318636,68.0560124,19.29004622,125.0185168,54.69128928,Abnormal
193
+ 37.90391014,4.47909896,24.71027447,33.42481118,157.848799,33.60702661,Abnormal
194
+ 64.62400798,15.22530262,67.63216653,49.39870535,90.298468,31.32641123,Abnormal
195
+ 75.43774787,31.53945399,89.59999999,43.89829388,106.8295898,54.96578902,Abnormal
196
+ 71.00194076,37.51577195,84.53709256,33.48616882,125.1642324,67.77118983,Abnormal
197
+ 81.05661087,20.80149217,91.78449512,60.2551187,125.430176,38.18178176,Abnormal
198
+ 91.46874146,24.50817744,84.62027202,66.96056402,117.3078968,52.62304673,Abnormal
199
+ 81.08232025,21.25584028,78.76675639,59.82647997,90.07187999,49.159426,Abnormal
200
+ 60.419932,5.265665422,59.8142356,55.15426658,109.0330745,30.26578534,Abnormal
201
+ 85.68094951,38.65003527,82.68097744,47.03091424,120.8407069,61.95903428,Abnormal
202
+ 82.4065243,29.27642195,77.05456489,53.13010235,117.0422439,62.76534831,Abnormal
203
+ 43.7182623,9.811985315,51.99999999,33.90627699,88.43424213,40.88092253,Abnormal
204
+ 86.472905,40.30376567,61.14101155,46.16913933,97.4041888,55.75222146,Abnormal
205
+ 74.46908181,33.28315665,66.94210105,41.18592517,146.4660009,124.9844057,Abnormal
206
+ 70.25043628,10.34012252,76.37007032,59.91031376,119.2370072,32.66650243,Abnormal
207
+ 72.64385013,18.92911726,67.99999999,53.71473287,116.9634162,25.38424676,Abnormal
208
+ 71.24176388,5.268270454,85.99958417,65.97349342,110.703107,38.2598637,Abnormal
209
+ 63.7723908,12.76338484,65.36052425,51.00900596,89.82274067,55.99545386,Abnormal
210
+ 58.82837872,37.57787321,125.7423855,21.25050551,135.6294176,117.3146829,Abnormal
211
+ 74.85448008,13.90908417,62.69325884,60.9453959,115.2087008,33.17225512,Abnormal
212
+ 75.29847847,16.67148361,61.29620362,58.62699486,118.8833881,31.57582292,Abnormal
213
+ 63.36433898,20.02462134,67.49870507,43.33971763,130.9992576,37.55670552,Abnormal
214
+ 67.51305267,33.2755899,96.28306169,34.23746278,145.6010328,88.30148594,Abnormal
215
+ 76.31402766,41.93368293,93.2848628,34.38034472,132.2672855,101.2187828,Abnormal
216
+ 73.63596236,9.711317947,62.99999999,63.92464442,98.72792982,26.97578722,Abnormal
217
+ 56.53505139,14.37718927,44.99154663,42.15786212,101.7233343,25.77317356,Abnormal
218
+ 80.11157156,33.94243223,85.10160773,46.16913933,125.5936237,100.2921068,Abnormal
219
+ 95.48022873,46.55005318,58.99999999,48.93017555,96.68390337,77.28307195,Abnormal
220
+ 74.09473084,18.82372712,76.03215571,55.27100372,128.4057314,73.38821617,Abnormal
221
+ 87.67908663,20.36561331,93.82241589,67.31347333,120.9448288,76.73062904,Abnormal
222
+ 48.25991962,16.41746236,36.32913708,31.84245726,94.88233607,28.34379914,Abnormal
223
+ 38.50527283,16.96429691,35.11281407,21.54097592,127.6328747,7.986683227,Normal
224
+ 54.92085752,18.96842952,51.60145541,35.952428,125.8466462,2.001642472,Normal
225
+ 44.36249017,8.945434892,46.90209626,35.41705528,129.220682,4.994195288,Normal
226
+ 48.3189305,17.45212105,47.99999999,30.86680945,128.9803079,-0.910940567,Normal
227
+ 45.70178875,10.65985935,42.5778464,35.0419294,130.1783144,-3.38890999,Normal
228
+ 30.74193812,13.35496594,35.90352597,17.38697218,142.4101072,-2.005372903,Normal
229
+ 50.91310144,6.6769999,30.89652243,44.23610154,118.151531,-1.057985526,Normal
230
+ 38.12658854,6.557617408,50.44507473,31.56897113,132.114805,6.338199339,Normal
231
+ 51.62467183,15.96934373,35,35.6553281,129.385308,1.00922834,Normal
232
+ 64.31186727,26.32836901,50.95896417,37.98349826,106.1777511,3.118221289,Normal
233
+ 44.48927476,21.78643263,31.47415392,22.70284212,113.7784936,-0.284129366,Normal
234
+ 54.9509702,5.865353416,52.99999999,49.08561678,126.9703283,-0.631602951,Normal
235
+ 56.10377352,13.10630665,62.63701952,42.99746687,116.2285032,31.17276727,Normal
236
+ 69.3988184,18.89840693,75.96636144,50.50041147,103.5825398,-0.44366081,Normal
237
+ 89.83467631,22.63921678,90.56346144,67.19545953,100.5011917,3.040973261,Normal
238
+ 59.72614016,7.724872599,55.34348527,52.00126756,125.1742214,3.235159224,Normal
239
+ 63.95952166,16.06094486,63.12373633,47.8985768,142.3601245,6.298970934,Normal
240
+ 61.54059876,19.67695713,52.89222856,41.86364163,118.6862678,4.815031084,Normal
241
+ 38.04655072,8.30166942,26.23683004,29.7448813,123.8034132,3.885773488,Normal
242
+ 43.43645061,10.09574326,36.03222439,33.34070735,137.4396942,-3.114450861,Normal
243
+ 65.61180231,23.13791922,62.58217893,42.47388309,124.1280012,-4.083298414,Normal
244
+ 53.91105429,12.93931796,38.99999999,40.97173633,118.1930354,5.074353176,Normal
245
+ 43.11795103,13.81574355,40.34738779,29.30220748,128.5177217,0.970926407,Normal
246
+ 40.6832291,9.148437195,31.02159252,31.53479191,139.1184721,-2.511618596,Normal
247
+ 37.7319919,9.386298276,41.99999999,28.34569362,135.740926,13.68304672,Normal
248
+ 63.92947003,19.97109671,40.17704963,43.95837332,113.0659387,-11.05817866,Normal
249
+ 61.82162717,13.59710457,63.99999999,48.22452261,121.779803,1.296191194,Normal
250
+ 62.14080535,13.96097523,57.99999999,48.17983012,133.2818339,4.955105669,Normal
251
+ 69.00491277,13.29178975,55.5701429,55.71312302,126.6116215,10.83201105,Normal
252
+ 56.44702568,19.44449915,43.5778464,37.00252653,139.1896903,-1.859688529,Normal
253
+ 41.6469159,8.835549101,36.03197484,32.8113668,116.5551679,-6.054537956,Normal
254
+ 51.52935759,13.51784732,35,38.01151027,126.7185156,13.92833085,Normal
255
+ 39.08726449,5.536602477,26.93203835,33.55066201,131.5844199,-0.75946135,Normal
256
+ 34.64992241,7.514782784,42.99999999,27.13513962,123.9877408,-4.082937601,Normal
257
+ 63.02630005,27.33624023,51.60501665,35.69005983,114.5066078,7.439869802,Normal
258
+ 47.80555887,10.68869819,53.99999999,37.11686068,125.3911378,-0.402523218,Normal
259
+ 46.63786363,15.85371711,39.99999999,30.78414653,119.3776026,9.06458168,Normal
260
+ 49.82813487,16.73643493,28,33.09169994,121.4355585,1.91330704,Normal
261
+ 47.31964755,8.573680295,35.56025198,38.74596726,120.5769719,1.630663508,Normal
262
+ 50.75329025,20.23505957,37,30.51823068,122.343516,2.288487746,Normal
263
+ 36.15782981,-0.810514093,33.62731353,36.96834391,135.9369096,-2.092506504,Normal
264
+ 40.74699612,1.835524271,49.99999999,38.91147185,139.2471502,0.668556793,Normal
265
+ 42.91804052,-5.845994341,57.99999999,48.76403486,121.6068586,-3.362044654,Normal
266
+ 63.79242525,21.34532339,65.99999999,42.44710185,119.5503909,12.38260373,Normal
267
+ 72.95564397,19.57697146,61.00707117,53.37867251,111.2340468,0.813491154,Normal
268
+ 67.53818154,14.65504222,58.00142908,52.88313932,123.6322597,25.9702063,Normal
269
+ 54.75251965,9.752519649,47.99999999,45,123.0379985,8.235294118,Normal
270
+ 50.16007802,-2.970024337,41.99999999,53.13010235,131.8024914,-8.290203373,Normal
271
+ 40.34929637,10.19474845,37.96774659,30.15454792,128.0099272,0.458901373,Normal
272
+ 63.61919213,16.93450781,49.34926218,46.68468432,117.0897469,-0.357811974,Normal
273
+ 54.14240778,11.93511014,42.99999999,42.20729763,122.2090834,0.153549242,Normal
274
+ 74.97602148,14.92170492,53.73007172,60.05431656,105.6453997,1.594747729,Normal
275
+ 42.51727249,14.37567126,25.32356538,28.14160123,128.9056892,0.75702014,Normal
276
+ 33.78884314,3.675109986,25.5,30.11373315,128.3253556,-1.776111234,Normal
277
+ 54.5036853,6.819910138,46.99999999,47.68377516,111.7911722,-4.406769011,Normal
278
+ 48.17074627,9.594216702,39.71092029,38.57652956,135.6233101,5.360050572,Normal
279
+ 46.37408781,10.21590237,42.69999999,36.15818544,121.2476572,-0.54202201,Normal
280
+ 52.86221391,9.410371613,46.98805181,43.4518423,123.0912395,1.856659161,Normal
281
+ 57.1458515,16.48909145,42.84214764,40.65676005,113.8061775,5.0151857,Normal
282
+ 37.14014978,16.48123972,24,20.65891006,125.0143609,7.366425398,Normal
283
+ 51.31177106,8.875541276,56.99999999,42.43622979,126.4722584,-2.144043911,Normal
284
+ 42.51561014,16.54121618,41.99999999,25.97439396,120.631941,7.876730692,Normal
285
+ 39.35870531,7.011261806,37,32.3474435,117.8187599,1.904048199,Normal
286
+ 35.8775708,1.112373561,43.45725694,34.76519724,126.9239062,-1.632238263,Normal
287
+ 43.1919153,9.976663803,28.93814927,33.21525149,123.4674001,1.741017579,Normal
288
+ 67.28971201,16.7175142,50.99999999,50.5721978,137.5917777,4.960343813,Normal
289
+ 51.32546366,13.63122319,33.25857782,37.69424047,131.3061224,1.78886965,Normal
290
+ 65.7563482,13.20692644,43.99999999,52.54942177,129.3935728,-1.982120038,Normal
291
+ 40.41336566,-1.329412398,30.98276809,41.74277806,119.3356546,-6.173674823,Normal
292
+ 48.80190855,18.01776202,51.99999999,30.78414653,139.1504066,10.44286169,Normal
293
+ 50.08615264,13.43004422,34.45754051,36.65610842,119.1346221,3.089484465,Normal
294
+ 64.26150724,14.49786554,43.90250363,49.76364169,115.3882683,5.951454368,Normal
295
+ 53.68337998,13.44702168,41.58429713,40.23635831,113.9137026,2.737035292,Normal
296
+ 48.99595771,13.11382047,51.87351997,35.88213725,126.3981876,0.535471617,Normal
297
+ 59.16761171,14.56274875,43.19915768,44.60486296,121.0356423,2.830504124,Normal
298
+ 67.80469442,16.55066167,43.25680184,51.25403274,119.6856451,4.867539941,Normal
299
+ 61.73487533,17.11431203,46.89999999,44.6205633,120.9201997,3.087725997,Normal
300
+ 33.04168754,-0.324678459,19.0710746,33.366366,120.3886112,9.354364925,Normal
301
+ 74.56501543,15.72431994,58.61858244,58.84069549,105.417304,0.599247113,Normal
302
+ 44.43070103,14.17426387,32.2434952,30.25643716,131.7176127,-3.604255336,Normal
303
+ 36.42248549,13.87942449,20.24256187,22.543061,126.0768612,0.179717077,Normal
304
+ 51.07983294,14.20993529,35.95122893,36.86989765,115.8037111,6.905089963,Normal
305
+ 34.75673809,2.631739646,29.50438112,32.12499844,127.1398495,-0.460894198,Normal
306
+ 48.90290434,5.587588658,55.49999999,43.31531568,137.1082886,19.85475919,Normal
307
+ 46.23639915,10.0627701,37,36.17362905,128.0636203,-5.100053328,Normal
308
+ 46.42636614,6.620795049,48.09999999,39.80557109,130.3500956,2.449382401,Normal
309
+ 39.65690201,16.20883944,36.67485694,23.44806258,131.922009,-4.968979881,Normal
310
+ 45.57548229,18.75913544,33.77414297,26.81634684,116.7970069,3.131909921,Normal
311
+ 66.50717865,20.89767207,31.72747138,45.60950658,128.9029049,1.517203356,Normal
312
+ 82.90535054,29.89411893,58.25054221,53.01123161,110.7089577,6.079337831,Normal
313
+ 50.67667667,6.461501271,35,44.2151754,116.5879699,-0.214710615,Normal
314
+ 89.01487529,26.07598143,69.02125897,62.93889386,111.4810746,6.061508401,Normal
315
+ 54.60031622,21.48897426,29.36021618,33.11134196,118.3433212,-1.471067262,Normal
316
+ 34.38229939,2.062682882,32.39081996,32.31961651,128.3001991,-3.365515555,Normal
317
+ 45.07545026,12.30695118,44.58317718,32.76849908,147.8946372,-8.941709421,Normal
318
+ 47.90356517,13.61668819,36,34.28687698,117.4490622,-4.245395422,Normal
319
+ 53.93674778,20.72149628,29.22053381,33.21525149,114.365845,-0.421010392,Normal
320
+ 61.44659663,22.6949683,46.17034732,38.75162833,125.6707246,-2.707879517,Normal
321
+ 45.25279209,8.693157364,41.5831264,36.55963472,118.5458418,0.214750167,Normal
322
+ 33.84164075,5.073991409,36.64123294,28.76764934,123.9452436,-0.199249089,Normal
data_cache/pharmacy_readmission.csv ADDED
The diff for this file is too large to render. See raw diff
 
data_cache/pulmonology_copd.csv ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ age,sex,smoking_pack_years,fev1_litres,fvc_litres,fev1_fvc_ratio,bmi,mrc_dyspnea_scale,sgrq_score,copd_gold_stage,exacerbation
2
+ 77,1,60.0,1.21,2.4,0.504,,,69.55,3,1
3
+ 79,0,50.0,1.09,1.64,0.665,,,44.24,2,0
4
+ 80,0,11.0,1.52,2.3,0.661,,,44.09,2,0
5
+ 56,1,60.0,0.47,1.14,0.412,,,62.04,4,1
6
+ 65,1,68.0,1.07,2.91,0.368,,,75.56,3,1
7
+ 67,0,26.0,1.09,1.99,0.548,,,73.82,2,0
8
+ 67,0,50.0,0.69,1.31,0.527,,,77.44,3,1
9
+ 83,1,90.0,0.68,2.23,0.305,,,45.41,3,1
10
+ 72,1,50.0,2.13,4.38,0.486,,,69.61,2,0
11
+ 75,0,6.0,1.06,2.06,0.515,,,55.56,3,1
12
+ 76,0,6.0,1.1,2.06,0.534,,,55.56,3,1
13
+ 59,0,28.0,0.68,2.02,0.337,,,55.23,4,1
14
+ 64,1,30.0,0.45,1.56,0.288,,,50.53,4,1
15
+ 74,0,75.0,1.79,2.62,0.683,,,45.0,1,0
16
+ 70,0,103.0,1.2,2.09,0.574,,,39.66,2,0
17
+ 71,0,105.0,0.72,2.09,0.344,,,39.66,2,0
18
+ 69,1,78.0,1.46,3.33,0.438,,,28.86,3,1
19
+ 55,0,109.0,1.54,2.15,0.716,,,76.5,2,0
20
+ 72,1,15.0,0.6,1.81,0.331,,,38.74,4,1
21
+ 72,1,15.0,0.89,1.81,0.492,,,38.74,4,1
22
+ 74,0,24.0,0.51,2.06,0.248,,,71.21,4,1
23
+ 75,1,40.0,0.79,1.81,0.436,,,35.79,4,1
24
+ 69,0,15.0,0.91,2.9,0.314,,,58.78,3,1
25
+ 73,1,75.0,1.46,2.37,0.616,,,34.71,3,1
26
+ 75,1,45.0,2.35,4.12,0.57,,,58.25,1,0
27
+ 80,1,67.0,1.77,2.77,0.639,,,67.66,2,0
28
+ 76,1,38.0,1.06,3.11,0.341,,,56.8,3,1
29
+ 73,1,31.0,1.88,2.71,0.694,,,66.51,2,0
30
+ 77,1,75.0,1.92,2.66,0.722,,,36.39,2,0
31
+ 88,1,1.0,1.3,2.0,0.65,,,47.2,3,1
32
+ 44,1,30.0,1.66,3.08,0.539,,,72.24,3,1
33
+ 82,1,45.0,1.18,2.57,0.459,,,37.04,3,1
34
+ 73,1,38.0,1.86,3.69,0.504,,,35.81,2,0
35
+ 64,0,40.0,1.81,3.24,0.559,,,27.27,2,0
36
+ 76,1,23.0,2.01,3.63,0.554,,,37.71,2,0
37
+ 83,1,11.0,1.11,2.04,0.544,,,25.34,3,1
38
+ 65,0,66.0,2.0,3.35,0.597,,,25.02,2,0
39
+ 74,1,64.0,2.37,4.7,0.504,,,43.57,2,0
40
+ 70,1,50.0,1.07,2.91,0.368,,,64.68,3,1
41
+ 71,0,20.0,1.32,2.27,0.581,,,38.43,1,0
42
+ 78,1,37.5,1.6,2.68,0.597,,,27.52,2,0
43
+ 75,0,10.0,0.92,2.29,0.402,,,54.49,2,0
44
+ 67,1,36.0,1.79,3.19,0.561,,,50.03,2,0
45
+ 78,1,55.0,1.6,3.87,0.413,,,38.21,3,0
46
+ 73,1,59.0,2.43,5.37,0.453,,,19.94,2,0
47
+ 53,1,35.0,2.06,3.77,0.546,,,72.56,1,0
48
+ 64,1,90.0,1.26,2.1,0.6,,,42.01,3,1
49
+ 81,1,54.0,1.48,2.29,0.646,,,16.29,2,0
50
+ 82,1,54.0,1.34,2.29,0.585,,,16.29,2,0
51
+ 71,0,3.0,1.67,2.58,0.647,,,29.29,1,0
52
+ 65,0,34.0,1.45,2.85,0.509,,,41.1,2,0
53
+ 71,1,20.0,2.97,3.5,0.849,,,38.57,1,0
54
+ 78,1,55.0,1.78,4.0,0.445,,,28.51,2,0
55
+ 73,0,34.0,0.72,1.47,0.49,,,32.47,3,1
56
+ 72,0,34.0,0.73,1.47,0.497,,,32.47,3,1
57
+ 63,1,44.0,1.28,3.56,0.36,,,62.09,3,1
58
+ 60,1,14.0,2.12,3.62,0.586,,,51.77,2,0
59
+ 75,1,45.0,2.62,4.9,0.535,,,18.72,1,0
60
+ 73,0,49.0,1.42,2.14,0.664,,,46.77,2,0
61
+ 66,1,20.0,3.02,5.23,0.577,,,17.97,1,0
62
+ 80,1,3.0,1.97,2.33,0.845,,,36.74,1,0
63
+ 81,1,3.0,1.83,2.33,0.785,,,36.74,1,0
64
+ 73,1,100.0,1.26,2.28,0.553,,,15.05,3,1
65
+ 71,0,47.0,1.28,2.29,0.559,,,28.41,2,0
66
+ 69,0,47.0,0.65,2.29,0.284,,,28.41,2,0
67
+ 74,1,55.0,3.06,4.46,0.686,,,24.48,1,0
68
+ 62,0,80.0,1.93,3.39,0.569,,,10.01,1,0
69
+ 68,1,20.0,1.12,3.22,0.348,,,61.97,3,1
70
+ 70,0,36.0,2.11,3.51,0.601,,,10.92,1,0
71
+ 67,1,20.0,1.11,3.22,0.345,,,61.97,3,1
72
+ 70,0,36.0,1.89,3.51,0.538,,,10.92,1,0
73
+ 49,0,39.0,0.74,2.31,0.32,,,28.33,4,1
74
+ 75,1,5.0,2.43,4.33,0.561,,,47.88,2,0
75
+ 73,1,60.0,1.92,3.76,0.511,,,56.96,2,0
76
+ 78,1,30.0,1.14,3.04,0.375,,,34.46,3,1
77
+ 67,1,45.0,2.79,4.11,0.679,,,29.98,1,0
78
+ 75,1,30.0,1.64,3.1,0.529,,,32.38,2,0
79
+ 76,1,30.0,1.74,3.1,0.561,,,32.38,2,0
80
+ 63,0,50.0,1.69,2.31,0.732,,,47.36,2,0
81
+ 65,0,6.0,3.18,4.54,0.7,,,56.2,1,0
82
+ 65,0,20.0,2.13,3.2,0.666,,,2.0,1,0
83
+ 62,1,8.0,2.52,3.89,0.648,,,32.69,2,0
84
+ 63,0,20.0,1.01,2.48,0.407,,,17.95,2,0
85
+ 64,0,35.0,0.82,1.52,0.539,,,67.56,3,1
86
+ 72,1,75.0,0.94,2.47,0.381,,,45.3,3,1
87
+ 61,0,9.0,1.37,2.42,0.566,,,39.51,2,0
88
+ 60,1,10.0,1.96,4.01,0.489,,,24.52,2,0
89
+ 78,0,51.0,1.23,1.89,0.651,,,33.69,2,0
90
+ 67,1,20.0,2.74,4.54,0.604,,,8.12,2,0
91
+ 68,1,30.0,1.75,5.15,0.34,,,33.2,1,0
92
+ 72,1,30.0,2.8,4.39,0.638,,,42.88,1,0
93
+ 69,1,27.0,1.89,2.61,0.724,,,8.25,2,0
94
+ 68,1,27.0,1.89,2.61,0.724,,,8.25,2,0
95
+ 52,1,40.0,2.93,3.63,0.807,,,25.62,1,0
96
+ 55,1,40.0,2.75,4.54,0.606,,,58.41,1,0
97
+ 72,1,30.0,1.61,3.14,0.513,,,34.64,2,0
98
+ 68,1,25.0,2.7,3.87,0.698,,,35.84,1,0
99
+ 75,1,40.0,2.9,4.72,0.614,,,15.05,1,0
100
+ 68,0,30.0,1.65,2.8,0.589,,,19.7,2,0
101
+ 54,1,30.0,1.72,4.07,0.423,,,20.55,3,1
102
+ 78,1,55.0,1.15,2.01,0.572,,,30.21,2,0
data_cache/radiology_pneumonia.csv ADDED
The diff for this file is too large to render. See raw diff
 
data_cache/thyroid.csv ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1,107,10.1,2.2,0.9,2.7
2
+ 1,113,9.9,3.1,2.0,5.9
3
+ 1,127,12.9,2.4,1.4,0.6
4
+ 1,109,5.3,1.6,1.4,1.5
5
+ 1,105,7.3,1.5,1.5,-0.1
6
+ 1,105,6.1,2.1,1.4,7.0
7
+ 1,110,10.4,1.6,1.6,2.7
8
+ 1,114,9.9,2.4,1.5,5.7
9
+ 1,106,9.4,2.2,1.5,0.0
10
+ 1,107,13.0,1.1,0.9,3.1
11
+ 1,106,4.2,1.2,1.6,1.4
12
+ 1,110,11.3,2.3,0.9,3.3
13
+ 1,116,9.2,2.7,1.0,4.2
14
+ 1,112,8.1,1.9,3.7,2.0
15
+ 1,122,9.7,1.6,0.9,2.2
16
+ 1,109,8.4,2.1,1.1,3.6
17
+ 1,111,8.4,1.5,0.8,1.2
18
+ 1,114,6.7,1.5,1.0,3.5
19
+ 1,119,10.6,2.1,1.3,1.1
20
+ 1,115,7.1,1.3,1.3,2.0
21
+ 1,101,7.8,1.2,1.0,1.7
22
+ 1,103,10.1,1.3,0.7,0.1
23
+ 1,109,10.4,1.9,0.4,-0.1
24
+ 1,102,7.6,1.8,2.0,2.5
25
+ 1,121,10.1,1.7,1.3,0.1
26
+ 1,100,6.1,2.4,1.8,3.8
27
+ 1,106,9.6,2.4,1.0,1.3
28
+ 1,116,10.1,2.2,1.6,0.8
29
+ 1,105,11.1,2.0,1.0,1.0
30
+ 1,110,10.4,1.8,1.0,2.3
31
+ 1,120,8.4,1.1,1.4,1.4
32
+ 1,116,11.1,2.0,1.2,2.3
33
+ 1,110,7.8,1.9,2.1,6.4
34
+ 1,90,8.1,1.6,1.4,1.1
35
+ 1,117,12.2,1.9,1.2,3.9
36
+ 1,117,11.0,1.4,1.5,2.1
37
+ 1,113,9.0,2.0,1.8,1.6
38
+ 1,106,9.4,1.5,0.8,0.5
39
+ 1,130,9.5,1.7,0.4,3.2
40
+ 1,100,10.5,2.4,0.9,1.9
41
+ 1,121,10.1,2.4,0.8,3.0
42
+ 1,110,9.2,1.6,1.5,0.3
43
+ 1,129,11.9,2.7,1.2,3.5
44
+ 1,121,13.5,1.5,1.6,0.5
45
+ 1,123,8.1,2.3,1.0,5.1
46
+ 1,107,8.4,1.8,1.5,0.8
47
+ 1,109,10.0,1.3,1.8,4.3
48
+ 1,120,6.8,1.9,1.3,1.9
49
+ 1,100,9.5,2.5,1.3,-0.2
50
+ 1,118,8.1,1.9,1.5,13.7
51
+ 1,100,11.3,2.5,0.7,-0.3
52
+ 1,103,12.2,1.2,1.3,2.7
53
+ 1,115,8.1,1.7,0.6,2.2
54
+ 1,119,8.0,2.0,0.6,3.2
55
+ 1,106,9.4,1.7,0.9,3.1
56
+ 1,114,10.9,2.1,0.3,1.4
57
+ 1,93,8.9,1.5,0.8,2.7
58
+ 1,120,10.4,2.1,1.1,1.8
59
+ 1,106,11.3,1.8,0.9,1.0
60
+ 1,110,8.7,1.9,1.6,4.4
61
+ 1,103,8.1,1.4,0.5,3.8
62
+ 1,101,7.1,2.2,0.8,2.2
63
+ 1,115,10.4,1.8,1.6,2.0
64
+ 1,116,10.0,1.7,1.5,4.3
65
+ 1,117,9.2,1.9,1.5,6.8
66
+ 1,106,6.7,1.5,1.2,3.9
67
+ 1,118,10.5,2.1,0.7,3.5
68
+ 1,97,7.8,1.3,1.2,0.9
69
+ 1,113,11.1,1.7,0.8,2.3
70
+ 1,104,6.3,2.0,1.2,4.0
71
+ 1,96,9.4,1.5,1.0,3.1
72
+ 1,120,12.4,2.4,0.8,1.9
73
+ 1,133,9.7,2.9,0.8,1.9
74
+ 1,126,9.4,2.3,1.0,4.0
75
+ 1,113,8.5,1.8,0.8,0.5
76
+ 1,109,9.7,1.4,1.1,2.1
77
+ 1,119,12.9,1.5,1.3,3.6
78
+ 1,101,7.1,1.6,1.5,1.6
79
+ 1,108,10.4,2.1,1.3,2.4
80
+ 1,117,6.7,2.2,1.8,6.7
81
+ 1,115,15.3,2.3,2.0,2.0
82
+ 1,91,8.0,1.7,2.1,4.6
83
+ 1,103,8.5,1.8,1.9,1.1
84
+ 1,98,9.1,1.4,1.9,-0.3
85
+ 1,111,7.8,2.0,1.8,4.1
86
+ 1,107,13.0,1.5,2.8,1.7
87
+ 1,119,11.4,2.3,2.2,1.6
88
+ 1,122,11.8,2.7,1.7,2.3
89
+ 1,105,8.1,2.0,1.9,-0.5
90
+ 1,109,7.6,1.3,2.2,1.9
91
+ 1,105,9.5,1.8,1.6,3.6
92
+ 1,112,5.9,1.7,2.0,1.3
93
+ 1,112,9.5,2.0,1.2,0.7
94
+ 1,98,8.6,1.6,1.6,6.0
95
+ 1,109,12.4,2.3,1.7,0.8
96
+ 1,114,9.1,2.6,1.5,1.5
97
+ 1,114,11.1,2.4,2.0,-0.3
98
+ 1,110,8.4,1.4,1.0,1.9
99
+ 1,120,7.1,1.2,1.5,4.3
100
+ 1,108,10.9,1.2,1.9,1.0
101
+ 1,108,8.7,1.2,2.2,2.5
102
+ 1,116,11.9,1.8,1.9,1.5
103
+ 1,113,11.5,1.5,1.9,2.9
104
+ 1,105,7.0,1.5,2.7,4.3
105
+ 1,114,8.4,1.6,1.6,-0.2
106
+ 1,114,8.1,1.6,1.6,0.5
107
+ 1,105,11.1,1.1,0.8,1.2
108
+ 1,107,13.8,1.5,1.0,1.9
109
+ 1,116,11.5,1.8,1.4,5.4
110
+ 1,102,9.5,1.4,1.1,1.6
111
+ 1,116,16.1,0.9,1.3,1.5
112
+ 1,118,10.6,1.8,1.4,3.0
113
+ 1,109,8.9,1.7,1.0,0.9
114
+ 1,110,7.0,1.0,1.6,4.3
115
+ 1,104,9.6,1.1,1.3,0.8
116
+ 1,105,8.7,1.5,1.1,1.5
117
+ 1,102,8.5,1.2,1.3,1.4
118
+ 1,112,6.8,1.7,1.4,3.3
119
+ 1,111,8.5,1.6,1.1,3.9
120
+ 1,111,8.5,1.6,1.2,7.7
121
+ 1,103,7.3,1.0,0.7,0.5
122
+ 1,98,10.4,1.6,2.3,-0.7
123
+ 1,117,7.8,2.0,1.0,3.9
124
+ 1,111,9.1,1.7,1.2,4.1
125
+ 1,101,6.3,1.5,0.9,2.9
126
+ 1,106,8.9,0.7,1.0,2.3
127
+ 1,102,8.4,1.5,0.8,2.4
128
+ 1,115,10.6,0.8,2.1,4.6
129
+ 1,130,10.0,1.6,0.9,4.6
130
+ 1,101,6.7,1.3,1.0,5.7
131
+ 1,110,6.3,1.0,0.8,1.0
132
+ 1,103,9.5,2.9,1.4,-0.1
133
+ 1,113,7.8,2.0,1.1,3.0
134
+ 1,112,10.6,1.6,0.9,-0.1
135
+ 1,118,6.5,1.2,1.2,1.7
136
+ 1,109,9.2,1.8,1.1,4.4
137
+ 1,116,7.8,1.4,1.1,3.7
138
+ 1,127,7.7,1.8,1.9,6.4
139
+ 1,108,6.5,1.0,0.9,1.5
140
+ 1,108,7.1,1.3,1.6,2.2
141
+ 1,105,5.7,1.0,0.9,0.9
142
+ 1,98,5.7,0.4,1.3,2.8
143
+ 1,112,6.5,1.2,1.2,2.0
144
+ 1,118,12.2,1.5,1.0,2.3
145
+ 1,94,7.5,1.2,1.3,4.4
146
+ 1,126,10.4,1.7,1.2,3.5
147
+ 1,114,7.5,1.1,1.6,4.4
148
+ 1,111,11.9,2.3,0.9,3.8
149
+ 1,104,6.1,1.8,0.5,0.8
150
+ 1,102,6.6,1.2,1.4,1.3
151
+ 2,139,16.4,3.8,1.1,-0.2
152
+ 2,111,16.0,2.1,0.9,-0.1
153
+ 2,113,17.2,1.8,1.0,0.0
154
+ 2,65,25.3,5.8,1.3,0.2
155
+ 2,88,24.1,5.5,0.8,0.1
156
+ 2,65,18.2,10.0,1.3,0.1
157
+ 2,134,16.4,4.8,0.6,0.1
158
+ 2,110,20.3,3.7,0.6,0.2
159
+ 2,67,23.3,7.4,1.8,-0.6
160
+ 2,95,11.1,2.7,1.6,-0.3
161
+ 2,89,14.3,4.1,0.5,0.2
162
+ 2,89,23.8,5.4,0.5,0.1
163
+ 2,88,12.9,2.7,0.1,0.2
164
+ 2,105,17.4,1.6,0.3,0.4
165
+ 2,89,20.1,7.3,1.1,-0.2
166
+ 2,99,13.0,3.6,0.7,-0.1
167
+ 2,80,23.0,10.0,0.9,-0.1
168
+ 2,89,21.8,7.1,0.7,-0.1
169
+ 2,99,13.0,3.1,0.5,-0.1
170
+ 2,68,14.7,7.8,0.6,-0.2
171
+ 2,97,14.2,3.6,1.5,0.3
172
+ 2,84,21.5,2.7,1.1,-0.6
173
+ 2,84,18.5,4.4,1.1,-0.3
174
+ 2,98,16.7,4.3,1.7,0.2
175
+ 2,94,20.5,1.8,1.4,-0.5
176
+ 2,99,17.5,1.9,1.4,0.3
177
+ 2,76,25.3,4.5,1.2,-0.1
178
+ 2,110,15.2,1.9,0.7,-0.2
179
+ 2,144,22.3,3.3,1.3,0.6
180
+ 2,105,12.0,3.3,1.1,0.0
181
+ 2,88,16.5,4.9,0.8,0.1
182
+ 2,97,15.1,1.8,1.2,-0.2
183
+ 2,106,13.4,3.0,1.1,0.0
184
+ 2,79,19.0,5.5,0.9,0.3
185
+ 2,92,11.1,2.0,0.7,-0.2
186
+ 3,125,2.3,0.9,16.5,9.5
187
+ 3,120,6.8,2.1,10.4,38.6
188
+ 3,108,3.5,0.6,1.7,1.4
189
+ 3,120,3.0,2.5,1.2,4.5
190
+ 3,119,3.8,1.1,23.0,5.7
191
+ 3,141,5.6,1.8,9.2,14.4
192
+ 3,129,1.5,0.6,12.5,2.9
193
+ 3,118,3.6,1.5,11.6,48.8
194
+ 3,120,1.9,0.7,18.5,24.0
195
+ 3,119,0.8,0.7,56.4,21.6
196
+ 3,123,5.6,1.1,13.7,56.3
197
+ 3,115,6.3,1.2,4.7,14.4
198
+ 3,126,0.5,0.2,12.2,8.8
199
+ 3,121,4.7,1.8,11.2,53.0
200
+ 3,131,2.7,0.8,9.9,4.7
201
+ 3,134,2.0,0.5,12.2,2.2
202
+ 3,141,2.5,1.3,8.5,7.5
203
+ 3,113,5.1,0.7,5.8,19.6
204
+ 3,136,1.4,0.3,32.6,8.4
205
+ 3,120,3.4,1.8,7.5,21.5
206
+ 3,125,3.7,1.1,8.5,25.9
207
+ 3,123,1.9,0.3,22.8,22.2
208
+ 3,112,2.6,0.7,41.0,19.0
209
+ 3,134,1.9,0.6,18.4,8.2
210
+ 3,119,5.1,1.1,7.0,40.8
211
+ 3,118,6.5,1.3,1.7,11.5
212
+ 3,139,4.2,0.7,4.3,6.3
213
+ 3,103,5.1,1.4,1.2,5.0
214
+ 3,97,4.7,1.1,2.1,12.6
215
+ 3,102,5.3,1.4,1.3,6.7
datasets/.gitkeep ADDED
File without changes
main_hf.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """HuggingFace Spaces entry — serves API + static frontend."""
2
+ from __future__ import annotations
3
+
4
+ import os
5
+ from pathlib import Path
6
+
7
+ from fastapi import FastAPI, Request
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ from fastapi.responses import FileResponse
10
+ from fastapi.staticfiles import StaticFiles
11
+
12
+ from app.services.certificate_service import CertificateService
13
+ from app.services.data_service import DataService
14
+ from app.services.ethics_service import EthicsService
15
+ from app.services.explain_service import ExplainService
16
+ from app.services.insight_service import InsightService
17
+ from app.services.ml_service import MLService
18
+ from arena.service import ArenaService
19
+
20
# Single FastAPI application object; everything below is attached to it.
app = FastAPI(title="HealthWithSevgi API", version="1.3.1")

# CORS: any origin, method and header is accepted, but credentials are not.
_cors_options = {
    "allow_origins": ["*"],
    "allow_credentials": False,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_options)

# One instance of every service is created at import time and stored on
# app.state.  ArenaService receives the MLService instance, so ml_service
# must be assigned before arena_service.
app.state.data_service = DataService()
app.state.ml_service = MLService()
app.state.explain_service = ExplainService()
app.state.ethics_service = EthicsService()
app.state.insight_service = InsightService()
app.state.certificate_service = CertificateService()
app.state.arena_service = ArenaService(app.state.ml_service)

# The routers are imported only after the services exist on app.state.
# NOTE(review): presumably deliberate ordering -- confirm before hoisting
# these imports to the top of the module.
from app.routers.data_router import router as data_router
from app.routers.explain_router import router as explain_router
from app.routers.ml_router import router as ml_router
from arena.router import router as arena_router

# Registration order: data, ml, explain, arena.
for _router in (data_router, ml_router, explain_router, arena_router):
    app.include_router(_router)

# Directory holding the built frontend, located next to this module.
STATIC_DIR = Path(__file__).parent / "static"
49
+
50
+ # Health check — verify critical native libraries load correctly
51
@app.get("/health")
async def health_check() -> dict:
    """Report whether the critical libraries can be imported.

    Returns:
        ``{"status": "healthy"}`` when every library imports, otherwise
        ``{"status": "degraded", "errors": [...]}`` with one
        ``"<library>: <exception>"`` entry per failed import.
    """
    required_libs = ("sklearn", "xgboost", "lightgbm", "shap", "scipy")
    failures: list[str] = []
    for name in required_libs:
        try:
            __import__(name)
        except Exception as err:
            # Broad on purpose: any import-time failure is reported in the
            # response instead of being raised.
            failures.append(f"{name}: {err}")
    if not failures:
        return {"status": "healthy"}
    return {"status": "degraded", "errors": failures}
62
+
63
+ # Serve frontend static files
64
if STATIC_DIR.is_dir():
    # StaticFiles raises at construction time when its directory is missing,
    # and static/ can exist without a built frontend, so only mount /assets
    # when the directory is really there.
    if (STATIC_DIR / "assets").is_dir():
        app.mount("/assets", StaticFiles(directory=STATIC_DIR / "assets"), name="assets")

    @app.get("/{full_path:path}")
    async def serve_spa(request: Request, full_path: str):
        """Serve a file from the static directory, else the SPA entry point.

        Args:
            request: Incoming request (unused; kept for the route signature).
            full_path: Request path captured by the catch-all route.  It is
                client-controlled and may contain ``..`` segments or start
                with ``/`` (``GET //etc/passwd``), both of which would make
                ``STATIC_DIR / full_path`` point outside the static directory.

        Returns:
            A ``FileResponse`` for the requested file when it exists inside
            ``STATIC_DIR``; otherwise a ``FileResponse`` for ``index.html``.
        """
        static_root = STATIC_DIR.resolve()
        try:
            # Resolve first so "..", absolute paths and symlinks are judged by
            # where they really point; relative_to() raises ValueError when
            # the result is not located under static_root.
            candidate = (static_root / full_path).resolve()
            candidate.relative_to(static_root)
        except ValueError:
            candidate = None
        if candidate is not None and candidate.is_file():
            return FileResponse(candidate)
        return FileResponse(static_root / "index.html")
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi>=0.110.0
2
+ uvicorn[standard]>=0.29.0
3
+ scikit-learn>=1.4.0
4
+ pandas>=2.2.0
5
+ numpy>=1.26.0
6
+ imbalanced-learn>=0.12.0 # SMOTE
7
+ shap>=0.45.0 # Explainability
8
+ reportlab>=4.1.0 # PDF certificate generation
9
+ python-multipart>=0.0.9 # File upload support
10
+ pydantic>=2.6.0
11
+ xgboost>=2.0.0 # Gradient boosting (high performance)
12
+ lightgbm>=4.3.0 # Fast gradient boosting
13
+ requests>=2.31.0 # Real dataset downloads with caching
14
+ httpx>=0.28.0 # FastAPI TestClient dependency (used by backend tests)
15
+ python-dotenv>=1.0.0 # Load .env file for API keys
16
+ scipy>=1.12.0 # ARFF file parsing (vertebral column dataset)
17
+ openpyxl>=3.1.0 # Excel .xlsx reading (fetal health CTG dataset)
18
+ xlrd>=2.0.0 # Excel .xls reading (legacy UCI datasets)
19
+ ucimlrepo>=0.0.3 # UCI ML Repository API (CKD, CTG, and other datasets)
static/.gitkeep ADDED
File without changes
static/apple-touch-icon.png ADDED
static/assets/ArenaPage-C8SsT3v3.js ADDED
The diff for this file is too large to render. See raw diff
 
static/assets/ArenaPage-C8SsT3v3.js.map ADDED
The diff for this file is too large to render. See raw diff