Spaces:

0xBatuhan4
/

HealthWithSevgi

Running

App Files Files Community

github-actions[bot] committed on Apr 24

Commit

ee28bd3

0 Parent(s):

Deploy 1.15.12

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitignore +2 -0
Dockerfile +34 -0
README.md +12 -0
app/__init__.py +1 -0
app/main.py +79 -0
app/models/__init__.py +1 -0
app/models/explain_schemas.py +185 -0
app/models/ml_schemas.py +194 -0
app/models/schemas.py +73 -0
app/routers/__init__.py +1 -0
app/routers/data_router.py +184 -0
app/routers/explain_router.py +454 -0
app/routers/ml_router.py +92 -0
app/services/__init__.py +1 -0
app/services/certificate_service.py +690 -0
app/services/data_service.py +1272 -0
app/services/ethics_service.py +500 -0
app/services/explain_service.py +665 -0
app/services/insight_service.py +607 -0
app/services/ml_service.py +855 -0
app/services/specialty_registry.py +559 -0
app/utils/__init__.py +1 -0
arena/__init__.py +0 -0
arena/router.py +72 -0
arena/schemas.py +64 -0
arena/service.py +199 -0
data_cache/cardiology_arrhythmia.csv +0 -0
data_cache/cardiology_hf.csv +300 -0
data_cache/depression_data.csv +0 -0
data_cache/dermatology.csv +0 -0
data_cache/endocrinology_diabetes.csv +768 -0
data_cache/hepatology_liver.csv +583 -0
data_cache/icu_sepsis.csv +0 -0
data_cache/nephrology_ckd.csv +363 -0
data_cache/neurology_parkinsons.csv +196 -0
data_cache/obstetrics_fetal.csv +0 -0
data_cache/oncology_cervical.csv +0 -0
data_cache/ophthalmology.arff +0 -0
data_cache/orthopaedics.arff +322 -0
data_cache/pharmacy_readmission.csv +0 -0
data_cache/pulmonology_copd.csv +102 -0
data_cache/radiology_pneumonia.csv +0 -0
data_cache/thyroid.csv +215 -0
datasets/.gitkeep +0 -0
main_hf.py +72 -0
requirements.txt +19 -0
static/.gitkeep +0 -0
static/apple-touch-icon.png +0 -0
static/assets/ArenaPage-C8SsT3v3.js +0 -0
static/assets/ArenaPage-C8SsT3v3.js.map +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ *.pyc
2	+ __pycache__/

Dockerfile ADDED Viewed

	@@ -0,0 +1,34 @@

+## Stage 1 — install dependencies
+FROM python:3.12-slim AS builder
+WORKDIR /build
+COPY requirements.txt .
+# `&&` and `||` have equal precedence in sh and associate left-to-right, so an
+# ungrouped `pip install ... && find ... || true` would also swallow a pip failure
+# and let the build succeed with missing dependencies. Each best-effort cleanup is
+# therefore grouped with its own `|| true`; a failing pip install now fails the build.
+RUN pip install --no-cache-dir --no-compile --target=/build/deps -r requirements.txt \
+    && { find /build/deps -type d -name "__pycache__" -exec rm -rf {} + 2>/dev/null || true; } \
+    && { find /build/deps -type d -name "tests" -exec rm -rf {} + 2>/dev/null || true; }
+## Stage 2 — slim runtime
+FROM python:3.12-slim
+# Native libs required by scikit-learn, xgboost, lightgbm, scipy, shap
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    libgomp1 \
+    libopenblas0 \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+COPY --from=builder /build/deps /usr/local/lib/python3.12/site-packages
+COPY app ./app
+COPY datasets ./datasets
+COPY data_cache ./data_cache
+COPY arena ./arena
+COPY static ./static
+COPY main_hf.py .
+EXPOSE 7860
+CMD ["python", "-m", "uvicorn", "main_hf:app", "--host", "0.0.0.0", "--port", "7860"]

README.md ADDED Viewed

	@@ -0,0 +1,12 @@

+---
+title: HealthWithSevgi
+emoji: 🏥
+colorFrom: green
+colorTo: blue
+sdk: docker
+pinned: false
+---
+# HealthWithSevgi — ML Learning Tool for Healthcare Professionals
+A 7-step ML visualization tool for healthcare professionals to explore clinical datasets, prepare data, train models, and interpret predictions.

app/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """HealthWithSevgi FastAPI backend package."""

app/main.py ADDED Viewed

	@@ -0,0 +1,79 @@

+"""HealthWithSevgi — FastAPI Backend Entry Point"""
+from __future__ import annotations
+import logging
+from pathlib import Path
+from dotenv import load_dotenv
+load_dotenv(Path(__file__).resolve().parent.parent / ".env")
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from app.services.certificate_service import CertificateService
+from app.services.data_service import DataService
+from app.services.ethics_service import EthicsService
+from app.services.explain_service import ExplainService
+from app.services.insight_service import InsightService
+from app.services.ml_service import MLService
+logging.basicConfig(level=logging.INFO, format="%(levelname)s | %(name)s | %(message)s")
+app = FastAPI(
+    title="HealthWithSevgi API",
+    description="ML Visualization Tool for Healthcare — REST API",
+    version="1.3.1",
+)
+# CORS — allow frontend dev server
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["http://localhost:5173", "http://127.0.0.1:5173"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Singleton service instances
+app.state.data_service = DataService()
+app.state.ml_service = MLService()
+app.state.explain_service = ExplainService()
+app.state.ethics_service = EthicsService()
+app.state.insight_service = InsightService()
+app.state.certificate_service = CertificateService()
+# Routers
+from app.routers.data_router import router as data_router  # noqa: E402
+from app.routers.explain_router import router as explain_router  # noqa: E402
+from app.routers.ml_router import router as ml_router  # noqa: E402
+app.include_router(data_router)
+app.include_router(ml_router)
+app.include_router(explain_router)
+# Model Arena extension
+# Prepends `<directory three levels above this file>/local/model-arena` to sys.path
+# (only if it is not already listed) before importing the `arena` package.
+# NOTE(review): the Dockerfile copies `arena/` next to `app/`, so inside the image the
+# package is presumably importable without this extra path — confirm before removing.
+import sys
+from pathlib import Path
+_arena_path = str(Path(__file__).resolve().parent.parent.parent / "local" / "model-arena")
+if _arena_path not in sys.path:
+    sys.path.insert(0, _arena_path)
+from arena.router import router as arena_router  # noqa: E402
+from arena.service import ArenaService  # noqa: E402
+# ArenaService is constructed with the shared MLService instance held on app.state.
+app.state.arena_service = ArenaService(app.state.ml_service)
+app.include_router(arena_router)
+@app.get("/")
+async def root() -> dict:
+    """Health root — returns a small JSON object (status, project name, version) so `docker-compose healthcheck` has a 200 target."""
+    return {"status": "ok", "project": "HealthWithSevgi", "version": "1.3.1"}
+@app.get("/health")
+async def health_check() -> dict:
+    """
+    Health probe — returns the static payload `{"status": "healthy"}`.
+    NOTE(review): this endpoint was documented as a deep probe that verifies the heavy
+    native libs (sklearn, xgboost, lightgbm, shap, scipy) import cleanly, but no import
+    check is performed here — confirm whether that check is still intended.
+    """
+    return {"status": "healthy"}

app/models/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Pydantic request/response schemas used by the routers."""

app/models/explain_schemas.py ADDED Viewed

	@@ -0,0 +1,185 @@

+"""Pydantic schemas for explainability, ethics, and certificate endpoints."""
+from __future__ import annotations
+from typing import Literal
+from pydantic import BaseModel, Field
+class FeatureImportanceItem(BaseModel):
+    """One row of global SHAP importance — feature name + mean |SHAP value|."""
+    feature_name: str
+    clinical_name: str
+    importance: float
+    direction: Literal["positive", "negative", "neutral"]
+    clinical_note: str
+class GlobalExplainabilityResponse(BaseModel):
+    """
+    Response model of `GET /api/explain/global/{model_id}` — the per-feature importance
+    list for one model, the name of the method that produced it, a clinical note for the
+    top feature, and an explained-variance percentage.
+    """
+    model_id: str
+    method: str  # attribution method label; presumably tree vs. kernel SHAP — confirm in ExplainService
+    feature_importances: list[FeatureImportanceItem]
+    top_feature_clinical_note: str
+    explained_variance_pct: float
+class SHAPWaterfallPoint(BaseModel):
+    """
+    Single bar in the per-patient SHAP waterfall: which feature pushed the probability in
+    which direction and by how much.
+    """
+    feature_name: str
+    clinical_name: str
+    feature_value: float | str
+    shap_value: float
+    direction: Literal["increases_risk", "decreases_risk"]
+    plain_language: str
+class SinglePatientExplainResponse(BaseModel):
+    """
+    Response model of `GET /api/explain/patient/{model_id}/{patient_index}` — predicted
+    class and probability, the base value, the list of waterfall points, and a clinical
+    summary for one patient.
+    """
+    model_id: str
+    patient_index: int  # the router validates this against the length of the model's X_test
+    predicted_class: str
+    predicted_probability: float
+    base_value: float
+    waterfall: list[SHAPWaterfallPoint]
+    clinical_summary: str
+class SubgroupMetrics(BaseModel):
+    """
+    Fairness metrics computed for one subgroup of a sensitive attribute: sample size,
+    accuracy, sensitivity, specificity, precision and F1, plus a three-level review
+    status with an optional free-text reason.
+    """
+    group_name: str
+    group_label: str
+    sample_size: int
+    accuracy: float
+    sensitivity: float
+    specificity: float
+    precision: float
+    f1_score: float
+    status: Literal["acceptable", "review", "action_needed"]
+    status_reason: str = ""
+class BiasWarning(BaseModel):
+    """
+    Machine-readable flag emitted when a subgroup metric falls outside the configured
+    tolerance relative to the overall cohort.
+    """
+    detected: bool
+    message: str
+    affected_group: str
+    metric: str
+    gap: float
+class CaseStudy(BaseModel):
+    """
+    One narrative case study from the ethics LLM pass — a real-world regulatory/clinical
+    incident with a short lesson.
+    """
+    id: str
+    title: str
+    specialty: str
+    year: int
+    what_happened: str
+    impact: str
+    lesson: str
+    severity: Literal["failure", "near_miss", "prevention"]
+class RepresentationWarning(BaseModel):
+    """Flags a demographic group whose training-data proportion differs
+    from the population norm by more than the configured threshold."""
+    group: str
+    attribute: str
+    dataset_pct: float
+    population_pct: float
+    gap_pp: float
+    message: str
+class EthicsResponse(BaseModel):
+    """
+    Response model of `GET /api/ethics/{model_id}` — subgroup metrics, bias warnings,
+    training-data representation (with optional representation warnings), the overall
+    sensitivity, the EU AI Act checklist items, case studies, and flags describing
+    whether demographic data was available.
+    """
+    model_id: str
+    subgroup_metrics: list[SubgroupMetrics]
+    bias_warnings: list[BiasWarning]
+    training_representation: dict  # untyped dict; structure is defined by EthicsService
+    representation_warnings: list[RepresentationWarning] = Field(default_factory=list)
+    overall_sensitivity: float
+    eu_ai_act_items: list[dict]  # untyped dicts; structure is defined by EthicsService
+    case_studies: list[CaseStudy]
+    demographics_available: bool = True
+    demographics_note: str = ""
+class WhatIfRequest(BaseModel):
+    """Request body for `POST /api/explain/what-if` — a model id, a patient index, and one feature name with its replacement value."""
+    model_id: str
+    patient_index: int  # the router validates this against the length of the model's X_test
+    feature_name: str  # the router rejects names not in the model's feature_names
+    new_value: float
+class WhatIfResponse(BaseModel):
+    """
+    Response for `POST /api/explain/what-if` — the edited feature, its original and new
+    value, the original and new probability, the shift between them, and the direction
+    of the change. No SHAP waterfall is included in this model.
+    """
+    feature_name: str
+    original_value: float
+    new_value: float
+    original_prob: float
+    new_prob: float
+    shift: float  # presumably new_prob - original_prob — confirm sign convention in ExplainService
+    direction: Literal["increased_risk", "decreased_risk", "no_change"]
+class ChecklistUpdate(BaseModel):
+    """Toggle payload used to persist a single EU AI Act checklist item for the active session."""
+    model_id: str
+    item_id: str
+    checked: bool
+class SamplePatient(BaseModel):
+    """
+    A single patient row picked from the trained dataset for use in Step 6 explainability
+    or Step 7 ethics demos.
+    """
+    index: int
+    risk_level: Literal["low", "medium", "high"]
+    probability: float
+    summary: str
+class SamplePatientsResponse(BaseModel):
+    """Wraps a small list of `SamplePatient` rows used to seed the Step 6 "single patient" picker."""
+    model_id: str
+    patients: list[SamplePatient]
+class CertificateRequest(BaseModel):
+    """
+    Request body for `/api/explain/certificate` — the session id plus user-selected
+    checklist items to embed in the EU AI Act PDF.
+    """
+    model_id: str
+    session_id: str
+    checklist_state: dict[str, bool] = Field(default_factory=dict)
+    clinician_name: str = "Healthcare Professional"
+    institution: str = "Healthcare Institution"

app/models/ml_schemas.py ADDED Viewed

	@@ -0,0 +1,194 @@

+"""Pydantic schemas for ML training and evaluation endpoints."""
+from __future__ import annotations
+from enum import Enum
+from typing import Any, Literal
+from pydantic import BaseModel, Field, model_validator
+class ModelType(str, Enum):
+    """Enum of the eight classifiers the backend can train."""
+    KNN = "knn"
+    SVM = "svm"
+    DECISION_TREE = "decision_tree"
+    RANDOM_FOREST = "random_forest"
+    LOGISTIC_REGRESSION = "logistic_regression"
+    NAIVE_BAYES = "naive_bayes"
+    XGBOOST = "xgboost"
+    LIGHTGBM = "lightgbm"
+class KNNParams(BaseModel):
+    """Hyperparameters for K-Nearest-Neighbours (neighbour count, distance metric)."""
+    n_neighbors: int = Field(5, ge=1, le=25)
+    metric: Literal["euclidean", "manhattan"] = "euclidean"
+class SVMParams(BaseModel):
+    """Hyperparameters for Support Vector Machine (kernel, C); C is constrained to [0.01, 100]."""
+    kernel: Literal["linear", "rbf", "poly", "sigmoid"] = "rbf"
+    C: float = Field(1.0, ge=0.01, le=100.0)
+class DecisionTreeParams(BaseModel):
+    """Hyperparameters for a single Decision Tree (max depth, split criterion)."""
+    max_depth: int = Field(5, ge=1, le=20)
+    criterion: Literal["gini", "entropy"] = "gini"
+class RandomForestParams(BaseModel):
+    """Hyperparameters for Random Forest ensemble (n_estimators, max depth)."""
+    n_estimators: int = Field(100, ge=10, le=500)
+    max_depth: int = Field(5, ge=1, le=20)
+class LogisticRegressionParams(BaseModel):
+    """Hyperparameters for Logistic Regression (regularisation parameter C, iteration cap max_iter)."""
+    C: float = Field(1.0, ge=0.001, le=100.0)
+    max_iter: int = Field(200, ge=50, le=2000)
+class NaiveBayesParams(BaseModel):
+    """Hyperparameters for Gaussian Naive Bayes (variance smoothing)."""
+    var_smoothing: float = Field(1e-9, ge=1e-12, le=1e-3)
+class XGBoostParams(BaseModel):
+    """Hyperparameters for XGBoost (n_estimators, max depth, learning rate)."""
+    n_estimators: int = Field(100, ge=10, le=500)
+    max_depth: int = Field(5, ge=1, le=15)
+    learning_rate: float = Field(0.1, ge=0.01, le=0.5)
+class LightGBMParams(BaseModel):
+    """Hyperparameters for LightGBM (n_estimators, max_depth, learning rate)."""
+    n_estimators: int = Field(100, ge=10, le=500)
+    max_depth: int = Field(-1, ge=-1, le=15)  # default -1 presumably means "no depth limit" — confirm against LightGBM
+    learning_rate: float = Field(0.1, ge=0.01, le=0.5)
+PARAM_SCHEMAS: dict[str, type[BaseModel]] = {
+    "knn": KNNParams,
+    "svm": SVMParams,
+    "decision_tree": DecisionTreeParams,
+    "random_forest": RandomForestParams,
+    "logistic_regression": LogisticRegressionParams,
+    "naive_bayes": NaiveBayesParams,
+    "xgboost": XGBoostParams,
+    "lightgbm": LightGBMParams,
+}
+class TrainRequest(BaseModel):
+    """Request body for `/api/ml/train` — session id + model type + its hyperparameter bundle."""
+    session_id: str
+    model_type: ModelType
+    params: dict[str, Any] = Field(default_factory=dict)
+    tune: bool = False
+    use_feature_selection: bool = False
+    @model_validator(mode='after')
+    def validate_params(self) -> 'TrainRequest':
+        """Cross-field validator ensuring the `params` object matches the chosen `model_type`."""
+        schema = PARAM_SCHEMAS.get(self.model_type.value)
+        if schema and self.params:
+            try:
+                validated = schema(**self.params)
+                self.params = validated.model_dump()
+            except Exception:
+                pass  # Allow through with raw params; build_model has its own defaults
+        return self
+class ConfusionMatrixData(BaseModel):
+    """Confusion matrix counts plus labels, ready for the Step-5 chart."""
+    tn: int = 0
+    fp: int = 0
+    fn: int = 0
+    tp: int = 0
+    matrix: list[list[int]]
+    labels: list[str]
+class ROCPoint(BaseModel):
+    """One threshold sample of the ROC curve (FPR, TPR, threshold)."""
+    fpr: float
+    tpr: float
+    threshold: float
+class MetricsResponse(BaseModel):
+    """
+    Bundle of evaluation metrics returned after a training run (accuracy, precision,
+    recall, F1, AUC, confusion matrix, ROC/PR points).
+    """
+    accuracy: float
+    sensitivity: float
+    specificity: float
+    precision: float
+    f1_score: float
+    auc_roc: float
+    confusion_matrix: ConfusionMatrixData
+    roc_curve: list[ROCPoint]
+    pr_curve: list[dict[str, float]]
+    train_accuracy: float
+    cross_val_scores: list[float]
+    low_sensitivity_warning: bool
+    mcc: float = 0.0
+    overfitting_warning: bool = False
+    optimal_threshold: float = 0.5
+class ScatterPoint(BaseModel):
+    """Single 2-D point used by the KNN scatter visualisation in Step 4."""
+    x: float
+    y: float
+    label: int
+    label_name: str
+    split: str  # "train" or "test"
+    predicted: int | None = None  # only for test points
+class DecisionMesh(BaseModel):
+    """Grid of predictions used to shade the KNN decision boundary in Step 4."""
+    x_values: list[float]  # unique x coordinates of the grid
+    y_values: list[float]  # unique y coordinates of the grid
+    predictions: list[list[int]]  # 2D array [y][x] of predicted class indices
+class KNNScatterData(BaseModel):
+    """Bundle of scatter points + decision mesh shipped to the KNN visualisation."""
+    scatter_points: list[ScatterPoint]
+    decision_mesh: DecisionMesh
+    pca_explained_variance: list[float]
+    classes: list[str]
+    k: int
+    metric: str
+class TrainResponse(BaseModel):
+    """Complete payload returned by `/api/ml/train` — session id, model id, metrics, ROC/PR, scatter data."""
+    model_id: str
+    session_id: str
+    model_type: ModelType
+    params: dict[str, Any]
+    metrics: MetricsResponse
+    training_time_ms: float
+    feature_names: list[str]
+    knn_scatter: KNNScatterData | None = None
+class CompareEntry(BaseModel):
+    """A single model entry in the cross-model comparison list (Step 4 "Add to comparison")."""
+    model_id: str
+    model_type: ModelType
+    params: dict[str, Any]
+    metrics: MetricsResponse
+    training_time_ms: float
+class CompareResponse(BaseModel):
+    """Response for `/api/ml/comparison` — the current list of compared models for the session."""
+    entries: list[CompareEntry]
+    best_model_id: str

app/models/schemas.py ADDED Viewed

	@@ -0,0 +1,73 @@

+"""Pydantic schemas for data exploration and preparation endpoints."""
+from __future__ import annotations
+from typing import Any, Literal
+from pydantic import BaseModel, Field
+class SpecialtyInfo(BaseModel):
+    """
+    Descriptor for one medical specialty — id, name, description, target variable and its
+    type, feature names, clinical context, data source, what the model predicts, and the
+    dataset's licence details.
+    """
+    id: str
+    name: str
+    description: str
+    target_variable: str  # routers fall back to this column when the requested target is absent
+    target_type: Literal["binary", "multiclass"]
+    feature_names: list[str]
+    clinical_context: str
+    data_source: str
+    what_ai_predicts: str
+    license_type: str = ""
+    license_url: str = ""
+    requires_attribution: bool = False
+class ColumnStat(BaseModel):
+    """
+    Per-column summary computed during exploration: dtype, missing count and percentage,
+    unique-value count, and a list of sample values.
+    """
+    name: str
+    dtype: str
+    missing_count: int
+    missing_pct: float
+    unique_count: int
+    sample_values: list[Any]
+class DataExplorationResponse(BaseModel):
+    """
+    Response for `POST /api/explore` — column stats, row count, class distribution of the
+    target, an imbalance flag with its ratio, and the target column that was used.
+    """
+    columns: list[ColumnStat]
+    row_count: int
+    class_distribution: dict[str, int]
+    imbalance_warning: bool
+    imbalance_ratio: float
+    target_col: str
+class PrepSettings(BaseModel):
+    """
+    Step-3 preparation settings (test split, normalisation, missing-value handling, SMOTE
+    flag, outlier treatment).
+    """
+    test_size: float = Field(0.2, ge=0.1, le=0.4)
+    missing_strategy: Literal["median", "mode", "drop"] = "median"
+    normalization: Literal["zscore", "minmax", "none"] = "zscore"
+    use_smote: bool = False
+    outlier_handling: Literal["none", "iqr", "zscore_clip"] = "none"
+class PrepResponse(BaseModel):
+    """
+    Response for `POST /api/prepare` — session id, train/test sizes, feature count, class
+    distribution before and after preparation, and which transformations were applied.
+    """
+    session_id: str
+    train_size: int
+    test_size: int
+    features_count: int
+    class_distribution_before: dict[str, int]
+    class_distribution_after: dict[str, int]
+    smote_applied: bool
+    normalization_applied: str
+    norm_samples: list[dict[str, object]] = Field(default_factory=list)  # [{feature, before, after}, ...]

app/routers/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """FastAPI routers split by wizard concern (data, ml, explain)."""

app/routers/data_router.py ADDED Viewed

	@@ -0,0 +1,184 @@

+"""Data exploration and preparation REST endpoints."""
+from __future__ import annotations
+import io
+import logging
+import uuid
+import pandas as pd
+from fastapi import APIRouter, File, Form, HTTPException, Request, UploadFile, status
+from fastapi.responses import JSONResponse
+from app.models.schemas import (
+    DataExplorationResponse,
+    PrepResponse,
+    PrepSettings,
+    SpecialtyInfo,
+)
+from app.services.data_service import DatasetUnavailableError
+from app.services.specialty_registry import get_specialty, list_specialties
+logger = logging.getLogger(__name__)
+router = APIRouter(prefix="/api", tags=["data"])
+_MAX_UPLOAD_BYTES = 50 * 1024 * 1024  # 50 MB
+def _get_data_service(request: Request):
+    """Return the shared `DataService` stored on `request.app.state` (called directly by the handlers, not via `Depends`)."""
+    return request.app.state.data_service
+def _get_ml_service(request: Request):
+    """Return the shared `MLService` stored on `request.app.state` (called directly by the handlers, not via `Depends`)."""
+    return request.app.state.ml_service
+def _load_df(file: UploadFile | None, specialty_id: str, data_service) -> pd.DataFrame:
+    """
+    Helper that loads a pandas DataFrame either from an uploaded CSV or from the
+    specialty's bundled dataset.
+    """
+    if file is not None and file.filename:
+        # Bug #6: Validate file extension
+        if not file.filename.lower().endswith(".csv"):
+            raise HTTPException(
+                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+                detail=f"Only .csv files are accepted (got: {file.filename})",
+            )
+        content = file.file.read()
+        # Enforce 50 MB limit
+        if len(content) > _MAX_UPLOAD_BYTES:
+            raise HTTPException(
+                status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
+                detail=f"File exceeds 50 MB limit (uploaded: {len(content) // (1024 * 1024)} MB)",
+            )
+        try:
+            df = pd.read_csv(io.BytesIO(content))
+        except Exception as exc:
+            raise HTTPException(
+                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+                detail=f"Could not parse CSV file: {exc}",
+            )
+        # Bug #7: Minimum dataset size validation
+        if len(df) < 10:
+            raise HTTPException(
+                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+                detail=f"Dataset must have at least 10 rows (got {len(df)})",
+            )
+        if len(df.columns) < 2:
+            raise HTTPException(
+                status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+                detail=f"Dataset must have at least 2 columns (got {len(df.columns)})",
+            )
+        return df
+    try:
+        return data_service.get_example_dataset(specialty_id)
+    except DatasetUnavailableError as exc:
+        raise HTTPException(
+            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
+            detail=str(exc),
+        ) from exc
+# ------------------------------------------------------------------
+# Specialties
+# ------------------------------------------------------------------
+@router.get("/specialties", response_model=list[SpecialtyInfo])
+def get_specialties() -> list[SpecialtyInfo]:
+    """List endpoint — returns the 20-entry specialty registry used by the Step 1 picker."""
+    return list_specialties()
+@router.get("/specialties/{specialty_id}", response_model=SpecialtyInfo)
+def get_specialty_by_id(specialty_id: str) -> SpecialtyInfo:
+    """Retrieve a single specialty by id; 404 if unknown."""
+    spec = get_specialty(specialty_id)
+    if spec is None:
+        raise HTTPException(status_code=404, detail=f"Specialty '{specialty_id}' not found")
+    return spec
+# ------------------------------------------------------------------
+# Exploration
+# ------------------------------------------------------------------
+@router.post("/explore", response_model=DataExplorationResponse)
+def explore_data(
+    request: Request,
+    specialty_id: str = Form(...),
+    target_col: str = Form(...),
+    file: UploadFile | None = File(None),
+) -> DataExplorationResponse:
+    """Step-2 exploration endpoint — returns per-column stats for the active dataset."""
+    ds = _get_data_service(request)
+    df = _load_df(file, specialty_id, ds)
+    if target_col not in df.columns:
+        # Try to find target from specialty registry
+        spec = get_specialty(specialty_id)
+        if spec and spec.target_variable in df.columns:
+            target_col = spec.target_variable
+        else:
+            raise HTTPException(
+                status_code=422,
+                detail=f"Target column '{target_col}' not found. Available: {list(df.columns)}",
+            )
+    return ds.explore_dataframe(df, target_col)
+# ------------------------------------------------------------------
+# Preparation
+# ------------------------------------------------------------------
+@router.post("/prepare", response_model=PrepResponse)
+def prepare_data(
+    request: Request,
+    specialty_id: str = Form(...),
+    target_col: str = Form(...),
+    test_size: float = Form(0.2),
+    missing_strategy: str = Form("median"),
+    normalization: str = Form("zscore"),
+    use_smote: bool = Form(False),
+    outlier_handling: str = Form("none"),
+    session_id: str = Form(None),
+    file: UploadFile | None = File(None),
+) -> PrepResponse:
+    """Step-3 preparation endpoint — splits, normalises, imputes missing values, optionally applies SMOTE."""
+    ds = _get_data_service(request)
+    ml_service = _get_ml_service(request)
+    df = _load_df(file, specialty_id, ds)
+    if target_col not in df.columns:
+        spec = get_specialty(specialty_id)
+        if spec and spec.target_variable in df.columns:
+            target_col = spec.target_variable
+        else:
+            raise HTTPException(status_code=422, detail=f"Target column '{target_col}' not found")
+    new_session_id = session_id or str(uuid.uuid4())
+    try:
+        settings = PrepSettings(
+            test_size=test_size,
+            missing_strategy=missing_strategy,  # type: ignore[arg-type]
+            normalization=normalization,  # type: ignore[arg-type]
+            use_smote=use_smote,
+            outlier_handling=outlier_handling,  # type: ignore[arg-type]
+        )
+        X_train, X_test, y_train, y_test, response, feature_names = ds.prepare_data(
+            df, target_col, settings, new_session_id
+        )
+    except Exception as exc:
+        logger.exception("Data preparation failed")
+        raise HTTPException(status_code=422, detail=str(exc))
+    # Share prepared data with ML service, including specialty_id for certificate generation
+    session_data = ds.get_session(new_session_id)
+    if session_data:
+        session_data["specialty_id"] = specialty_id  # Fix: store for certificate generation
+        ml_service.store_session_data(new_session_id, session_data)
+    return response

app/routers/explain_router.py ADDED Viewed

	@@ -0,0 +1,454 @@

+"""Explainability, ethics, and certificate REST endpoints."""
+from __future__ import annotations
+import logging
+from fastapi import APIRouter, HTTPException, Request
+from fastapi.responses import StreamingResponse
+from app.models.explain_schemas import (
+    CertificateRequest,
+    ChecklistUpdate,
+    EthicsResponse,
+    GlobalExplainabilityResponse,
+    SamplePatientsResponse,
+    SinglePatientExplainResponse,
+    WhatIfRequest,
+    WhatIfResponse,
+)
+logger = logging.getLogger(__name__)
+router = APIRouter(prefix="/api", tags=["explain"])
+def _get_services(request: Request):
+    """
+    Return the shared services stored on `request.app.state` as a 5-tuple, in this order:
+    (ml_service, explain_service, ethics_service, certificate_service, insight_service).
+    Callers unpack positionally, so the order matters.
+    """
+    return (
+        request.app.state.ml_service,
+        request.app.state.explain_service,
+        request.app.state.ethics_service,
+        request.app.state.certificate_service,
+        request.app.state.insight_service,
+    )
+def _get_model_data(ml_service, model_id: str) -> dict:
+    """Helper that pulls the trained model + split data for a session, raising 404 if absent."""
+    data = ml_service.get_model(model_id)
+    if data is None:
+        raise HTTPException(status_code=404, detail=f"Model '{model_id}' not found. Train a model first.")
+    return data
+@router.get("/explain/global/{model_id}", response_model=GlobalExplainabilityResponse)
+def global_importance(request: Request, model_id: str) -> GlobalExplainabilityResponse:
+    """Step-6 endpoint — computes global SHAP feature importance for the active model."""
+    ml, explain, *_ = _get_services(request)
+    data = _get_model_data(ml, model_id)
+    try:
+        return explain.global_importance(
+            model_id=model_id,
+            model=data["model"],
+            X_test=data["X_test"],
+            y_test=data["y_test"],
+            feature_names=data["feature_names"],
+            X_train=data["X_train"],
+            model_type=str(data["model_type"]),
+            classes=data["classes"],
+        )
+    except Exception as exc:
+        logger.exception("Global explainability failed")
+        raise HTTPException(status_code=500, detail=str(exc))
+@router.get("/explain/patient/{model_id}/{patient_index}", response_model=SinglePatientExplainResponse)
+def single_patient_explain(
+    request: Request, model_id: str, patient_index: int
+) -> SinglePatientExplainResponse:
+    """Step-6 endpoint — returns a per-patient SHAP waterfall plus base/final probability."""
+    ml, explain, *_ = _get_services(request)
+    data = _get_model_data(ml, model_id)
+    n_test = len(data["X_test"])
+    if patient_index < 0 or patient_index >= n_test:
+        raise HTTPException(status_code=422, detail=f"Patient index {patient_index} out of range [0, {n_test-1}]")
+    try:
+        return explain.single_patient(
+            model_id=model_id,
+            model=data["model"],
+            patient_idx=patient_index,
+            X_test=data["X_test"],
+            feature_names=data["feature_names"],
+            X_train=data["X_train"],
+            model_type=str(data["model_type"]),
+            classes=data["classes"],
+            y_test=data["y_test"],
+            scaler=data.get("scaler"),
+        )
+    except Exception as exc:
+        logger.exception("Single-patient explanation failed")
+        raise HTTPException(status_code=500, detail=str(exc))
+@router.post("/explain/what-if", response_model=WhatIfResponse)
+def what_if(request: Request, body: WhatIfRequest) -> WhatIfResponse:
+    """Step-6 endpoint — probes probability changes when specific feature values are altered."""
+    ml, explain, *_ = _get_services(request)
+    data = _get_model_data(ml, body.model_id)
+    n_test = len(data["X_test"])
+    if body.patient_index < 0 or body.patient_index >= n_test:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Patient index {body.patient_index} out of range [0, {n_test - 1}]",
+        )
+    if body.feature_name not in data["feature_names"]:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Feature '{body.feature_name}' not found. Available: {data['feature_names']}",
+        )
+    try:
+        return explain.what_if(
+            model_id=body.model_id,
+            model=data["model"],
+            patient_index=body.patient_index,
+            feature_name=body.feature_name,
+            new_value=body.new_value,
+            X_test=data["X_test"],
+            feature_names=data["feature_names"],
+            scaler=data.get("scaler"),
+        )
+    except Exception as exc:
+        logger.exception("What-if analysis failed")
+        raise HTTPException(status_code=500, detail=str(exc))
+@router.get("/explain/sample-patients/{model_id}", response_model=SamplePatientsResponse)
+def sample_patients(request: Request, model_id: str) -> SamplePatientsResponse:
+    """Step-6 helper — returns a handful of sample rows from the test split for quick picking."""
+    ml, explain, *_ = _get_services(request)
+    data = _get_model_data(ml, model_id)
+    try:
+        return explain.sample_patients(
+            model_id=model_id,
+            model=data["model"],
+            X_test=data["X_test"],
+        )
+    except Exception as exc:
+        logger.exception("Sample patients retrieval failed")
+        raise HTTPException(status_code=500, detail=str(exc))
+@router.get("/ethics/{model_id}", response_model=EthicsResponse)
+def get_ethics(request: Request, model_id: str) -> EthicsResponse:
+    """Step-7 endpoint — runs the bias audit and produces fairness deltas + warnings."""
+    ml, _, ethics, _, _ = _get_services(request)
+    data = _get_model_data(ml, model_id)
+    try:
+        return ethics.analyze_bias(
+            model_id=model_id,
+            model=data["model"],
+            X_test=data["X_test"],
+            y_test=data["y_test"],
+            feature_names=data["feature_names"],
+            classes=data["classes"],
+            X_train=data["X_train"],
+            scaler=data.get("scaler"),
+        )
+    except Exception as exc:
+        logger.exception("Ethics analysis failed")
+        raise HTTPException(status_code=500, detail=str(exc))
+@router.post("/ethics/checklist")
+def update_checklist(request: Request, body: ChecklistUpdate) -> dict:
+    """Step-7 endpoint — toggles a single EU AI Act checklist item for the session."""
+    _, _, ethics, _, _ = _get_services(request)
+    return ethics.update_checklist(body.model_id, body.item_id, body.checked)
+@router.get("/insights/{model_id}")
+async def get_insights(request: Request, model_id: str) -> dict:
+    """Generate LLM-powered clinical insights for a trained model."""
+    import asyncio
+    import numpy as np
+    ml, explain, ethics, _, insight_svc = _get_services(request)
+    data = _get_model_data(ml, model_id)
+    metrics = data.get("metrics")
+    if metrics is None:
+        raise HTTPException(status_code=422, detail="Model metrics not available.")
+    # --- Gather all data sources ---
+    ethics_data = ethics.analyze_bias(
+        model_id=model_id,
+        model=data["model"],
+        X_test=data["X_test"],
+        y_test=data["y_test"],
+        feature_names=data["feature_names"],
+        classes=data["classes"],
+        X_train=data["X_train"],
+        scaler=data.get("scaler"),
+    )
+    # SHAP / Feature importance (non-blocking, best-effort)
+    shap_data = None
+    try:
+        shap_data = explain.global_importance(
+            model_id=model_id,
+            model=data["model"],
+            X_test=data["X_test"],
+            y_test=data["y_test"],
+            feature_names=data["feature_names"],
+            X_train=data["X_train"],
+            model_type=str(data["model_type"]),
+            classes=data["classes"],
+        )
+    except Exception as exc:
+        logger.warning("SHAP for insights failed: %s", exc)
+    # Specialty metadata
+    session_id = data.get("session_id", "")
+    ml_session = ml.get_session(session_id)
+    specialty_info = None
+    if ml_session:
+        from app.services.specialty_registry import SPECIALTIES
+        specialty_info = SPECIALTIES.get(ml_session.get("specialty_id", ""))
+    def _m(attr: str):
+        """Inner helper used by `get_insights` to memoise the LLM call per task."""
+        return getattr(metrics, attr, None) if hasattr(metrics, attr) else metrics.get(attr)
+    # Confusion matrix
+    cm_summary = {}
+    cm_data = _m("confusion_matrix")
+    if cm_data and hasattr(cm_data, "matrix"):
+        matrix = cm_data.matrix
+        if len(matrix) == 2:
+            cm_summary = {"TN": matrix[0][0], "FP": matrix[0][1], "FN": matrix[1][0], "TP": matrix[1][1]}
+        else:
+            cm_summary = {"matrix_size": f"{len(matrix)}x{len(matrix)}", "classes": data["classes"]}
+    # Class distribution
+    class_dist = {}
+    if ml_session:
+        y_train = ml_session.get("y_train")
+        if y_train is not None:
+            unique, counts = np.unique(y_train, return_counts=True)
+            classes_list = data["classes"]
+            class_dist = {
+                classes_list[int(u)] if int(u) < len(classes_list) else str(u): int(c)
+                for u, c in zip(unique, counts)
+            }
+    # Feature importance from SHAP
+    feature_importance_data = []
+    if shap_data:
+        for fi in shap_data.feature_importances[:10]:  # top 10
+            feature_importance_data.append({
+                "feature": fi.feature_name,
+                "clinical_name": fi.clinical_name,
+                "importance": round(fi.importance, 4),
+                "direction": fi.direction,
+                "clinical_note": fi.clinical_note,
+            })
+    cv_scores = _m("cross_val_scores") or []
+    context = {
+        # Specialty & clinical domain
+        "specialty_name": specialty_info.name if specialty_info else "Unknown",
+        "what_ai_predicts": specialty_info.what_ai_predicts if specialty_info else "clinical outcome",
+        "clinical_context": specialty_info.clinical_context if specialty_info else "",
+        "target_variable": specialty_info.target_variable if specialty_info else "target",
+        "data_source": specialty_info.data_source if specialty_info else "unknown",
+        # Model info
+        "model_type": data["model_type"].value.replace("_", " ").title() if hasattr(data.get("model_type"), "value") else str(data.get("model_type", "unknown")),
+        "model_params": data.get("params", {}),
+        "training_time_ms": data.get("training_time_ms"),
+        # Dataset info
+        "feature_names": data["feature_names"],
+        "classes": data["classes"],
+        "train_size": len(data["X_train"]),
+        "test_size": len(data["X_test"]),
+        "class_distribution_train": class_dist,
+        "use_smote": ml_session.get("smote_applied", False) if ml_session else False,
+        "normalization": ml_session.get("normalization", "N/A") if ml_session else "N/A",
+        "raw_column_meta": ml_session.get("raw_column_meta", []) if ml_session else [],
+        "row_count_original": ml_session.get("row_count", 0) if ml_session else 0,
+        # Performance metrics
+        "accuracy": _m("accuracy"),
+        "sensitivity": _m("sensitivity"),
+        "specificity": _m("specificity"),
+        "precision": _m("precision"),
+        "f1_score": _m("f1_score"),
+        "auc_roc": _m("auc_roc"),
+        "mcc": _m("mcc"),
+        "train_accuracy": _m("train_accuracy"),
+        "cv_scores": cv_scores,
+        "cv_mean": float(sum(cv_scores) / max(len(cv_scores), 1)),
+        "cv_std": float(np.std(cv_scores)) if cv_scores else 0.0,
+        "overfitting_warning": _m("overfitting_warning"),
+        "optimal_threshold": _m("optimal_threshold"),
+        "low_sensitivity_warning": _m("low_sensitivity_warning"),
+        "confusion_matrix": cm_summary,
+        # Explainability / SHAP
+        "shap_method": shap_data.method if shap_data else "unavailable",
+        "feature_importances": feature_importance_data,
+        "top_feature_clinical_note": shap_data.top_feature_clinical_note if shap_data else "",
+        "explained_variance_top5_pct": shap_data.explained_variance_pct if shap_data else 0,
+        # Fairness data
+        "overall_sensitivity": ethics_data.overall_sensitivity,
+        "bias_warnings": [
+            {"group": w.affected_group, "metric": w.metric, "gap": w.gap}
+            for w in ethics_data.bias_warnings
+        ],
+        "subgroup_details": [
+            {
+                "group": sm.group_label,
+                "sensitivity": sm.sensitivity,
+                "accuracy": sm.accuracy,
+                "specificity": sm.specificity,
+                "precision": sm.precision,
+                "f1_score": sm.f1_score,
+                "sample_size": sm.sample_size,
+                "status": sm.status,
+                "status_reason": sm.status_reason,
+            }
+            for sm in ethics_data.subgroup_metrics
+        ],
+    }
+    # Compared models (if user trained multiple models)
+    compared_models = []
+    if session_id:
+        try:
+            compare_data = ml.get_comparison(session_id)
+            for entry in compare_data.entries:
+                compared_models.append({
+                    "model_type": entry.model_type.value.replace("_", " ").title(),
+                    "model_id": entry.model_id,
+                    "accuracy": entry.metrics.accuracy,
+                    "sensitivity": entry.metrics.sensitivity,
+                    "specificity": entry.metrics.specificity,
+                    "auc_roc": entry.metrics.auc_roc,
+                    "f1_score": entry.metrics.f1_score,
+                    "mcc": entry.metrics.mcc,
+                    "training_time_ms": entry.training_time_ms,
+                })
+        except Exception as exc:
+            logger.warning("Comparison data unavailable: %s", exc)
+    logger.info("Insights context: %d compared models", len(compared_models))
+    context["compared_models"] = compared_models
+    # Feature column statistics (distributions for clinical grounding)
+    column_stats = []
+    X_train = data["X_train"]
+    for i, fname in enumerate(data["feature_names"]):
+        col_info: dict[str, Any] = {"name": fname}
+        try:
+            col = X_train[:, i] if hasattr(X_train, "shape") else X_train.iloc[:, i]
+            col_info["mean"] = round(float(np.mean(col)), 3)
+            col_info["std"] = round(float(np.std(col)), 3)
+            col_info["min"] = round(float(np.min(col)), 3)
+            col_info["max"] = round(float(np.max(col)), 3)
+        except Exception:
+            pass
+        column_stats.append(col_info)
+    context["column_statistics"] = column_stats
+    # Sample rows from test set (real patient data for LLM grounding)
+    feature_names = data["feature_names"]
+    classes = data["classes"]
+    X_test = data["X_test"]
+    y_test = data["y_test"]
+    sample_rows = []
+    n_samples = min(5, len(X_test))
+    # Pick diverse samples: some positive, some negative
+    try:
+        pos_idx = [i for i in range(len(y_test)) if int(y_test[i]) == 1]
+        neg_idx = [i for i in range(len(y_test)) if int(y_test[i]) == 0]
+        pick = (pos_idx[:3] + neg_idx[:2])[:n_samples] if pos_idx and neg_idx else list(range(n_samples))
+        for idx in pick:
+            row = {}
+            for j, fname in enumerate(feature_names):
+                val = X_test[idx, j] if hasattr(X_test, "shape") else X_test.iloc[idx, j]
+                row[fname] = round(float(val), 3)
+            row["_actual_outcome"] = classes[int(y_test[idx])] if int(y_test[idx]) < len(classes) else str(y_test[idx])
+            sample_rows.append(row)
+    except Exception:
+        pass
+    context["sample_patients"] = sample_rows
+    # EU AI Act static items for enrichment
+    from app.services.ethics_service import EU_AI_ACT_ITEMS
+    context["eu_ai_act_items"] = EU_AI_ACT_ITEMS
+    try:
+        ethics_task = insight_svc.generate_ethics_insight(context)
+        cases_task = insight_svc.generate_case_studies(context)
+        eu_act_task = insight_svc.generate_eu_ai_act_insights(context)
+        ethics_result, cases_result, eu_act_result = await asyncio.gather(
+            ethics_task, cases_task, eu_act_task
+        )
+        return {
+            "ethics_insight": ethics_result,
+            "case_studies": cases_result,
+            "eu_ai_act_insights": eu_act_result,
+        }
+    except Exception as exc:
+        logger.exception("Insight generation failed")
+        raise HTTPException(status_code=500, detail=str(exc))
+@router.post("/generate-certificate")
+def generate_certificate(request: Request, body: CertificateRequest) -> StreamingResponse:
+    """Step-7 endpoint — renders the EU AI Act compliance PDF via `CertificateService`."""
+    ml, _, ethics, cert_svc, _ = _get_services(request)
+    data = _get_model_data(ml, body.model_id)
+    # Rebuild metrics from stored model
+    metrics = data.get("metrics")
+    if metrics is None:
+        raise HTTPException(status_code=422, detail="Model metrics not available. Train the model first.")
+    ethics_data = ethics.analyze_bias(
+        model_id=body.model_id,
+        model=data["model"],
+        X_test=data["X_test"],
+        y_test=data["y_test"],
+        feature_names=data["feature_names"],
+        classes=data["classes"],
+        X_train=data["X_train"],
+        scaler=data.get("scaler"),
+    )
+    session_id = data.get("session_id", "")
+    specialty_name = "Healthcare ML"
+    ml_session = ml.get_session(session_id)
+    if ml_session:
+        from app.services.specialty_registry import SPECIALTIES
+        sid = ml_session.get("specialty_id", "")
+        spec = SPECIALTIES.get(sid)
+        if spec:
+            specialty_name = spec.name
+    try:
+        pdf_bytes = cert_svc.generate_pdf(
+            cert_request=body,
+            metrics=metrics,
+            ethics=ethics_data,
+            specialty_name=specialty_name,
+            model_type=data["model_type"],
+        )
+    except Exception as exc:
+        logger.exception("Certificate generation failed")
+        raise HTTPException(status_code=500, detail=str(exc))
+    return StreamingResponse(
+        iter([pdf_bytes]),
+        media_type="application/pdf",
+        headers={"Content-Disposition": f'attachment; filename="ml_certificate_{body.model_id[:8]}.pdf"'},
+    )

app/routers/ml_router.py ADDED Viewed

	@@ -0,0 +1,92 @@

+"""ML model training and evaluation REST endpoints."""
+from __future__ import annotations
+import logging
+from fastapi import APIRouter, HTTPException, Request, status
+from fastapi.responses import Response
+from app.models.ml_schemas import (
+    CompareResponse,
+    ModelType,
+    TrainRequest,
+    TrainResponse,
+)
+logger = logging.getLogger(__name__)  # module-level logger, named after this module
+router = APIRouter(prefix="/api", tags=["ml"])  # every route declared below is served under /api
+def _get_ml_service(request: Request):
+    """FastAPI dependency — resolves the shared `MLService` off `app.state`."""
+    return request.app.state.ml_service
+@router.post("/train", response_model=TrainResponse)
+def train_model(request: Request, body: TrainRequest) -> TrainResponse:
+    """Step-4 endpoint — trains the chosen classifier on the prepared session data and returns metrics."""
+    ml = _get_ml_service(request)
+    session = ml.get_session(body.session_id)
+    if session is None:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"Session '{body.session_id}' not found. Run /api/prepare first.",
+        )
+    try:
+        response = ml.train_and_evaluate(
+            body.session_id, body.model_type, body.params,
+            tune=body.tune,
+            use_feature_selection=body.use_feature_selection,
+        )
+    except Exception as exc:
+        logger.exception("Model training failed")
+        raise HTTPException(status_code=status.HTTP_422_UNPROCESSABLE_ENTITY, detail=str(exc))
+    # Cache metrics for comparison
+    ml.store_train_response_in_model(response.model_id, response)
+    return response
+@router.post("/compare/{model_id}", response_model=CompareResponse)
+def add_to_comparison(request: Request, model_id: str) -> CompareResponse:
+    """Step-4 endpoint — adds the latest trained model to the cross-model comparison list."""
+    ml = _get_ml_service(request)
+    model_data = ml.get_model(model_id)
+    if model_data is None:
+        raise HTTPException(status_code=404, detail=f"Model '{model_id}' not found")
+    session_id = model_data.get("session_id", "")
+    try:
+        return ml.add_to_comparison(session_id, model_id)
+    except Exception as exc:
+        raise HTTPException(status_code=422, detail=str(exc))
+@router.get("/compare/{session_id}", response_model=CompareResponse)
+def get_comparison(request: Request, session_id: str) -> CompareResponse:
+    """Step-4 endpoint — returns the current comparison list for the session."""
+    ml = _get_ml_service(request)
+    return ml.get_comparison(session_id)
+@router.delete("/compare/{session_id}", status_code=204, response_model=None)
+def clear_comparison(request: Request, session_id: str):
+    """Step-4 endpoint — empties the comparison list for the session."""
+    _get_ml_service(request).clear_comparison(session_id)
+    return Response(status_code=204)
+@router.get("/models/{model_id}")
+def get_model_info(request: Request, model_id: str) -> dict:
+    """Step-4 endpoint — returns stored metrics for a specific model id."""
+    ml = _get_ml_service(request)
+    data = ml.get_model(model_id)
+    if data is None:
+        raise HTTPException(status_code=404, detail=f"Model '{model_id}' not found")
+    return {
+        "model_id": model_id,
+        "model_type": data.get("model_type"),
+        "params": data.get("params"),
+        "session_id": data.get("session_id"),
+        "feature_names": data.get("feature_names"),
+        "classes": data.get("classes"),
+    }

app/services/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Service layer — one singleton per concern, attached to `app.state` in `main.py`."""

app/services/certificate_service.py ADDED Viewed

	@@ -0,0 +1,690 @@

+"""PDF certificate generation using ReportLab."""
+from __future__ import annotations
+import datetime
+import math
+from io import BytesIO
+from typing import Optional
+from reportlab.lib import colors
+from reportlab.lib.pagesizes import A4
+from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
+from reportlab.lib.units import cm
+from reportlab.platypus import (
+    HRFlowable,
+    Paragraph,
+    SimpleDocTemplate,
+    Spacer,
+    Table,
+    TableStyle,
+)
+from reportlab.platypus.flowables import Flowable
+from app.models.explain_schemas import CertificateRequest, EthicsResponse
+from app.models.ml_schemas import MetricsResponse, ModelType
+# Colour palette — the app's green is PRIMARY; SUCCESS/WARNING/DANGER and their *_BG tints are the traffic-light colours
+PRIMARY = colors.HexColor("#1A7A4C")
+PRIMARY_DARK = colors.HexColor("#145E39")  # banner bottom strip and H2 heading text
+PRIMARY_LIGHT = colors.HexColor("#E8F5EE")
+SUCCESS = colors.HexColor("#1A7A4C")  # same hex as PRIMARY
+SUCCESS_BG = colors.HexColor("#F0FDF4")  # _row_bg result when the value reaches the green threshold
+WARNING = colors.HexColor("#92400E")
+WARNING_BG = colors.HexColor("#FFF7ED")  # _row_bg result when the value reaches only the amber threshold
+DANGER = colors.HexColor("#991B1B")
+DANGER_BG = colors.HexColor("#FFF1F2")  # _row_bg result below both thresholds
+LIGHT_GREY = colors.HexColor("#F4F7FB")
+MID_GREY = colors.HexColor("#DDE3EC")
+DARK_TEXT = colors.HexColor("#172B4D")
+ACCENT = colors.HexColor("#0EA5E9")
+MODEL_LABELS = {  # display name per ModelType; lookups fall back to str(model_type)
+    ModelType.KNN: "K-Nearest Neighbours (KNN)",
+    ModelType.SVM: "Support Vector Machine (SVM)",
+    ModelType.DECISION_TREE: "Decision Tree",
+    ModelType.RANDOM_FOREST: "Random Forest",
+    ModelType.LOGISTIC_REGRESSION: "Logistic Regression",
+    ModelType.NAIVE_BAYES: "Naïve Bayes",
+    ModelType.XGBOOST: "XGBoost (Extreme Gradient Boosting)",
+    ModelType.LIGHTGBM: "LightGBM (Light Gradient Boosting)",
+}
+# ---------------------------------------------------------------------------
+# Custom flowable: full-width coloured banner block
+# ---------------------------------------------------------------------------
+class _BannerBlock(Flowable):
+    """Draws a filled rectangle spanning the full page width at the top."""
+    def __init__(self, width: float, height: float, bg_color: colors.Color,
+                 title: str):
+        """Store the label + colour so the flowable is self-contained during layout."""
+        super().__init__()
+        self.width = width
+        self.height = height
+        self.bg_color = bg_color
+        self.title = title
+    def draw(self):
+        """Render the rectangle + label onto the current canvas."""
+        c = self.canv
+        c.setFillColor(self.bg_color)
+        c.rect(0, 0, self.width, self.height, fill=1, stroke=0)
+        c.setFillColor(PRIMARY_DARK)
+        c.rect(0, 0, self.width, 3, fill=1, stroke=0)
+        c.setFillColor(colors.white)
+        c.setFont("Helvetica-Bold", 22)
+        c.drawCentredString(self.width / 2, self.height / 2 + 2, self.title)
+class _BorderFrame(Flowable):
+    """Draws a decorative double-line border around the page."""
+    def __init__(self, page_width: float, page_height: float,
+                 margin: float, color: colors.Color):
+        """Store the inner flowables + border colour."""
+        super().__init__()
+        self.page_width = page_width
+        self.page_height = page_height
+        self.margin = margin
+        self.color = color
+        self.width = 0
+        self.height = 0
+    def draw(self):
+        """Draw the border + delegate inner rendering to the wrapped flowables."""
+        c = self.canv
+        m = self.margin
+        pw, ph = self.page_width, self.page_height
+        c.setStrokeColor(self.color)
+        # Outer border
+        c.setLineWidth(2.5)
+        c.rect(m - 8, m - 8, pw - 2 * (m - 8), ph - 2 * (m - 8),
+               fill=0, stroke=1)
+        # Inner border (inset by 4 pts)
+        c.setLineWidth(0.8)
+        c.rect(m - 4, m - 4, pw - 2 * (m - 4), ph - 2 * (m - 4),
+               fill=0, stroke=1)
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+def _metric_colour(value: float, green: float, amber: float) -> colors.Color:
+    """Pick a banner colour for a metric value (green/amber/red) based on configured thresholds."""
+    if value >= green:
+        return SUCCESS
+    if value >= amber:
+        return WARNING
+    return DANGER
+def _pct(value: float) -> str:
+    """Format a 0..1 number as a one-decimal percentage string."""
+    return f"{value * 100:.1f}%"
+def _row_bg(val: float, green: float, amber: float) -> colors.Color:
+    """Alternate row background colour for zebra-striped tables."""
+    if val >= green:
+        return SUCCESS_BG
+    if val >= amber:
+        return WARNING_BG
+    return DANGER_BG
+def _compute_mcc(tp: int, tn: int, fp: int, fn: int) -> Optional[float]:
+    """Compute Matthews Correlation Coefficient from a confusion matrix row."""
+    denom = math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
+    if denom == 0:
+        return None
+    return (tp * tn - fp * fn) / denom
+def _generate_takeaways(metrics: MetricsResponse, model_type: ModelType) -> list[str]:
+    """Auto-generate bullet-point takeaways from model metrics."""
+    bullets: list[str] = []
+    model_label = MODEL_LABELS.get(model_type, str(model_type))
+    # Sensitivity (clinical priority)
+    if metrics.sensitivity >= 0.85:
+        bullets.append(
+            f"Excellent sensitivity ({_pct(metrics.sensitivity)}): the model correctly identifies the "
+            "large majority of positive cases, making it well-suited for clinical screening."
+        )
+    elif metrics.sensitivity >= 0.70:
+        bullets.append(
+            f"Acceptable sensitivity ({_pct(metrics.sensitivity)}): most positive cases are detected, "
+            "though some missed diagnoses remain possible."
+        )
+    else:
+        bullets.append(
+            f"Low sensitivity ({_pct(metrics.sensitivity)}): the model misses a substantial proportion "
+            "of positive cases — not recommended for screening without further tuning."
+        )
+    # Specificity
+    if metrics.specificity >= 0.85:
+        bullets.append(
+            f"High specificity ({_pct(metrics.specificity)}): very few healthy patients are incorrectly "
+            "flagged, reducing unnecessary follow-up burden."
+        )
+    elif metrics.specificity < 0.65:
+        bullets.append(
+            f"Below-average specificity ({_pct(metrics.specificity)}): a notable false-positive rate "
+            "could lead to unnecessary investigations in healthy patients."
+        )
+    # AUC
+    if metrics.auc_roc >= 0.90:
+        bullets.append(
+            f"Outstanding discrimination (AUC = {_pct(metrics.auc_roc)}): the model reliably ranks "
+            "positive cases above negative ones across all decision thresholds."
+        )
+    elif metrics.auc_roc >= 0.75:
+        bullets.append(
+            f"Good discriminative ability (AUC = {_pct(metrics.auc_roc)}): the model provides useful "
+            "separation between classes across operating points."
+        )
+    else:
+        bullets.append(
+            f"Weak discrimination (AUC = {_pct(metrics.auc_roc)}): the model struggles to separate "
+            "positive from negative cases and should be improved before deployment."
+        )
+    # Overfitting warning
+    if metrics.overfitting_warning:
+        gap = metrics.train_accuracy - metrics.accuracy
+        bullets.append(
+            f"Overfitting detected: training accuracy ({_pct(metrics.train_accuracy)}) is considerably "
+            f"higher than test accuracy ({_pct(metrics.accuracy)}, gap = {gap * 100:.1f} pp). "
+            "Consider regularisation, pruning, or collecting more data."
+        )
+    else:
+        bullets.append(
+            f"Generalisation is healthy: the gap between training ({_pct(metrics.train_accuracy)}) "
+            f"and test accuracy ({_pct(metrics.accuracy)}) is within acceptable bounds."
+        )
+    # MCC
+    if hasattr(metrics, "mcc") and metrics.mcc is not None:
+        mcc = metrics.mcc
+        if mcc >= 0.6:
+            bullets.append(
+                f"Strong overall balance (MCC = {mcc:.3f}): the model performs well even if class "
+                "sizes are imbalanced."
+            )
+        elif mcc >= 0.3:
+            bullets.append(
+                f"Moderate overall balance (MCC = {mcc:.3f}): the model shows some robustness to "
+                "class imbalance, but there is room for improvement."
+            )
+        else:
+            bullets.append(
+                f"Poor balance score (MCC = {mcc:.3f}): the model may be biased toward the majority "
+                "class. Consider resampling or adjusted class weights."
+            )
+    # Cross-val stability
+    if metrics.cross_val_scores:
+        cv_mean = sum(metrics.cross_val_scores) / len(metrics.cross_val_scores)
+        cv_std = math.sqrt(
+            sum((x - cv_mean) ** 2 for x in metrics.cross_val_scores)
+            / len(metrics.cross_val_scores)
+        )
+        if cv_std <= 0.03:
+            bullets.append(
+                f"{len(metrics.cross_val_scores)}-fold cross-validation shows very stable performance "
+                f"(mean {_pct(cv_mean)} ± {cv_std * 100:.1f} pp), indicating the result is unlikely "
+                "to be a lucky split."
+            )
+        elif cv_std <= 0.06:
+            bullets.append(
+                f"{len(metrics.cross_val_scores)}-fold cross-validation shows moderate variability "
+                f"(mean {_pct(cv_mean)} ± {cv_std * 100:.1f} pp). "
+                "The model is reasonably stable across data splits."
+            )
+        else:
+            bullets.append(
+                f"{len(metrics.cross_val_scores)}-fold cross-validation shows high variability "
+                f"(mean {_pct(cv_mean)} ± {cv_std * 100:.1f} pp). "
+                "Performance may depend heavily on how the data is split."
+            )
+    # Model-specific notes
+    if model_type in (ModelType.RANDOM_FOREST, ModelType.XGBOOST, ModelType.LIGHTGBM):
+        bullets.append(
+            f"{model_label} is an ensemble method that aggregates many weak learners; "
+            "feature-importance outputs are available for clinical interpretability."
+        )
+    elif model_type == ModelType.LOGISTIC_REGRESSION:
+        bullets.append(
+            "Logistic Regression produces calibrated probabilities and fully interpretable "
+            "coefficients, making it a strong baseline for clinical audit."
+        )
+    elif model_type == ModelType.DECISION_TREE:
+        bullets.append(
+            "Decision Trees are highly interpretable but prone to overfitting on small datasets; "
+            "examine the max-depth parameter if overfitting is observed."
+        )
+    return bullets
+# ---------------------------------------------------------------------------
+# Certificate service
+# ---------------------------------------------------------------------------
+class CertificateService:
+    """
+    Produces the EU AI Act compliance PDF (overview, fairness, explainability, checklist,
+    signatures) via reportlab.
+    """
+    def generate_pdf(
+        self,
+        cert_request: CertificateRequest,
+        metrics: MetricsResponse,
+        ethics: EthicsResponse,
+        specialty_name: str,
+        model_type: ModelType,
+        training_time_ms: Optional[float] = None,
+    ) -> bytes:
+        """Main entrypoint — build the full PDF for a session and return it as bytes."""
+        buf = BytesIO()
+        PAGE_W, PAGE_H = A4
+        MARGIN = 2 * cm
+        doc = SimpleDocTemplate(
+            buf,
+            pagesize=A4,
+            leftMargin=MARGIN,
+            rightMargin=MARGIN,
+            topMargin=MARGIN,
+            bottomMargin=2.2 * cm,
+        )
+        CONTENT_W = PAGE_W - 2 * MARGIN
+        styles = getSampleStyleSheet()
+        h2 = ParagraphStyle(
+            "H2", parent=styles["Heading2"],
+            fontSize=13, textColor=PRIMARY_DARK, spaceBefore=16, spaceAfter=5,
+            borderPad=3,
+        )
+        body = ParagraphStyle(
+            "Body", parent=styles["Normal"],
+            fontSize=10, textColor=DARK_TEXT, leading=14,
+        )
+        body_center = ParagraphStyle(
+            "BodyCenter", parent=body,
+            alignment=1,
+        )
+        small = ParagraphStyle(
+            "Small", parent=styles["Normal"],
+            fontSize=8, textColor=colors.HexColor("#6B7280"), leading=11,
+        )
+        small_center = ParagraphStyle(
+            "SmallCenter", parent=small,
+            alignment=1,
+        )
+        disclaimer_style = ParagraphStyle(
+            "Disclaimer", parent=small,
+            textColor=DANGER, alignment=1, leading=11,
+        )
+        bullet_style = ParagraphStyle(
+            "Bullet", parent=styles["Normal"],
+            fontSize=9, textColor=DARK_TEXT, leading=13,
+            leftIndent=14, firstLineIndent=-10,
+        )
+        cell8 = ParagraphStyle(
+            "Cell8", parent=styles["Normal"],
+            fontSize=8, textColor=DARK_TEXT, leading=10,
+        )
+        story = []
+        # ---- PAGE BORDER (drawn via canvas callback — we approximate with a table border) ----
+        # We'll use a single-cell table at the very start to act as a framing border.
+        # This works because SimpleDocTemplate renders top to bottom.
+        # A more robust approach uses page templates; here we use a thin top-rule trick.
+        # ---- GREEN HEADER BANNER ----
+        banner = _BannerBlock(
+            width=CONTENT_W,
+            height=1.8 * cm,
+            bg_color=PRIMARY,
+            title="HEALTH-AI · ML Learning Tool",
+        )
+        story.append(banner)
+        story.append(Spacer(1, 0.4 * cm))
+        issued_to = cert_request.clinician_name or "Healthcare Professional"
+        institution = cert_request.institution or "Healthcare Institution"
+        today = datetime.date.today().strftime("%d %B %Y")
+        story.append(Paragraph(
+            f"This certificate is issued to <b>{issued_to}</b> of <b>{institution}</b> "
+            f"for completing the HEALTH-AI ML Learning Tool educational exercise on <b>{today}</b>.",
+            body_center,
+        ))
+        story.append(Spacer(1, 0.4 * cm))
+        # ---- SECTION 1: Specialty & Model ----
+        story.append(Paragraph("1. Clinical Specialty &amp; AI Model", h2))
+        info_data = [
+            ["Medical Specialty", specialty_name],
+            ["AI Model Type", MODEL_LABELS.get(model_type, str(model_type))],
+            ["Model ID", cert_request.model_id[:24] + ("…" if len(cert_request.model_id) > 24 else "")],
+        ]
+        if training_time_ms is not None:
+            if training_time_ms >= 1000:
+                time_str = f"{training_time_ms / 1000:.2f} s"
+            else:
+                time_str = f"{training_time_ms:.0f} ms"
+            info_data.append(["Training Time", time_str])
+        info_table = Table(info_data, colWidths=[5.5 * cm, 11.5 * cm])
+        info_table.setStyle(TableStyle([
+            ("BACKGROUND", (0, 0), (0, -1), PRIMARY_LIGHT),
+            ("TEXTCOLOR", (0, 0), (-1, -1), DARK_TEXT),
+            ("FONTNAME", (0, 0), (0, -1), "Helvetica-Bold"),
+            ("FONTSIZE", (0, 0), (-1, -1), 9),
+            ("GRID", (0, 0), (-1, -1), 0.5, MID_GREY),
+            ("ROWBACKGROUNDS", (0, 0), (-1, -1), [colors.white, LIGHT_GREY]),
+            ("LEFTPADDING", (0, 0), (-1, -1), 8),
+            ("RIGHTPADDING", (0, 0), (-1, -1), 8),
+            ("TOPPADDING", (0, 0), (-1, -1), 5),
+            ("BOTTOMPADDING", (0, 0), (-1, -1), 5),
+            ("LINEBELOW", (0, -1), (-1, -1), 1.5, PRIMARY),
+        ]))
+        story.append(info_table)
+        story.append(Spacer(1, 0.4 * cm))
+        # ---- SECTION 2: Performance Metrics ----
+        story.append(Paragraph("2. Model Performance Summary", h2))
+        story.append(Paragraph(
+            "Performance measured on held-out test patients the model had never seen during training.",
+            body,
+        ))
+        story.append(Spacer(1, 0.2 * cm))
+        # Resolve MCC: prefer the field on MetricsResponse, fall back to computing from CM
+        mcc_value: Optional[float] = getattr(metrics, "mcc", None)
+        cm_data = metrics.confusion_matrix
+        if mcc_value is None or mcc_value == 0.0:
+            mcc_value = _compute_mcc(cm_data.tp, cm_data.tn, cm_data.fp, cm_data.fn)
+        metric_rows = [
+            ["Metric", "Value", "Threshold", "Status"],
+            ["Accuracy", _pct(metrics.accuracy), "≥ 65 %",
+             "✓  Acceptable" if metrics.accuracy >= 0.65 else "✗  Below threshold"],
+            ["Sensitivity ★", _pct(metrics.sensitivity), "≥ 70 %",
+             "✓  Acceptable" if metrics.sensitivity >= 0.70 else "✗  Below threshold"],
+            ["Specificity", _pct(metrics.specificity), "≥ 65 %",
+             "✓  Acceptable" if metrics.specificity >= 0.65 else "✗  Below threshold"],
+            ["Precision (PPV)", _pct(metrics.precision), "≥ 60 %",
+             "✓  Acceptable" if metrics.precision >= 0.60 else "✗  Below threshold"],
+            ["F1 Score", _pct(metrics.f1_score), "≥ 65 %",
+             "✓  Acceptable" if metrics.f1_score >= 0.65 else "✗  Below threshold"],
+            ["AUC-ROC", _pct(metrics.auc_roc), "≥ 75 %",
+             "✓  Acceptable" if metrics.auc_roc >= 0.75 else "✗  Below threshold"],
+        ]
+        if mcc_value is not None:
+            metric_rows.append([
+                "MCC †", f"{mcc_value:.3f}", "≥ 0.30",
+                "✓  Acceptable" if mcc_value >= 0.30 else "✗  Below threshold",
+            ])
+        # Build per-row background colours
+        perf_vals_thresholds = [
+            (metrics.accuracy, 0.65, 0.55),
+            (metrics.sensitivity, 0.70, 0.50),
+            (metrics.specificity, 0.65, 0.55),
+            (metrics.precision, 0.60, 0.50),
+            (metrics.f1_score, 0.65, 0.55),
+            (metrics.auc_roc, 0.75, 0.65),
+        ]
+        if mcc_value is not None:
+            perf_vals_thresholds.append((mcc_value, 0.30, 0.10))
+        row_bgs = [PRIMARY]  # header row
+        for val, gt, at in perf_vals_thresholds:
+            row_bgs.append(_row_bg(val, gt, at))
+        perf_table = Table(metric_rows, colWidths=[5 * cm, 2.8 * cm, 3.2 * cm, 6 * cm])
+        ts = [
+            ("BACKGROUND", (0, 0), (-1, 0), PRIMARY),
+            ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
+            ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
+            ("FONTSIZE", (0, 0), (-1, -1), 9),
+            ("GRID", (0, 0), (-1, -1), 0.5, MID_GREY),
+            ("LEFTPADDING", (0, 0), (-1, -1), 8),
+            ("TOPPADDING", (0, 0), (-1, -1), 5),
+            ("BOTTOMPADDING", (0, 0), (-1, -1), 5),
+            ("ALIGN", (1, 0), (2, -1), "CENTER"),
+        ]
+        for i, bg in enumerate(row_bgs):
+            ts.append(("BACKGROUND", (0, i), (-1, i), bg))
+        # Colour the Value and Status columns
+        for i, (val, gt, at) in enumerate(perf_vals_thresholds, start=1):
+            col = SUCCESS if val >= gt else (WARNING if val >= at else DANGER)
+            ts.append(("TEXTCOLOR", (1, i), (1, i), col))
+            ts.append(("FONTNAME", (1, i), (1, i), "Helvetica-Bold"))
+            ts.append(("TEXTCOLOR", (3, i), (3, i), col))
+            ts.append(("FONTNAME", (3, i), (3, i), "Helvetica-Bold"))
+        perf_table.setStyle(TableStyle(ts))
+        story.append(perf_table)
+        story.append(Spacer(1, 0.2 * cm))
+        story.append(Paragraph(
+            "★ Sensitivity (recall) is the most critical metric for clinical screening tools.  "
+            "† MCC (Matthews Correlation Coefficient) accounts for class imbalance.",
+            small,
+        ))
+        story.append(Spacer(1, 0.3 * cm))
+        # ---- Confusion matrix summary ----
+        story.append(Paragraph(
+            "<b>Confusion Matrix Summary</b>",
+            ParagraphStyle("CMHead", parent=body, textColor=PRIMARY_DARK, spaceAfter=4),
+        ))
+        cm_rows = [
+            ["", "Predicted Positive", "Predicted Negative"],
+            [
+                "Actual Positive",
+                f"TP = {cm_data.tp}",
+                f"FN = {cm_data.fn}",
+            ],
+            [
+                "Actual Negative",
+                f"FP = {cm_data.fp}",
+                f"TN = {cm_data.tn}",
+            ],
+        ]
+        cm_table = Table(cm_rows, colWidths=[4.5 * cm, 4.5 * cm, 4.5 * cm])
+        cm_ts = [
+            ("BACKGROUND", (0, 0), (-1, 0), PRIMARY),
+            ("BACKGROUND", (0, 0), (0, -1), PRIMARY_LIGHT),
+            ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
+            ("TEXTCOLOR", (0, 1), (0, -1), PRIMARY_DARK),
+            ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
+            ("FONTNAME", (0, 1), (0, -1), "Helvetica-Bold"),
+            ("FONTSIZE", (0, 0), (-1, -1), 9),
+            ("ALIGN", (1, 0), (-1, -1), "CENTER"),
+            ("ALIGN", (0, 0), (0, -1), "RIGHT"),
+            ("GRID", (0, 0), (-1, -1), 0.5, MID_GREY),
+            ("TOPPADDING", (0, 0), (-1, -1), 5),
+            ("BOTTOMPADDING", (0, 0), (-1, -1), 5),
+            ("LEFTPADDING", (0, 0), (-1, -1), 8),
+            # TP cell — green
+            ("BACKGROUND", (1, 1), (1, 1), SUCCESS_BG),
+            ("TEXTCOLOR", (1, 1), (1, 1), SUCCESS),
+            ("FONTNAME", (1, 1), (1, 1), "Helvetica-Bold"),
+            # TN cell — green
+            ("BACKGROUND", (2, 2), (2, 2), SUCCESS_BG),
+            ("TEXTCOLOR", (2, 2), (2, 2), SUCCESS),
+            ("FONTNAME", (2, 2), (2, 2), "Helvetica-Bold"),
+            # FP cell — amber
+            ("BACKGROUND", (1, 2), (1, 2), WARNING_BG),
+            ("TEXTCOLOR", (1, 2), (1, 2), WARNING),
+            ("FONTNAME", (1, 2), (1, 2), "Helvetica-Bold"),
+            # FN cell — red
+            ("BACKGROUND", (2, 1), (2, 1), DANGER_BG),
+            ("TEXTCOLOR", (2, 1), (2, 1), DANGER),
+            ("FONTNAME", (2, 1), (2, 1), "Helvetica-Bold"),
+        ]
+        cm_table.setStyle(TableStyle(cm_ts))
+        story.append(cm_table)
+        story.append(Spacer(1, 0.2 * cm))
+        # Cross-val summary
+        if metrics.cross_val_scores:
+            cv = metrics.cross_val_scores
+            cv_mean = sum(cv) / len(cv)
+            cv_std = math.sqrt(sum((x - cv_mean) ** 2 for x in cv) / len(cv))
+            cv_min = min(cv)
+            cv_max = max(cv)
+            story.append(Paragraph(
+                f"<b>{len(cv)}-Fold Cross-Validation:</b>  "
+                f"mean accuracy = <b>{_pct(cv_mean)}</b>  |  "
+                f"std = {cv_std * 100:.1f} pp  |  "
+                f"range [{_pct(cv_min)} – {_pct(cv_max)}]",
+                ParagraphStyle("CVLine", parent=small,
+                               textColor=DARK_TEXT, leading=12),
+            ))
+            story.append(Spacer(1, 0.1 * cm))
+        story.append(Spacer(1, 0.4 * cm))
+        # ---- SECTION 3: Bias Findings ----
+        story.append(Paragraph("3. Bias &amp; Fairness Findings", h2))
+        if ethics.bias_warnings:
+            for w in ethics.bias_warnings:
+                story.append(Paragraph(f"⚠  {w.message}", ParagraphStyle(
+                    "Warn", parent=body, textColor=DANGER, spaceAfter=3,
+                )))
+        else:
+            story.append(Paragraph(
+                "✓  No significant bias detected across patient subgroups.",
+                ParagraphStyle("OK", parent=body, textColor=SUCCESS),
+            ))
+        story.append(Spacer(1, 0.2 * cm))
+        subgroup_data = [["Subgroup", "n", "Accuracy", "Sens.", "Spec.", "F1", "Status"]]
+        for sm in ethics.subgroup_metrics:
+            status_sym = {"acceptable": "✓", "review": "⚠", "action_needed": "✗"}.get(sm.status, "?")
+            subgroup_data.append([
+                Paragraph(sm.group_label, cell8),
+                str(sm.sample_size),
+                _pct(sm.accuracy), _pct(sm.sensitivity), _pct(sm.specificity),
+                _pct(sm.f1_score),
+                f"{status_sym}  {sm.status.replace('_', ' ').title()}",
+            ])
+        sg_table = Table(
+            subgroup_data,
+            colWidths=[3.2 * cm, 1.2 * cm, 2.1 * cm, 2.1 * cm, 2.1 * cm, 2.1 * cm, 4.2 * cm],
+        )
+        sg_ts = [
+            ("BACKGROUND", (0, 0), (-1, 0), PRIMARY),
+            ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
+            ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
+            ("FONTSIZE", (0, 0), (-1, -1), 8),
+            ("GRID", (0, 0), (-1, -1), 0.4, MID_GREY),
+            ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, LIGHT_GREY]),
+            ("LEFTPADDING", (0, 0), (-1, -1), 6),
+            ("TOPPADDING", (0, 0), (-1, -1), 4),
+            ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
+            ("ALIGN", (1, 0), (-1, -1), "CENTER"),
+        ]
+        for i, sm in enumerate(ethics.subgroup_metrics, 1):
+            col = (SUCCESS if sm.status == "acceptable"
+                   else WARNING if sm.status == "review" else DANGER)
+            sg_ts.append(("TEXTCOLOR", (6, i), (6, i), col))
+            sg_ts.append(("FONTNAME", (6, i), (6, i), "Helvetica-Bold"))
+        sg_table.setStyle(TableStyle(sg_ts))
+        story.append(sg_table)
+        story.append(Spacer(1, 0.4 * cm))
+        # ---- SECTION 4: EU AI Act Checklist ----
+        story.append(Paragraph("4. EU AI Act Compliance Checklist", h2))
+        checklist_state = cert_request.checklist_state or {}
+        checklist_data = [["#", "Requirement", "Status"]]
+        for i, item in enumerate(ethics.eu_ai_act_items, 1):
+            is_checked = item.get("pre_checked") or checklist_state.get(item["id"], False)
+            checklist_data.append([
+                str(i),
+                Paragraph(item["text"], cell8),
+                "✓  Complete" if is_checked else "○  Pending",
+            ])
+        cl_table = Table(checklist_data, colWidths=[1 * cm, 14 * cm, 2 * cm])
+        cl_ts = [
+            ("BACKGROUND", (0, 0), (-1, 0), PRIMARY),
+            ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
+            ("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
+            ("FONTSIZE", (0, 0), (-1, -1), 8),
+            ("GRID", (0, 0), (-1, -1), 0.4, MID_GREY),
+            ("ROWBACKGROUNDS", (0, 1), (-1, -1), [colors.white, LIGHT_GREY]),
+            ("LEFTPADDING", (0, 0), (-1, -1), 6),
+            ("TOPPADDING", (0, 0), (-1, -1), 4),
+            ("BOTTOMPADDING", (0, 0), (-1, -1), 4),
+        ]
+        for i, item in enumerate(ethics.eu_ai_act_items, 1):
+            is_checked = item.get("pre_checked") or checklist_state.get(item["id"], False)
+            if is_checked:
+                cl_ts.append(("TEXTCOLOR", (2, i), (2, i), SUCCESS))
+                cl_ts.append(("FONTNAME", (2, i), (2, i), "Helvetica-Bold"))
+            else:
+                cl_ts.append(("TEXTCOLOR", (2, i), (2, i), colors.HexColor("#9CA3AF")))
+        cl_table.setStyle(TableStyle(cl_ts))
+        story.append(cl_table)
+        story.append(Spacer(1, 0.4 * cm))
+        # ---- SECTION 5: Key Takeaways ----
+        story.append(Paragraph("5. Key Takeaways", h2))
+        story.append(Paragraph(
+            "Auto-generated insights based on this model's performance metrics:",
+            ParagraphStyle("TkIntro", parent=body, textColor=colors.HexColor("#4B5563"),
+                           spaceAfter=5),
+        ))
+        takeaways = _generate_takeaways(metrics, model_type)
+        for idx, bullet in enumerate(takeaways, 1):
+            story.append(Paragraph(f"<b>{idx}.</b>  {bullet}", bullet_style))
+            story.append(Spacer(1, 0.1 * cm))
+        story.append(Spacer(1, 0.3 * cm))
+        # ---- FOOTER ----
+        story.append(HRFlowable(width="100%", thickness=1.5, color=PRIMARY,
+                                spaceAfter=4))
+        story.append(HRFlowable(width="100%", thickness=0.5, color=MID_GREY,
+                                spaceAfter=5))
+        story.append(Paragraph(
+            f"Generated: <b>{today}</b>  ·  HEALTH-AI ML Learning Tool v1.5  "
+            "·  Prepared by the HealthWithSevgi Team",
+            small_center,
+        ))
+        story.append(Spacer(1, 0.15 * cm))
+        story.append(Paragraph(
+            "<b>IMPORTANT DISCLAIMER:</b>  This certificate confirms completion of an educational "
+            "exercise only. The AI model described herein is <b>NOT</b> validated for clinical use "
+            "and must <b>NOT</b> be used to inform patient management decisions without appropriate "
+            "prospective clinical validation and regulatory clearance.",
+            disclaimer_style,
+        ))
+        def _add_page_number(canvas, doc_template):
+            """Canvas callback that stamps ``Page X`` (the current page number) on a page.
+            Only the current page number is drawn; the total page count is not rendered.
+            ``doc_template`` is accepted as part of the callback signature but is not used.
+            """
+            # Save/restore the graphics state so the footer font and colour stay local to this label.
+            canvas.saveState()
+            canvas.setFont("Helvetica", 7)
+            canvas.setFillColor(colors.HexColor("#9CA3AF"))
+            # Centred on x = PAGE_W / 2 (presumably the page midpoint — confirm) at y = 1 cm.
+            canvas.drawCentredString(
+                PAGE_W / 2, 1.0 * cm,
+                f"Page {canvas.getPageNumber()}"
+            )
+            canvas.restoreState()
+        doc.build(story, onFirstPage=_add_page_number, onLaterPages=_add_page_number)
+        return buf.getvalue()

app/services/data_service.py ADDED Viewed

	@@ -0,0 +1,1272 @@

+"""Data exploration and preparation service."""
+from __future__ import annotations
+import io
+import logging
+import pathlib
+import uuid
+import zipfile
+from typing import Any
+import numpy as np
+import pandas as pd
+import requests
+from imblearn.over_sampling import SMOTE
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import MinMaxScaler, StandardScaler
+from app.models.schemas import (
+    ColumnStat,
+    DataExplorationResponse,
+    PrepResponse,
+    PrepSettings,
+)
+logger = logging.getLogger(__name__)
+# Ratio of the most frequent to the least frequent class count at or above which
+# `explore_dataframe` sets its imbalance warning.
+IMBALANCE_RATIO_THRESHOLD = 1.5
+MIN_ROWS = 10  # NOTE(review): not referenced in this part of the file — presumably a minimum dataset size; confirm
+MAX_UPLOAD_MB = 50  # NOTE(review): not referenced in this part of the file — presumably an upload size cap; confirm
+# `prepare_data` rejects target columns with more distinct values than this.
+MAX_TARGET_CLASSES = 20
+# Dataset cache directory: `data_cache/` two levels above this module's directory.
+_CACHE_DIR = pathlib.Path(__file__).parent.parent.parent / "data_cache"
+class DatasetUnavailableError(Exception):
+    """Raised when a real dataset cannot be loaded and no fallback is allowed."""
+    def __init__(self, name: str, reason: str) -> None:
+        """
+        Load and return the bundled dataset for the `_init__` specialty. Used internally
+        by `DataService._load_specialty_dataset`.
+        """
+        self.dataset_name = name
+        self.reason = reason
+        super().__init__(
+            f"Dataset '{name}' is unavailable: {reason}. "
+            "Please upload your own CSV file or ensure the dataset cache is populated."
+        )
+class DataService:
+    """
+    Owns CSV ingestion, column exploration, and per-specialty preparation
+    (split/normalise/impute/SMOTE).
+    """
+    def __init__(self) -> None:
+        """Create the service with an empty in-memory session store."""
+        # Maps session_id -> the bundle of prepared arrays and metadata written by `prepare_data`.
+        self._session_store: dict[str, dict[str, Any]] = {}
+    # ------------------------------------------------------------------
+    # Real-data download helper
+    # ------------------------------------------------------------------
+    def _fetch_cached(
+        self,
+        name: str,
+        url: str,
+        read_kwargs: dict | None = None,
+    ) -> pd.DataFrame:
+        """Download a dataset from URL, cache locally, return DataFrame.
+        Raises DatasetUnavailableError if the dataset cannot be loaded.
+        """
+        _CACHE_DIR.mkdir(parents=True, exist_ok=True)
+        cache_path = _CACHE_DIR / f"{name}.csv"
+        rk = read_kwargs or {}
+        # Try from cache first
+        if cache_path.exists():
+            try:
+                return pd.read_csv(cache_path, **rk)
+            except Exception as exc:
+                raise DatasetUnavailableError(
+                    name, f"Cached file exists but failed to read: {exc}"
+                ) from exc
+        # Download
+        try:
+            resp = requests.get(url, timeout=20, headers={"User-Agent": "HealthWithSevgi/1.0"})
+            resp.raise_for_status()
+            cache_path.write_bytes(resp.content)
+            logger.info("Downloaded real dataset: %s (%d bytes)", name, len(resp.content))
+            return pd.read_csv(io.BytesIO(resp.content), **rk)
+        except Exception as exc:
+            raise DatasetUnavailableError(
+                name, f"Failed to download from {url}: {exc}"
+            ) from exc
+    # ------------------------------------------------------------------
+    # Exploration
+    # ------------------------------------------------------------------
+    def explore_dataframe(
+        self, df: pd.DataFrame, target_col: str
+    ) -> DataExplorationResponse:
+        """Build per-column statistics for the Step-2 exploration panel."""
+        columns: list[ColumnStat] = []
+        for col in df.columns:
+            series = df[col]
+            missing = int(series.isna().sum())
+            columns.append(
+                ColumnStat(
+                    name=col,
+                    dtype=str(series.dtype),
+                    missing_count=missing,
+                    missing_pct=round(missing / len(df) * 100, 2),
+                    unique_count=int(series.nunique()),
+                    sample_values=series.dropna().head(5).tolist(),
+                )
+            )
+        class_counts: dict[str, int] = {}
+        imbalance_ratio = 1.0
+        imbalance_warning = False
+        if target_col in df.columns:
+            vc = df[target_col].value_counts()
+            class_counts = {str(k): int(v) for k, v in vc.items()}
+            if len(vc) >= 2:
+                imbalance_ratio = round(vc.iloc[0] / vc.iloc[-1], 2)
+                imbalance_warning = imbalance_ratio >= IMBALANCE_RATIO_THRESHOLD
+        return DataExplorationResponse(
+            columns=columns,
+            row_count=len(df),
+            class_distribution=class_counts,
+            imbalance_warning=imbalance_warning,
+            imbalance_ratio=imbalance_ratio,
+            target_col=target_col,
+        )
+    # ------------------------------------------------------------------
+    # Preparation
+    # ------------------------------------------------------------------
+    def prepare_data(
+        self,
+        df: pd.DataFrame,
+        target_col: str,
+        settings: PrepSettings,
+        session_id: str | None = None,
+    ) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, PrepResponse, list[str]]:
+        """
+        Step-3 preparation endpoint — splits, normalises, imputes missing values,
+        optionally applies SMOTE.
+        """
+        if session_id is None:
+            session_id = str(uuid.uuid4())
+        # Drop rows where target is NaN
+        df = df.dropna(subset=[target_col]).copy()
+        # Guard: reject continuous / high-cardinality target columns
+        n_unique = df[target_col].nunique()
+        if n_unique > MAX_TARGET_CLASSES:
+            raise ValueError(
+                f"Target column '{target_col}' has {n_unique} unique values, "
+                f"which looks like a continuous measurement rather than a "
+                f"classification label. Choose a column with at most "
+                f"{MAX_TARGET_CLASSES} distinct classes (e.g. a binary "
+                f"outcome like 0/1)."
+            )
+        # Encode target
+        y_raw = df[target_col]
+        classes = sorted(y_raw.unique().tolist(), key=str)
+        class_to_int = {c: i for i, c in enumerate(classes)}
+        y = y_raw.map(class_to_int).values.astype(int)
+        # Keep only numeric features (drop target + non-numeric)
+        feature_df = df.drop(columns=[target_col])
+        feature_df = feature_df.select_dtypes(include=[np.number])
+        feature_names = list(feature_df.columns)
+        dist_before = {str(k): int((y == v).sum()) for k, v in class_to_int.items()}
+        if settings.missing_strategy == "drop":
+            mask = ~feature_df.isna().any(axis=1)
+            feature_df = feature_df[mask]
+            y = y[mask]
+        elif settings.missing_strategy == "median":
+            feature_df = feature_df.fillna(feature_df.median(numeric_only=True))
+        else:  # mode
+            _mode = feature_df.mode()
+            if not _mode.empty:
+                feature_df = feature_df.fillna(_mode.iloc[0])
+            else:
+                feature_df = feature_df.fillna(feature_df.median(numeric_only=True))
+        X = feature_df.values.astype(float)
+        # --- Train / test split (BEFORE imputation to avoid data leakage) ---
+        # Use stratified split only when every class has at least 2 samples;
+        # otherwise fall back to non-stratified to avoid ValueError.
+        from collections import Counter
+        class_counts_y = Counter(y)
+        min_class_size = min(class_counts_y.values()) if class_counts_y else 0
+        can_stratify = min_class_size >= 2
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=settings.test_size, random_state=42,
+            stratify=y if can_stratify else None,
+        )
+        # --- Handle missing values AFTER split (train-only statistics) ---
+        if settings.missing_strategy == "drop":
+            train_mask = ~pd.DataFrame(X_train).isna().any(axis=1).values
+            test_mask = ~pd.DataFrame(X_test).isna().any(axis=1).values
+            X_train = X_train[train_mask]
+            y_train = y_train[train_mask]
+            X_test = X_test[test_mask]
+            y_test = y_test[test_mask]
+        elif settings.missing_strategy == "median":
+            train_df = pd.DataFrame(X_train, columns=feature_names)
+            medians = train_df.median()
+            X_train = train_df.fillna(medians).values
+            X_test = pd.DataFrame(X_test, columns=feature_names).fillna(medians).values
+        else:  # mode
+            train_df = pd.DataFrame(X_train, columns=feature_names)
+            modes = train_df.mode().iloc[0]
+            X_train = train_df.fillna(modes).values
+            X_test = pd.DataFrame(X_test, columns=feature_names).fillna(modes).values
+        # --- Outlier handling (train statistics applied to test) ---
+        if settings.outlier_handling == "iqr":
+            train_df = pd.DataFrame(X_train, columns=feature_names)
+            Q1 = train_df.quantile(0.25)
+            Q3 = train_df.quantile(0.75)
+            IQR = Q3 - Q1
+            lower = Q1 - 1.5 * IQR
+            upper = Q3 + 1.5 * IQR
+            X_train = train_df.clip(lower=lower, upper=upper, axis=1).values
+            X_test = pd.DataFrame(X_test, columns=feature_names).clip(lower=lower, upper=upper, axis=1).values
+        elif settings.outlier_handling == "zscore_clip":
+            train_df = pd.DataFrame(X_train, columns=feature_names)
+            mean = train_df.mean()
+            std = train_df.std().replace(0, 1)
+            lower = mean - 3 * std
+            upper = mean + 3 * std
+            X_train = train_df.clip(lower=lower, upper=upper, axis=1).values
+            X_test = pd.DataFrame(X_test, columns=feature_names).clip(lower=lower, upper=upper, axis=1).values
+        # Capture raw (pre-scaling) arrays for session storage
+        X_train_raw = X_train.copy()
+        X_test_raw = X_test.copy()
+        # --- Normalisation ---
+        scaler = None
+        normalization_applied = settings.normalization
+        if settings.normalization == "zscore":
+            scaler = StandardScaler()
+        elif settings.normalization == "minmax":
+            scaler = MinMaxScaler()
+        if scaler is not None:
+            X_train = scaler.fit_transform(X_train)
+            X_test = scaler.transform(X_test)
+        # --- SMOTE (training only, supports multi-class) ---
+        smote_applied = False
+        # Filter out classes with fewer than 2 samples to prevent SMOTE ValueError
+        unique, counts = np.unique(y_train, return_counts=True)
+        valid_classes = unique[counts >= 2]
+        if len(valid_classes) < len(unique):
+            logger.warning(
+                "Dropped %d classes with only 1 sample before SMOTE/training.",
+                len(unique) - len(valid_classes)
+            )
+            train_mask = np.isin(y_train, valid_classes)
+            X_train = X_train[train_mask]
+            X_train_raw = X_train_raw[train_mask]
+            y_train = y_train[train_mask]
+            # Also filter test set to only contain classes present in training
+            test_mask = np.isin(y_test, valid_classes)
+            X_test = X_test[test_mask]
+            X_test_raw = X_test_raw[test_mask]
+            y_test = y_test[test_mask]
+        # Re-encode labels to be contiguous (0..n-1) after any class filtering.
+        # This prevents XGBoost/LightGBM "Invalid classes" errors when label
+        # values have gaps (e.g. [0, 2, 5] instead of [0, 1, 2]).
+        remaining_labels = np.unique(np.concatenate([y_train, y_test]))
+        if len(remaining_labels) > 0 and (
+            remaining_labels[-1] != len(remaining_labels) - 1
+            or len(remaining_labels) != int(remaining_labels[-1]) + 1
+        ):
+            label_map = {old: new for new, old in enumerate(sorted(remaining_labels))}
+            y_train = np.array([label_map[v] for v in y_train])
+            y_test = np.array([label_map[v] for v in y_test])
+            # Rebuild classes list and mapping with new contiguous labels
+            old_classes = classes
+            classes = [old_classes[old] for old in sorted(remaining_labels)]
+            class_to_int = {c: i for i, c in enumerate(classes)}
+            logger.info(
+                "Re-encoded %d classes to contiguous labels 0..%d",
+                len(remaining_labels), len(remaining_labels) - 1,
+            )
+        # Preserve pre-SMOTE labels for leak-free CV (after filtering and re-encoding)
+        y_train_original = y_train.copy()
+        unique_classes = np.unique(y_train)
+        if settings.use_smote and len(unique_classes) >= 2:
+            try:
+                min_class_count = min(np.bincount(y_train[y_train >= 0])) if len(y_train) > 0 else 0
+                k_neighbors = max(1, min(5, min_class_count - 1))
+                smote = SMOTE(k_neighbors=k_neighbors, random_state=42)
+                X_train, y_train = smote.fit_resample(X_train, y_train)
+                smote_applied = True
+                logger.info("SMOTE applied — training set resampled to %d samples", len(X_train))
+            except Exception as exc:
+                logger.warning("SMOTE failed: %s — proceeding without resampling", exc)
+        dist_after = {str(k): int((y_train == v).sum()) for k, v in class_to_int.items()}
+        # Bug #1: Build real normalization sample data (first row before vs after)
+        norm_samples: list[dict[str, object]] = []
+        sample_count = min(5, len(feature_names))
+        for i in range(sample_count):
+            before_val = float(X_train_raw[0, i]) if len(X_train_raw) > 0 else 0.0
+            after_val = float(X_train[0, i]) if len(X_train) > 0 else 0.0
+            norm_samples.append({
+                "feature": feature_names[i],
+                "before": round(before_val, 2),
+                "after": round(after_val, 3),
+            })
+        response = PrepResponse(
+            session_id=session_id,
+            train_size=int(len(X_train)),
+            test_size=int(len(X_test)),
+            features_count=len(feature_names),
+            class_distribution_before=dist_before,
+            class_distribution_after=dist_after,
+            smote_applied=smote_applied,
+            normalization_applied=normalization_applied,
+            norm_samples=norm_samples,
+        )
+        # Column metadata from raw DataFrame (before preprocessing)
+        raw_column_meta = []
+        for col in df.columns:
+            series = df[col]
+            raw_column_meta.append({
+                "name": col,
+                "dtype": str(series.dtype),
+                "missing_count": int(series.isna().sum()),
+                "missing_pct": round(series.isna().sum() / len(df) * 100, 2),
+                "unique_count": int(series.nunique()),
+                "sample_values": [str(v) for v in series.dropna().head(3).tolist()],
+                "is_target": col == target_col,
+            })
+        # Persist to session store
+        self._session_store[session_id] = {
+            "X_train": X_train,
+            "X_test": X_test,
+            "y_train": y_train,
+            "y_test": y_test,
+            "feature_names": feature_names,
+            "classes": [str(c) for c in classes],
+            "scaler": scaler,
+            "X_train_raw": X_train_raw,
+            "X_test_raw": X_test_raw,
+            "normalization": settings.normalization,
+            "y_train_original": y_train_original,
+            "smote_applied": smote_applied,
+            "raw_column_meta": raw_column_meta,
+            "row_count": len(df),
+        }
+        logger.info(
+            "Session %s prepared — train=%d, test=%d, features=%d",
+            session_id,
+            len(X_train),
+            len(X_test),
+            len(feature_names),
+        )
+        return X_train, X_test, y_train, y_test, response, feature_names
+    def get_session(self, session_id: str) -> dict[str, Any] | None:
+        """Return the prepared session bundle by id; `None` when the session is unknown.
+        The bundle is the dict stored by `prepare_data` (train/test arrays, feature names,
+        class labels, scaler and related metadata). The stored object itself is returned,
+        not a copy.
+        """
+        return self._session_store.get(session_id)
+    # ------------------------------------------------------------------
+    # Built-in example datasets
+    # ------------------------------------------------------------------
+    def get_example_dataset(self, specialty_id: str) -> pd.DataFrame:
+        """Return the bundled example dataframe for a specialty (cached after first load)."""
+        generators: dict[str, Any] = {
+            "cardiology_hf": self._heart_failure,
+            "radiology_pneumonia": self._pneumonia,
+            "nephrology_ckd": self._ckd,
+            "oncology_breast": self._breast_cancer,
+            "neurology_parkinsons": self._parkinsons,
+            "endocrinology_diabetes": self._diabetes,
+            "hepatology_liver": self._liver,
+            "cardiology_stroke": self._stroke,
+            "mental_health": self._mental_health,
+            "pulmonology_copd": self._copd,
+            "haematology_anaemia": self._anaemia,
+            "dermatology": self._dermatology,
+            "ophthalmology": self._ophthalmology,
+            "orthopaedics": self._orthopaedics,
+            "icu_sepsis": self._sepsis,
+            "obstetrics_fetal": self._fetal_health,
+            "cardiology_arrhythmia": self._arrhythmia,
+            "oncology_cervical": self._cervical,
+            "thyroid": self._thyroid,
+            "pharmacy_readmission": self._readmission,
+        }
+        gen = generators.get(specialty_id)
+        if gen is None:
+            raise DatasetUnavailableError(specialty_id, f"Unknown specialty ID '{specialty_id}'")
+        df = gen()
+        logger.info("Example dataset generated for '%s': %d rows", specialty_id, len(df))
+        return df
+    # ------ Dataset generators ------
+    def _heart_failure(self) -> pd.DataFrame:
+        """
+        Load and return the bundled dataset for the `heart_failure` specialty. Used
+        internally by `DataService._load_specialty_dataset`.
+        """
+        df = self._fetch_cached(
+            "cardiology_hf",
+            "https://archive.ics.uci.edu/ml/machine-learning-databases/00519/heart_failure_clinical_records_dataset.csv",
+        )
+        if "DEATH_EVENT" not in df.columns:
+            raise DatasetUnavailableError("cardiology_hf", "Missing required column 'DEATH_EVENT'")
+        return df
+    def _breast_cancer(self) -> pd.DataFrame:
+        """
+        Load and return the bundled dataset for the `breast_cancer` specialty. Used
+        internally by `DataService._load_specialty_dataset`.
+        """
+        from sklearn.datasets import load_breast_cancer
+        data = load_breast_cancer(as_frame=True)
+        df = data.frame.copy()
+        df["diagnosis"] = data.target.map({1: "B", 0: "M"})
+        df = df.drop(columns=["target"])
+        # Normalise column names: replace spaces with underscores
+        df.columns = [c.replace(" ", "_") for c in df.columns]
+        # Select the 14 registered features (mean + worst geometry/texture only)
+        keep = [
+            "mean_radius", "mean_texture", "mean_perimeter", "mean_area",
+            "mean_smoothness", "mean_compactness", "mean_concavity",
+            "mean_concave_points", "mean_symmetry", "worst_radius",
+            "worst_texture", "worst_perimeter", "worst_area", "worst_smoothness",
+            "diagnosis",
+        ]
+        available = [c for c in keep if c in df.columns]
+        return df[available]
+    def _diabetes(self) -> pd.DataFrame:
+        """
+        Load and return the bundled dataset for the `diabetes` specialty. Used internally
+        by `DataService._load_specialty_dataset`.
+        """
+        pima_cols = [
+            "pregnancies", "glucose", "blood_pressure", "skin_thickness",
+            "insulin", "bmi", "diabetes_pedigree_function", "age", "Outcome",
+        ]
+        df = self._fetch_cached(
+            "endocrinology_diabetes",
+            "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv",
+            read_kwargs={"header": None, "names": pima_cols},
+        )
+        if "Outcome" not in df.columns:
+            raise DatasetUnavailableError("endocrinology_diabetes", "Missing required column 'Outcome'")
+        return df
+    def _ckd(self) -> pd.DataFrame:
+        """
+        Load and return the bundled dataset for the `ckd` specialty. Used internally by
+        `DataService._load_specialty_dataset`.
+        """
+        _CACHE_DIR.mkdir(parents=True, exist_ok=True)
+        csv_cache = _CACHE_DIR / "nephrology_ckd.csv"
+        if not csv_cache.exists():
+            raise DatasetUnavailableError("nephrology_ckd", f"Cache file not found: {csv_cache}")
+        df = pd.read_csv(csv_cache)
+        rename_map = {
+            "bp": "blood_pressure", "sg": "specific_gravity",
+            "al": "albumin", "su": "sugar",
+            "rbc": "red_blood_cells", "pc": "pus_cell",
+            "bgr": "blood_glucose_random", "bu": "blood_urea",
+            "sc": "serum_creatinine", "sod": "sodium",
+            "pot": "potassium", "hemo": "haemoglobin",
+            "pcv": "packed_cell_volume", "wc": "white_blood_cell_count",
+            "rc": "red_blood_cell_count",
+            "htn": "hypertension", "dm": "diabetes_mellitus",
+            "cad": "coronary_artery_disease",
+            "appet": "appetite", "pe": "pedal_oedema", "ane": "anemia",
+            "class": "classification",
+        }
+        df = df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns})
+        if "classification" not in df.columns:
+            raise DatasetUnavailableError("nephrology_ckd", "Missing required column 'classification'")
+        df["classification"] = df["classification"].astype(str).str.strip().str.rstrip(".")
+        df = df[df["classification"].isin(["ckd", "notckd"])].copy()
+        for col in df.columns:
+            if col != "classification":
+                df[col] = pd.to_numeric(df[col], errors="coerce")
+        if len(df) < 100:
+            raise DatasetUnavailableError("nephrology_ckd", f"Dataset too small ({len(df)} rows)")
+        return df
+    def _parkinsons(self) -> pd.DataFrame:
+        """
+        Load and return the bundled dataset for the `parkinsons` specialty. Used
+        internally by `DataService._load_specialty_dataset`.
+        """
+        df = self._fetch_cached(
+            "neurology_parkinsons",
+            "https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/parkinsons.data",
+        )
+        if "name" in df.columns:
+            df = df.drop(columns=["name"])
+        col_rename = {
+            "MDVP:Fo(Hz)": "MDVP_Fo_Hz",
+            "MDVP:Fhi(Hz)": "MDVP_Fhi_Hz",
+            "MDVP:Flo(Hz)": "MDVP_Flo_Hz",
+            "MDVP:Jitter(%)": "MDVP_Jitter_pct",
+            "MDVP:Jitter(Abs)": "MDVP_Jitter_Abs",
+            "MDVP:RAP": "MDVP_RAP",
+            "MDVP:PPQ": "MDVP_PPQ",
+            "Jitter:DDP": "Jitter_DDP",
+            "MDVP:Shimmer": "MDVP_Shimmer",
+            "MDVP:Shimmer(dB)": "MDVP_Shimmer_dB",
+            "Shimmer:APQ3": "Shimmer_APQ3",
+            "Shimmer:APQ5": "Shimmer_APQ5",
+            "MDVP:APQ": "MDVP_APQ",
+            "Shimmer:DDA": "Shimmer_DDA",
+        }
+        df = df.rename(columns={k: v for k, v in col_rename.items() if k in df.columns})
+        if "status" not in df.columns:
+            raise DatasetUnavailableError("neurology_parkinsons", "Missing required column 'status'")
+        return df
+    def _liver(self) -> pd.DataFrame:
+        """
+        Load and return the bundled dataset for the `liver` specialty. Used internally by
+        `DataService._load_specialty_dataset`.
+        """
+        ilpd_cols = [
+            "age", "gender", "total_bilirubin", "direct_bilirubin",
+            "alkaline_phosphotase", "alamine_aminotransferase",
+            "aspartate_aminotransferase", "total_proteins",
+            "albumin", "albumin_globulin_ratio", "Dataset",
+        ]
+        df = self._fetch_cached(
+            "hepatology_liver",
+            "https://archive.ics.uci.edu/ml/machine-learning-databases/00225/Indian%20Liver%20Patient%20Dataset%20(ILPD).csv",
+            read_kwargs={"header": None, "names": ilpd_cols},
+        )
+        if "Dataset" not in df.columns:
+            raise DatasetUnavailableError("hepatology_liver", "Missing required column 'Dataset'")
+        if df["gender"].dtype == object:
+            df["gender"] = (df["gender"] == "Male").astype(int)
+        df["albumin_globulin_ratio"] = df["albumin_globulin_ratio"].fillna(
+            df["albumin_globulin_ratio"].median()
+        )
+        return df
+    def _stroke(self) -> pd.DataFrame:
+        """
+        Load and return the bundled dataset for the `stroke` specialty. Used internally by
+        `DataService._load_specialty_dataset`.
+        """
+        try:
+            df = self._fetch_cached(
+                "cardiology_stroke",
+                "https://raw.githubusercontent.com/04-aditya/Stroke-Prediction-using-R/main/healthcare-dataset-stroke-data.csv",
+            )
+        except DatasetUnavailableError:
+            raise DatasetUnavailableError(
+                "cardiology_stroke",
+                "This dataset has no formal open license and cannot be bundled. "
+                "It must be downloaded at runtime for educational use only, "
+                "but the download failed. Check your network connection.",
+            )
+        if "stroke" not in df.columns:
+            raise DatasetUnavailableError(
+                "cardiology_stroke",
+                "Missing required column 'stroke'. "
+                "This dataset has no formal open license and cannot be bundled. "
+                "It will be downloaded at runtime for educational use only.",
+            )
+        if "id" in df.columns:
+            df = df.drop(columns=["id"])
+        cat_encodings: dict[str, dict] = {
+            "gender": {"Male": 1, "Female": 0, "Other": 0},
+            "ever_married": {"Yes": 1, "No": 0},
+            "work_type": {"children": 0, "Govt_job": 1, "Never_worked": 2, "Private": 3, "Self-employed": 4},
+            "smoking_status": {"never smoked": 0, "Unknown": 1, "formerly smoked": 2, "smokes": 3},
+        }
+        for col, mapping in cat_encodings.items():
+            if col in df.columns and df[col].dtype == object:
+                df[col] = df[col].map(mapping).fillna(0).astype(int)
+        if "Residence_type" in df.columns:
+            df = df.rename(columns={"Residence_type": "residence_type"})
+        if "residence_type" in df.columns and df["residence_type"].dtype == object:
+            df["residence_type"] = (df["residence_type"] == "Urban").astype(int)
+        df["bmi"] = pd.to_numeric(df["bmi"], errors="coerce")
+        df["stroke"] = pd.to_numeric(df["stroke"], errors="coerce")
+        df = df.dropna(subset=["stroke"])
+        if len(df) < 100:
+            raise DatasetUnavailableError(
+                "cardiology_stroke",
+                f"Dataset too small ({len(df)} rows). "
+                "This dataset has no formal open license and cannot be bundled. "
+                "It will be downloaded at runtime for educational use only.",
+            )
+        return df
+    def _mental_health(self) -> pd.DataFrame:
+        """
+        Load the depression dataset from a CSV in the cache directory.
+
+        Two file names are tried in order (``depression_data.csv``, then
+        ``mental_health_depression.csv``). For the first file that exists and
+        processes cleanly, the method drops name columns, encodes ordinal and
+        yes/no columns as integers, derives the ``severity_class`` target from
+        ``History of Mental Illness`` (or, failing that, ``Depression``), renames
+        columns to snake_case, integer-codes any remaining object columns and
+        drops rows without a target. Frames larger than 5000 rows are reduced to
+        a 5000-row sample via ``train_test_split``.
+
+        Any exception raised while processing a candidate is logged as a warning
+        and the next candidate is tried.
+
+        Returns:
+            The prepared dataframe with a ``severity_class`` column.
+
+        Raises:
+            DatasetUnavailableError: If no candidate file yields at least 100
+                labelled rows.
+        """
+        for candidate in ("depression_data.csv", "mental_health_depression.csv"):
+            csv_cache = _CACHE_DIR / candidate
+            if csv_cache.exists():
+                try:
+                    df = pd.read_csv(csv_cache)
+                    df = df.drop(columns=[c for c in ["Name", "name"] if c in df.columns])
+                    # Ordinal text categories -> integer ranks.
+                    ordinal_maps = {
+                        "Dietary Habits": {"Healthy": 2, "Moderate": 1, "Unhealthy": 0},
+                        "Sleep Patterns": {"Good": 2, "Fair": 1, "Poor": 0},
+                        "Alcohol Consumption": {"Low": 0, "Moderate": 1, "High": 2},
+                        "Physical Activity Level": {"Active": 2, "Moderate": 1, "Sedentary": 0},
+                        "Smoking Status": {"Non-smoker": 0, "Former": 1, "Current": 2},
+                        "Employment Status": {"Employed": 1, "Unemployed": 0},
+                    }
+                    for col, mapping in ordinal_maps.items():
+                        if col in df.columns:
+                            # Values missing from the mapping fall back to 1.
+                            df[col] = df[col].map(mapping).fillna(1).astype(int)
+                    yes_no_cols = [
+                        "History of Substance Abuse", "Family History of Depression",
+                        "Chronic Medical Conditions",
+                    ]
+                    for col in yes_no_cols:
+                        if col in df.columns and df[col].dtype == object:
+                            # Case-insensitive "yes" -> 1, everything else -> 0.
+                            df[col] = (df[col].str.lower() == "yes").astype(int)
+                    # Build the target from whichever source column is available.
+                    if "History of Mental Illness" in df.columns:
+                        df["severity_class"] = df["History of Mental Illness"].map(
+                            {"Yes": "has_condition", "No": "no_condition"}
+                        )
+                        df = df.drop(columns=["History of Mental Illness"])
+                    elif "Depression" in df.columns:
+                        df["severity_class"] = df["Depression"].map({1: "has_condition", 0: "no_condition"})
+                        df = df.drop(columns=["Depression"])
+                    col_rename = {
+                        "Age": "age",
+                        "Number of Children": "number_of_children",
+                        "Income": "income",
+                        "Dietary Habits": "dietary_habits",
+                        "Sleep Patterns": "sleep_patterns",
+                        "Alcohol Consumption": "alcohol_consumption",
+                        "Physical Activity Level": "physical_activity_level",
+                        "Smoking Status": "smoking_status",
+                        "Employment Status": "employment_status",
+                        "History of Substance Abuse": "history_substance_abuse",
+                        "Family History of Depression": "family_history_depression",
+                        "Chronic Medical Conditions": "chronic_medical_conditions",
+                        "Marital Status": "marital_status",
+                        "Education Level": "education_level",
+                    }
+                    df = df.rename(columns={k: v for k, v in col_rename.items() if k in df.columns})
+                    # Any feature column still holding text is replaced by its
+                    # pandas categorical codes.
+                    for col in df.columns:
+                        if col != "severity_class" and df[col].dtype == object:
+                            df[col] = pd.Categorical(df[col]).codes
+                    # If neither target source column existed, this lookup raises
+                    # and the handler below moves on to the next candidate.
+                    df = df.dropna(subset=["severity_class"])
+                    if len(df) >= 100 and "severity_class" in df.columns:
+                        if len(df) > 5000:
+                            from sklearn.model_selection import train_test_split as _tts
+                            # Keep only the 5000-row "test" part of the split;
+                            # stratify when more than one class is present.
+                            _, df = _tts(
+                                df, test_size=5000, random_state=42,
+                                stratify=df["severity_class"] if df["severity_class"].nunique() > 1 else None,
+                            )
+                            df = df.reset_index(drop=True)
+                        logger.info("Loaded real mental health dataset (%d rows) from %s", len(df), candidate)
+                        return df
+                except Exception as exc:
+                    # Best effort per candidate: log and try the next file name.
+                    logger.warning("Mental health CSV load failed (%s): %s", candidate, exc)
+        # Reached when no candidate exists, loads cleanly, or has >= 100 rows.
+        raise DatasetUnavailableError(
+            "mental_health",
+            "Real mental health dataset not found in data_cache/. "
+            "Download from kaggle.com/datasets/anthonytherrien/depression-dataset "
+            "and save as depression_data.csv in data_cache/",
+        )
+    def _copd(self) -> pd.DataFrame:
+        """
+        Load and return the bundled dataset for the `copd` specialty. Used internally by
+        `DataService._load_specialty_dataset`.
+        """
+        csv_cache = _CACHE_DIR / "pulmonology_copd.csv"
+        if not csv_cache.exists():
+            raise DatasetUnavailableError(
+                "pulmonology_copd",
+                f"Real COPD dataset not found at {csv_cache}. "
+                "Download from kaggle.com/datasets/prakharrathi25/copd-student-dataset "
+                "or physionet.org/content/copd-ehr/1.0.0/ "
+                "and save as pulmonology_copd.csv in data_cache/",
+            )
+        df = pd.read_csv(csv_cache)
+        col_rename = {
+            "AGE": "age", "Age": "age",
+            "SEX": "sex", "Sex": "sex", "GENDER": "sex", "Gender": "sex",
+            "SMOKING_PACK_YEARS": "smoking_pack_years", "PackYears": "smoking_pack_years",
+            "FEV1": "fev1_litres", "FEV1_LITRES": "fev1_litres",
+            "FVC": "fvc_litres", "FVC_LITRES": "fvc_litres",
+            "FEV1_FVC": "fev1_fvc_ratio", "FEV1FVC": "fev1_fvc_ratio",
+            "PRIOR_EXAC": "prior_exacerbations_year", "ExacerbationRate": "prior_exacerbations_year",
+            "BMI": "bmi",
+            "MRC": "mrc_dyspnea_scale", "MRCScore": "mrc_dyspnea_scale",
+            "SGRQ": "sgrq_score", "SGRQTotal": "sgrq_score",
+            "GOLD_STAGE": "copd_gold_stage", "GOLDStage": "copd_gold_stage",
+            "EXACERBATION": "exacerbation", "Exacerbation": "exacerbation",
+            "EXAC": "exacerbation",
+        }
+        df = df.rename(columns={k: v for k, v in col_rename.items() if k in df.columns})
+        if "sex" in df.columns and df["sex"].dtype == object:
+            df["sex"] = (df["sex"].str.lower().isin(["m", "male", "1"])).astype(int)
+        for col in df.columns:
+            if col != "exacerbation":
+                df[col] = pd.to_numeric(df[col], errors="coerce")
+        if "exacerbation" in df.columns and df["exacerbation"].dtype == object:
+            df["exacerbation"] = pd.to_numeric(df["exacerbation"], errors="coerce")
+        df = df.dropna(subset=["exacerbation"])
+        keep = [
+            "age", "sex", "smoking_pack_years", "fev1_litres", "fvc_litres",
+            "fev1_fvc_ratio", "prior_exacerbations_year", "bmi",
+            "mrc_dyspnea_scale", "sgrq_score", "copd_gold_stage", "exacerbation",
+        ]
+        available = [c for c in keep if c in df.columns]
+        df = df[available]
+        if len(df) < 100 or "exacerbation" not in df.columns:
+            raise DatasetUnavailableError("pulmonology_copd", f"Dataset too small or missing target ({len(df)} rows)")
+        logger.info("Loaded real COPD dataset (%d rows)", len(df))
+        return df
+    def _anaemia(self) -> pd.DataFrame:
+        """
+        Load and return the bundled dataset for the `anaemia` specialty. Used internally
+        by `DataService._load_specialty_dataset`.
+        """
+        try:
+            df = self._fetch_cached(
+                "haematology_anaemia",
+                "https://raw.githubusercontent.com/maladeep/anemia-detection-with-machine-learning/master/anemia%20data%20from%20Kaggle.csv",
+            )
+        except DatasetUnavailableError:
+            raise DatasetUnavailableError(
+                "haematology_anaemia",
+                "This dataset has an unknown license and cannot be bundled. "
+                "It must be downloaded at runtime for educational use only, "
+                "but the download failed. Check your network connection.",
+            )
+        rename_map = {
+            "Gender": "gender", "Hemoglobin": "haemoglobin",
+            "MCH": "mch", "MCHC": "mchc", "MCV": "mcv",
+            "Result": "anemia_type",
+        }
+        df = df.rename(columns={k: v for k, v in rename_map.items() if k in df.columns})
+        # Gender is already encoded as 0/1 in the source CSV; coerce to numeric
+        # to handle any edge-case whitespace or string variants.
+        for col in df.columns:
+            df[col] = pd.to_numeric(df[col], errors="coerce")
+        if "anemia_type" not in df.columns:
+            raise DatasetUnavailableError(
+                "haematology_anaemia",
+                "Missing required column 'anemia_type'. "
+                "This dataset has an unknown license and cannot be bundled. "
+                "It will be downloaded at runtime for educational use only.",
+            )
+        df = df.dropna(subset=["anemia_type"])
+        return df
+    def _dermatology(self) -> pd.DataFrame:
+        """
+        Load and return the bundled dataset for the `dermatology` specialty. Used
+        internally by `DataService._load_specialty_dataset`.
+        """
+        csv_cache = _CACHE_DIR / "dermatology.csv"
+        df = None
+        if csv_cache.exists():
+            try:
+                df = pd.read_csv(csv_cache)
+            except Exception:
+                pass
+        if df is None or "dx" not in (df.columns if df is not None else []):
+            df = self._fetch_cached(
+                "dermatology_tsv",
+                "https://dataverse.harvard.edu/api/access/datafile/4338392",
+                read_kwargs={"sep": "\t", "quotechar": '"'},
+            )
+        if "dx" not in df.columns:
+            raise DatasetUnavailableError("dermatology", "Missing required column 'dx'")
+        malignant = {"mel", "bcc", "akiec"}
+        df["dx_type"] = df["dx"].apply(
+            lambda x: "malignant" if str(x).strip() in malignant else "benign"
+        )
+        if "sex" in df.columns and df["sex"].dtype == object:
+            df["sex"] = (df["sex"] == "male").astype(int)
+        if "localization" in df.columns and df["localization"].dtype == object:
+            locs = df["localization"].unique()
+            loc_map = {v: i for i, v in enumerate(sorted(locs))}
+            df["localization"] = df["localization"].map(loc_map).fillna(0).astype(int)
+        df["age"] = pd.to_numeric(df["age"], errors="coerce")
+        keep = ["age", "sex", "localization", "dx_type"]
+        df = df[[c for c in keep if c in df.columns]].dropna(subset=["dx_type"])
+        if len(df) < 100:
+            raise DatasetUnavailableError("dermatology", f"Dataset too small ({len(df)} rows)")
+        return df
+    def _ophthalmology(self) -> pd.DataFrame:
+        """
+        Load and return the bundled dataset for the `ophthalmology` specialty. Used
+        internally by `DataService._load_specialty_dataset`.
+        """
+        _CACHE_DIR.mkdir(parents=True, exist_ok=True)
+        arff_cache = _CACHE_DIR / "ophthalmology.arff"
+        if not arff_cache.exists():
+            try:
+                resp = requests.get(
+                    "https://archive.ics.uci.edu/static/public/329/diabetic+retinopathy+debrecen+data+set.zip",
+                    timeout=30, headers={"User-Agent": "HealthWithSevgi/1.0"},
+                )
+                resp.raise_for_status()
+                with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
+                    arff_names = [n for n in zf.namelist() if n.endswith(".arff")]
+                    if arff_names:
+                        arff_cache.write_bytes(zf.read(arff_names[0]))
+                        logger.info("Extracted Debrecen DR ARFF (%d bytes)", arff_cache.stat().st_size)
+            except Exception as exc:
+                raise DatasetUnavailableError(
+                    "ophthalmology", f"Failed to download Debrecen DR ARFF: {exc}"
+                ) from exc
+        if not arff_cache.exists():
+            raise DatasetUnavailableError("ophthalmology", f"ARFF file not found: {arff_cache}")
+        from scipy.io import arff as scipy_arff
+        data, meta = scipy_arff.loadarff(str(arff_cache))
+        df = pd.DataFrame(data)
+        for col in df.columns:
+            if df[col].dtype == object:
+                df[col] = df[col].str.decode("utf-8").str.strip()
+        for col in df.columns:
+            df[col] = pd.to_numeric(df[col], errors="coerce")
+        cols = list(df.columns)
+        feature_cols = cols[:-1]
+        target_col = cols[-1]
+        df = df.rename(columns={target_col: "severity_grade"})
+        df["severity_grade"] = df["severity_grade"].astype(int)
+        named_features = [
+            "quality_assessment", "pre_screening", "ma_detection_0.5",
+            "ma_detection_0.6", "ma_detection_0.7", "ma_detection_0.8",
+            "ma_detection_0.9", "ma_detection_1.0",
+            "exudate_1", "exudate_2", "exudate_3", "exudate_4",
+            "exudate_5", "exudate_6", "exudate_7", "exudate_8",
+            "macula_od_distance", "optic_disc_diameter", "am_fm_classification",
+        ]
+        if len(feature_cols) == len(named_features):
+            rename_map = {old: new for old, new in zip(feature_cols, named_features)}
+            df = df.rename(columns=rename_map)
+        df = df.dropna(subset=["severity_grade"])
+        if len(df) < 100:
+            raise DatasetUnavailableError("ophthalmology", f"Dataset too small ({len(df)} rows)")
+        return df
+    def _orthopaedics(self) -> pd.DataFrame:
+        """
+        Load and return the bundled dataset for the `orthopaedics` specialty. Used
+        internally by `DataService._load_specialty_dataset`.
+        """
+        _CACHE_DIR.mkdir(parents=True, exist_ok=True)
+        arff_cache = _CACHE_DIR / "orthopaedics.arff"
+        if not arff_cache.exists():
+            try:
+                resp = requests.get(
+                    "https://archive.ics.uci.edu/static/public/212/vertebral+column.zip",
+                    timeout=30, headers={"User-Agent": "HealthWithSevgi/1.0"},
+                )
+                resp.raise_for_status()
+                with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
+                    arff_names = [n for n in zf.namelist() if n.endswith("_weka.arff")]
+                    if arff_names:
+                        arff_cache.write_bytes(zf.read(arff_names[0]))
+                        logger.info("Extracted vertebral column ARFF (%d bytes)", arff_cache.stat().st_size)
+            except Exception as exc:
+                raise DatasetUnavailableError(
+                    "orthopaedics", f"Failed to download vertebral column ARFF: {exc}"
+                ) from exc
+        if not arff_cache.exists():
+            raise DatasetUnavailableError("orthopaedics", f"ARFF file not found: {arff_cache}")
+        from scipy.io import arff as scipy_arff
+        data, meta = scipy_arff.loadarff(str(arff_cache))
+        df = pd.DataFrame(data)
+        for col in df.columns:
+            if df[col].dtype == object:
+                df[col] = df[col].str.decode("utf-8")
+        col_names = [
+            "pelvic_incidence", "pelvic_tilt", "lumbar_lordosis_angle",
+            "sacral_slope", "pelvic_radius", "degree_spondylolisthesis", "class",
+        ]
+        if len(df.columns) == len(col_names):
+            df.columns = col_names
+        if "class" not in df.columns:
+            raise DatasetUnavailableError("orthopaedics", "Missing required column 'class'")
+        return df
+    def _sepsis(self) -> pd.DataFrame:
+        """
+        Load and return the bundled dataset for the `sepsis` specialty. Used internally by
+        `DataService._load_specialty_dataset`.
+        """
+        csv_cache = _CACHE_DIR / "icu_sepsis.csv"
+        if not csv_cache.exists():
+            raise DatasetUnavailableError(
+                "icu_sepsis",
+                f"Real ICU/Sepsis dataset not found at {csv_cache}. "
+                "Download from physionet.org/content/challenge-2019/1.0.0/, "
+                "merge PSV files into one CSV, and save as icu_sepsis.csv in data_cache/",
+            )
+        df = pd.read_csv(csv_cache)
+        if len(df.columns) == 1:
+            df = pd.read_csv(csv_cache, sep="|")
+        keep = [
+            "HR", "O2Sat", "Temp", "SBP", "MAP", "Resp",
+            "BaseExcess", "pH", "PaCO2", "Lactate", "Creatinine",
+            "Bilirubin_total", "WBC", "Platelets", "Age", "Gender", "SepsisLabel",
+        ]
+        available = [c for c in keep if c in df.columns]
+        df = df[available].dropna(subset=["SepsisLabel"])
+        df["SepsisLabel"] = pd.to_numeric(df["SepsisLabel"], errors="coerce").astype("Int64")
+        df = df.dropna(subset=["SepsisLabel"])
+        if len(df) < 100 or "SepsisLabel" not in df.columns:
+            raise DatasetUnavailableError("icu_sepsis", f"Dataset too small ({len(df)} rows)")
+        if len(df) > 5000:
+            # Stratified cap: guarantee all positive (sepsis=1) cases are retained,
+            # then fill the remaining budget with negatives. A random cap at 5000 rows
+            # would yield only ~100-250 positives at 2-5% prevalence, making the
+            # imbalance effectively 20-50:1. This preserves every real sepsis case.
+            sep_pos = df[df["SepsisLabel"] == 1]
+            sep_neg = df[df["SepsisLabel"] == 0]
+            n_neg = max(0, 5000 - len(sep_pos))
+            if len(sep_neg) > n_neg:
+                sep_neg = sep_neg.sample(n_neg, random_state=42)
+            df = pd.concat([sep_pos, sep_neg]).sample(frac=1, random_state=42).reset_index(drop=True)
+        logger.info("Loaded real ICU sepsis dataset (%d rows, %d positive)", len(df), int((df["SepsisLabel"] == 1).sum()))
+        return df
+    def _fetal_health(self) -> pd.DataFrame:
+        """
+        Load and return the bundled dataset for the `fetal_health` specialty. Used
+        internally by `DataService._load_specialty_dataset`.
+        """
+        _CACHE_DIR.mkdir(parents=True, exist_ok=True)
+        csv_cache = _CACHE_DIR / "obstetrics_fetal.csv"
+        if not csv_cache.exists():
+            raise DatasetUnavailableError("obstetrics_fetal", f"Cache file not found: {csv_cache}")
+        df = pd.read_csv(csv_cache)
+        if len(df.columns) <= 2:
+            df = pd.read_csv(csv_cache, sep=";")
+        col_map = {
+            "LB": "baseline_value", "AC": "accelerations", "FM": "fetal_movement",
+            "UC": "uterine_contractions", "DL": "light_decelerations",
+            "DS": "severe_decelerations", "DP": "prolongued_decelerations",
+            "ASTV": "abnormal_short_term_variability",
+            "MSTV": "mean_value_short_term_variability",
+            "ALTV": "pct_time_abnormal_long_term_variability",
+            "MLTV": "mean_value_long_term_variability",
+            "Mode": "histogram_mode",
+            "NSP": "fetal_health",
+        }
+        df = df.rename(columns={k: v for k, v in col_map.items() if k in df.columns})
+        if "fetal_health" not in df.columns:
+            raise DatasetUnavailableError("obstetrics_fetal", "Missing required column 'fetal_health'")
+        df["fetal_health"] = pd.to_numeric(df["fetal_health"], errors="coerce")
+        df = df.dropna(subset=["fetal_health"])
+        df["fetal_health"] = df["fetal_health"].astype(int)
+        keep = [v for v in col_map.values() if v in df.columns]
+        df = df[keep].dropna(subset=["fetal_health"])
+        if len(df) < 100:
+            raise DatasetUnavailableError("obstetrics_fetal", f"Dataset too small ({len(df)} rows)")
+        return df
+    def _arrhythmia(self) -> pd.DataFrame:
+        """
+        Load and return the bundled dataset for the `arrhythmia` specialty. Used
+        internally by `DataService._load_specialty_dataset`.
+        """
+        all_cols = [f"feature_{i}" for i in range(279)] + ["arrhythmia_class"]
+        df = self._fetch_cached(
+            "cardiology_arrhythmia",
+            "https://archive.ics.uci.edu/ml/machine-learning-databases/arrhythmia/arrhythmia.data",
+            read_kwargs={"header": None, "names": all_cols, "na_values": "?"},
+        )
+        if "arrhythmia_class" not in df.columns:
+            raise DatasetUnavailableError("cardiology_arrhythmia", "Missing required column 'arrhythmia_class'")
+        df["arrhythmia"] = df["arrhythmia_class"].apply(lambda x: 0 if x == 1 else 1)
+        # Name the first 15 global ECG features; the remaining 264 columns are
+        # per-lead amplitude measurements (R, S, T, P amplitudes across 12 leads)
+        # that carry the primary diagnostic signal for arrhythmia classification.
+        # Previously only the 13 global interval features were kept, discarding all
+        # per-lead amplitude data. All columns are kept here — Random Forest selects
+        # the most discriminative ones via feature importance at each split.
+        global_names = [
+            "age", "sex", "height", "weight", "QRS_duration",
+            "PR_interval", "QT_interval", "T_interval", "P_interval",
+            "QRS_axis", "T_axis", "P_axis", "heart_rate", "J_point", "heart_rate_2",
+        ]
+        rename_map = {f"feature_{i}": name for i, name in enumerate(global_names)}
+        df = df.rename(columns=rename_map)
+        df = df.drop(columns=["arrhythmia_class"])
+        df = df.dropna(subset=["arrhythmia"])
+        for col in df.columns:
+            df[col] = pd.to_numeric(df[col], errors="coerce")
+        if len(df) < 100:
+            raise DatasetUnavailableError("cardiology_arrhythmia", f"Dataset too small ({len(df)} rows)")
+        return df
+    def _cervical(self) -> pd.DataFrame:
+        """
+        Load and return the bundled dataset for the `cervical` specialty. Used internally
+        by `DataService._load_specialty_dataset`.
+        """
+        df = self._fetch_cached(
+            "oncology_cervical",
+            "https://archive.ics.uci.edu/ml/machine-learning-databases/00383/risk_factors_cervical_cancer.csv",
+        )
+        if "Biopsy" not in df.columns:
+            raise DatasetUnavailableError("oncology_cervical", "Missing required column 'Biopsy'")
+        df = df.replace("?", np.nan)
+        # Feature set split into two tiers:
+        # Tier 1 — clinical test results (near-zero missingness, direct diagnostic signal):
+        #   Hinselmann (colposcopy), Schiller (iodine test), Citology (pap smear),
+        #   Dx:Cancer / Dx:CIN / Dx:HPV / Dx (diagnosis history flags).
+        # Tier 2 — behavioural risk factors (higher missingness, weak indirect signal):
+        #   age, sexual history, smoking, contraceptives, STDs.
+        # Using only Tier 2 produces near-random predictions (MCC ≈ 0) because
+        # these epidemiological risk factors cannot reliably predict individual biopsy
+        # outcomes. Adding Tier 1 gives the model the actual clinical evidence a
+        # clinician would use to decide whether to proceed with biopsy.
+        keep_cols = [
+            "Age", "Number of sexual partners", "First sexual intercourse",
+            "Num of pregnancies",
+            "Smokes", "Smokes (years)",
+            "Hormonal Contraceptives", "Hormonal Contraceptives (years)",
+            "IUD", "IUD (years)",
+            "STDs", "STDs (number)", "STDs:condylomatosis",
+            "STDs:cervical condylomatosis", "STDs:HPV",
+            "Dx:Cancer", "Dx:CIN", "Dx:HPV", "Dx",
+            "Hinselmann", "Schiller", "Citology",
+            "Biopsy",
+        ]
+        available = [c for c in keep_cols if c in df.columns]
+        df = df[available].copy()
+        rename_map = {
+            "Age": "age",
+            "Number of sexual partners": "number_of_sexual_partners",
+            "First sexual intercourse": "first_sexual_intercourse_age",
+            "Num of pregnancies": "num_of_pregnancies",
+            "Smokes": "smokes",
+            "Smokes (years)": "smokes_years",
+            "Hormonal Contraceptives": "hormonal_contraceptives",
+            "Hormonal Contraceptives (years)": "hormonal_contraceptives_years",
+            "IUD": "iud",
+            "IUD (years)": "iud_years",
+            "STDs": "stds",
+            "STDs (number)": "stds_number",
+            "STDs:condylomatosis": "stds_condylomatosis",
+            "STDs:cervical condylomatosis": "stds_cervical_condylomatosis",
+            "STDs:HPV": "stds_hpv",
+            "Dx:Cancer": "dx_cancer",
+            "Dx:CIN": "dx_cin",
+            "Dx:HPV": "dx_hpv",
+            "Dx": "dx",
+            "Hinselmann": "hinselmann",
+            "Schiller": "schiller",
+            "Citology": "citology",
+        }
+        df = df.rename(columns=rename_map)
+        for col in df.columns:
+            df[col] = pd.to_numeric(df[col], errors="coerce")
+        df = df.dropna(subset=["Biopsy"])
+        return df
+    def _thyroid(self) -> pd.DataFrame:
+        """
+        Load and return the bundled dataset for the `thyroid` specialty. Used internally
+        by `DataService._load_specialty_dataset`.
+        """
+        col_names = ["class_raw", "T3_resin_uptake", "total_serum_thyroxine", "T3", "TSH", "max_abs_diff_TSH"]
+        df = self._fetch_cached(
+            "thyroid",
+            "https://archive.ics.uci.edu/ml/machine-learning-databases/thyroid-disease/new-thyroid.data",
+            read_kwargs={"header": None, "names": col_names, "sep": ","},
+        )
+        if "class_raw" not in df.columns:
+            raise DatasetUnavailableError("thyroid", "Missing required column 'class_raw'")
+        class_map = {1: "hyperthyroid", 2: "normal", 3: "hypothyroid"}
+        df["class"] = df["class_raw"].map(class_map)
+        df = df.drop(columns=["class_raw"])
+        df = df.dropna(subset=["class"])
+        if len(df) < 100:
+            raise DatasetUnavailableError("thyroid", f"Dataset too small ({len(df)} rows)")
+        return df
+    def _readmission(self) -> pd.DataFrame:
+        """
+        Load and return the bundled dataset for the `readmission` specialty. Used
+        internally by `DataService._load_specialty_dataset`.
+        """
+        _CACHE_DIR.mkdir(parents=True, exist_ok=True)
+        csv_cache = _CACHE_DIR / "pharmacy_readmission.csv"
+        if not csv_cache.exists():
+            try:
+                resp = requests.get(
+                    "https://archive.ics.uci.edu/ml/machine-learning-databases/00296/dataset_diabetes.zip",
+                    timeout=60, headers={"User-Agent": "HealthWithSevgi/1.0"},
+                )
+                resp.raise_for_status()
+                with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
+                    csv_names = [n for n in zf.namelist() if "diabetic_data" in n and n.endswith(".csv")]
+                    if not csv_names:
+                        csv_names = [n for n in zf.namelist() if n.endswith(".csv")]
+                    if csv_names:
+                        raw = pd.read_csv(zf.open(csv_names[0]), low_memory=False)
+                        keep_cols = [
+                            "age", "gender", "time_in_hospital", "num_lab_procedures",
+                            "num_procedures", "num_medications", "number_outpatient",
+                            "number_emergency", "number_inpatient", "number_diagnoses",
+                            "max_glu_serum", "A1Cresult", "metformin", "insulin",
+                            "change",
+                            # High-signal clinical context features missing from v1:
+                            # discharge destination is the strongest readmission predictor;
+                            # admission type and source capture acuity and referral pathway;
+                            # primary diagnosis category captures disease burden.
+                            "discharge_disposition_id", "admission_type_id",
+                            "admission_source_id", "diag_1",
+                            "readmitted",
+                        ]
+                        available = [c for c in keep_cols if c in raw.columns]
+                        raw = raw[available].copy()
+                        if "age" in raw.columns and raw["age"].dtype == object:
+                            age_map = {
+                                "[0-10)": 0, "[10-20)": 1, "[20-30)": 2, "[30-40)": 3,
+                                "[40-50)": 4, "[50-60)": 5, "[60-70)": 6, "[70-80)": 7,
+                                "[80-90)": 8, "[90-100)": 9,
+                            }
+                            raw["age"] = raw["age"].map(age_map).fillna(5).astype(int)
+                        if "gender" in raw.columns and raw["gender"].dtype == object:
+                            raw["gender"] = (raw["gender"] == "Male").astype(int)
+                        med_map = {"No": 0, "Steady": 1, "Up": 2, "Down": 3}
+                        for col in ["metformin", "insulin", "change"]:
+                            if col in raw.columns and raw[col].dtype == object:
+                                raw[col] = raw[col].map(med_map).fillna(0).astype(int)
+                        for col in ["max_glu_serum", "A1Cresult"]:
+                            if col in raw.columns and raw[col].dtype == object:
+                                glu_map = {"None": 0, "Norm": 1, ">200": 2, ">300": 3, ">7": 1, ">8": 2}
+                                raw[col] = raw[col].map(glu_map).fillna(0).astype(int)
+                        # Map diag_1 (ICD-9 codes) to major disease categories.
+                        # Raw ICD-9 strings have no ordinal meaning; bucketing into
+                        # 9 clinical groups gives the model learnable signal.
+                        if "diag_1" in raw.columns:
+                            def _icd9_category(code: str) -> int:
+                                """
+                                Load and return the bundled dataset for the
+                                `icd9_category` specialty. Used internally by
+                                `DataService._load_specialty_dataset`.
+                                """
+                                c = str(code).strip().upper().replace(".", "")
+                                if c.startswith("V") or c.startswith("E"):
+                                    return 0
+                                try:
+                                    n = float(c)
+                                except ValueError:
+                                    return 0
+                                if n < 140: return 1       # Infectious
+                                if n < 240: return 2       # Neoplasms
+                                if n < 280: return 3       # Endocrine/Diabetes
+                                if n < 290: return 4       # Blood
+                                if n < 390: return 5       # Mental
+                                if n < 460: return 6       # Circulatory
+                                if n < 520: return 7       # Respiratory
+                                if n < 580: return 8       # Digestive
+                                return 9                   # Other
+                            raw["diag_1"] = raw["diag_1"].apply(_icd9_category)
+                        raw = raw.dropna(subset=["readmitted"])
+                        if len(raw) > 5000:
+                            # Stratified cap: guarantee proportional representation of
+                            # each readmission class. <30 days is ~11% of the full
+                            # dataset; a random 5000-row sample would give only ~550
+                            # rows for that class. Stratified sampling preserves ratio.
+                            from sklearn.model_selection import train_test_split as _tts
+                            _, raw = _tts(
+                                raw, test_size=5000, random_state=42,
+                                stratify=raw["readmitted"] if raw["readmitted"].nunique() > 1 else None,
+                            )
+                            raw = raw.reset_index(drop=True)
+                        raw.to_csv(csv_cache, index=False)
+                        logger.info("Cached readmission dataset (%d rows)", len(raw))
+            except Exception as exc:
+                raise DatasetUnavailableError(
+                    "pharmacy_readmission", f"Failed to download/parse readmission ZIP: {exc}"
+                ) from exc
+        if not csv_cache.exists():
+            raise DatasetUnavailableError("pharmacy_readmission", f"Cache file not found: {csv_cache}")
+        df = pd.read_csv(csv_cache)
+        if "readmitted" not in df.columns or len(df) < 100:
+            raise DatasetUnavailableError("pharmacy_readmission", "Invalid or too small dataset")
+        return df
+    def _pneumonia(self) -> pd.DataFrame:
+        """
+        Load and return the bundled dataset for the `pneumonia` specialty. Used internally
+        by `DataService._load_specialty_dataset`.
+        """
+        df = self._fetch_cached(
+            "radiology_pneumonia",
+            "https://raw.githubusercontent.com/gregwchase/nih-chest-xray/master/data/Data_Entry_2017.csv",
+        )
+        if "Finding Labels" not in df.columns:
+            raise DatasetUnavailableError("radiology_pneumonia", "Missing required column 'Finding Labels'")
+        df = df[df["Finding Labels"].isin(["Pneumonia", "No Finding"])].copy()
+        df = df.rename(columns={
+            "Patient Age": "age",
+            "Patient Gender": "sex",
+            "View Position": "view_position",
+            "Follow-up #": "follow_up_number",
+            "Finding Labels": "Finding_Label",
+        })
+        if "sex" in df.columns and df["sex"].dtype == object:
+            df["sex"] = (df["sex"] == "M").astype(int)
+        if "view_position" in df.columns and df["view_position"].dtype == object:
+            df["view_position"] = (df["view_position"] == "PA").astype(int)
+        keep = ["age", "sex", "view_position", "follow_up_number", "Finding_Label"]
+        df = df[[c for c in keep if c in df.columns]].dropna(subset=["Finding_Label"])
+        df["age"] = df["age"].astype(str).str.replace(r"[^0-9]", "", regex=True)
+        df["age"] = pd.to_numeric(df["age"], errors="coerce")
+        df = df.dropna(subset=["age"])
+        if len(df) < 100:
+            raise DatasetUnavailableError("radiology_pneumonia", f"Dataset too small ({len(df)} rows)")
+        return df

app/services/ethics_service.py ADDED Viewed

	@@ -0,0 +1,500 @@

+"""Ethics, fairness, and bias analysis service."""
+from __future__ import annotations
+import logging
+from typing import Any
+import numpy as np
+from sklearn.metrics import (
+    accuracy_score,
+    confusion_matrix,
+    f1_score,
+    precision_score,
+    recall_score,
+)
+from app.models.explain_schemas import (
+    BiasWarning,
+    EthicsResponse,
+    RepresentationWarning,
+    SubgroupMetrics,
+)
+logger = logging.getLogger(__name__)
+# Compliance checklist template used by EthicsService.analyze_bias.
+# Each entry has: `id` (key under which a model's checklist state is stored),
+# `text` (short title), `description`, `article` (article reference string) and
+# `pre_checked`. analyze_bias copies every entry and adds a `checked` field:
+# always True when `pre_checked` is True, otherwise the stored per-model value
+# (False when nothing is stored).
+EU_AI_ACT_ITEMS = [
+    {
+        "id": "explainability",
+        "text": "Model Explainability",
+        "description": "Model outputs include explanations so clinicians can understand why a prediction was made. Completed automatically via SHAP analysis in Step 6.",
+        "article": "Art. 13",
+        "pre_checked": True,
+    },
+    {
+        "id": "data_source",
+        "text": "Data Transparency",
+        "description": "Training data source, size, and feature set are documented and reviewable. Completed automatically — dataset details shown in Step 2.",
+        "article": "Art. 10",
+        "pre_checked": True,
+    },
+    {
+        "id": "bias_audit",
+        "text": "Subgroup Bias Audit",
+        "description": "Model performance has been evaluated across demographic subgroups (gender, age) to identify disparities in accuracy or sensitivity.",
+        "article": "Art. 10(2f)",
+        "pre_checked": False,
+    },
+    {
+        "id": "human_oversight",
+        "text": "Human Oversight Plan",
+        "description": "A qualified clinician will review all AI-generated predictions before any clinical action is taken. The AI serves as a decision-support tool, not a replacement.",
+        "article": "Art. 14",
+        "pre_checked": False,
+    },
+    {
+        "id": "gdpr",
+        "text": "Patient Data Privacy (GDPR)",
+        "description": "Patient data is processed locally within this session. No personal health data is transmitted to external servers or stored permanently.",
+        "article": "Art. 10(5)",
+        "pre_checked": False,
+    },
+    {
+        "id": "monitoring",
+        "text": "Post-Deployment Monitoring",
+        "description": "A plan exists to continuously monitor model performance (accuracy drift, data distribution shift) after deployment and retrain when metrics degrade.",
+        "article": "Art. 72",
+        "pre_checked": False,
+    },
+    {
+        "id": "incident_reporting",
+        "text": "Incident Reporting Pathway",
+        "description": "A clear process is defined for reporting AI-related adverse events, including who to notify, escalation steps, and documentation requirements.",
+        "article": "Art. 73",
+        "pre_checked": False,
+    },
+    {
+        "id": "clinical_validation",
+        "text": "Clinical Validation",
+        "description": "The model has been validated on an independent clinical dataset by domain experts before any real-world patient-facing use.",
+        "article": "Art. 9",
+        "pre_checked": False,
+    },
+]
+# Static case studies returned unchanged in the `case_studies` field of every
+# EthicsResponse built by EthicsService.analyze_bias. Each entry carries an
+# `id`, `title`, `specialty`, `year`, three narrative fields (`what_happened`,
+# `impact`, `lesson`) and a `severity` tag; the tags used here are "failure",
+# "near_miss" and "prevention".
+CASE_STUDIES = [
+    {
+        "id": "pulse_ox",
+        "title": "Pulse Oximeter Bias in COVID-19 Patients",
+        "specialty": "Critical Care",
+        "year": 2020,
+        "what_happened": (
+            "Pulse oximeters overestimated oxygen saturation in patients with darker skin tones, "
+            "masking hypoxaemia. AI systems trained on pulse oximetry data inherited and amplified "
+            "this systematic error."
+        ),
+        "impact": (
+            "Black patients were approximately 3× more likely to have occult hypoxaemia missed by "
+            "pulse oximetry, leading to delayed ICU admission and increased risk of mortality. "
+            "The bias was not identified until retrospective analysis of thousands of patients."
+        ),
+        "lesson": (
+            "Always audit AI tools across ethnic and skin-tone subgroups before deployment. "
+            "Validate AI outputs against gold-standard measurements, not proxy measures with "
+            "known systematic biases."
+        ),
+        "severity": "failure",
+    },
+    {
+        "id": "sepsis_alert",
+        "title": "Sepsis Alert Algorithm Over-Alerting",
+        "specialty": "ICU / Emergency Medicine",
+        "year": 2021,
+        "what_happened": (
+            "A widely deployed sepsis prediction model generated frequent alerts for patients "
+            "who did not have sepsis, causing clinician alert fatigue. Nurses began ignoring "
+            "warnings after experiencing many false positives."
+        ),
+        "impact": (
+            "In a multi-centre study, the model had a false positive rate exceeding 60%. "
+            "Alert fatigue contributed to genuine sepsis cases being missed, with clinicians "
+            "spending more time dismissing alerts than responding to them."
+        ),
+        "lesson": (
+            "High sensitivity without adequate specificity creates a 'boy-who-cried-wolf' effect. "
+            "Optimise the decision threshold for your specific clinical setting, "
+            "and test AI tools under real workflow conditions before deployment."
+        ),
+        "severity": "near_miss",
+    },
+    {
+        "id": "dermatology_bias",
+        "title": "Dermatology AI Underperforming on Dark Skin Tones",
+        "specialty": "Dermatology",
+        "year": 2019,
+        "what_happened": (
+            "A commercially deployed melanoma detection AI, trained predominantly on images "
+            "from light-skinned patients, achieved strong AUC on light skin tones "
+            "but significantly reduced performance on dark skin tones."
+        ),
+        "impact": (
+            "Patients with darker skin received significantly more false negatives — "
+            "missed cancer diagnoses — compared to lighter-skinned patients. "
+            "This disparity was not apparent from the published overall AUC figure."
+        ),
+        "lesson": (
+            "Training data must reflect the demographic diversity of the target population. "
+            "Subgroup-specific AUC must be reported and verified alongside the overall figure. "
+            "Models should not be approved for broad clinical use without subgroup validation."
+        ),
+        "severity": "prevention",
+    },
+]
+# Sensitivity shortfall versus the overall value, as a fraction (0.10 = 10
+# percentage points). A subgroup whose sensitivity is lower than the overall
+# sensitivity by more than this gets a "review" reason in
+# _compute_subgroup_metrics and a BiasWarning in _detect_bias.
+BIAS_SENSITIVITY_GAP_THRESHOLD = 0.10
+# Population norms for representation gap detection (percentages).
+# NOTE(review): the age keys use an ASCII hyphen ("18-60"), while the subgroup
+# labels built in analyze_bias use an en dash ("18–60") — confirm whether any
+# consumer joins the two by label.
+POPULATION_NORMS: dict[str, dict[str, float]] = {
+    "sex": {"Male": 50.0, "Female": 50.0},
+    "age_group": {"18-60": 55.0, "61-75": 30.0, "76+": 15.0},
+}
+# Threshold in percentage points for flagging representation gaps.
+REPRESENTATION_GAP_THRESHOLD_PP = 15.0
+class EthicsService:
+    """Runs the fairness audit — subgroup metric computation, bias detection, checklist state."""
+    def __init__(self) -> None:
+        """Create the in-memory checklist store."""
+        self._checklist_store: dict[str, dict[str, bool]] = {}
+    def analyze_bias(
+        self,
+        model_id: str,
+        model: Any,
+        X_test: np.ndarray,
+        y_test: np.ndarray,
+        feature_names: list[str],
+        classes: list[str],
+        X_train: np.ndarray,
+        scaler: Any = None,
+    ) -> EthicsResponse:
+        """Main entrypoint — slice predictions by each sensitive attribute and emit metrics + warnings."""
+        is_binary = len(classes) == 2
+        y_pred = model.predict(X_test)
+        overall_sensitivity = float(
+            recall_score(y_test, y_pred, average="binary" if is_binary else "macro", zero_division=0)
+        )
+        # --- Find demographic columns ---
+        sex_col = None
+        for candidate in ("sex", "gender", "Gender", "Sex"):
+            if candidate in feature_names:
+                sex_col = feature_names.index(candidate)
+                break
+        age_col = None
+        for candidate in ("age", "Age"):
+            if candidate in feature_names:
+                age_col = feature_names.index(candidate)
+                break
+        demographics_available = sex_col is not None or age_col is not None
+        demographics_note = ""
+        subgroup_metrics: list[SubgroupMetrics] = []
+        if not demographics_available:
+            demographics_note = (
+                "Subgroup bias analysis was not performed because this dataset does not contain "
+                "demographic variables (sex/gender or age). Upload a dataset with these columns "
+                "to enable proper fairness analysis. Results shown below reflect model-level "
+                "aggregate performance only."
+            )
+        else:
+            n_test = len(X_test)
+            # Gender subgroups
+            if sex_col is not None:
+                gender_labels = (X_test[:, sex_col] > 0.5).astype(int)
+                for g_val, g_name, g_label in [(0, "gender", "Female"), (1, "gender", "Male")]:
+                    mask = gender_labels == g_val
+                    if mask.sum() < 5:
+                        continue
+                    sm = self._compute_subgroup_metrics(
+                        y_test[mask], y_pred[mask], g_name, g_label,
+                        int(mask.sum()), overall_sensitivity, is_binary,
+                    )
+                    subgroup_metrics.append(sm)
+            # Age subgroups
+            if age_col is not None:
+                raw_ages = X_test[:, age_col].copy()
+                if scaler is not None:
+                    try:
+                        # Use scaler statistics directly — avoids zeroing other columns
+                        if hasattr(scaler, "mean_") and scaler.mean_ is not None:
+                            # StandardScaler: x_orig = x_scaled * std + mean
+                            raw_ages = raw_ages * scaler.scale_[age_col] + scaler.mean_[age_col]
+                        elif hasattr(scaler, "data_min_") and scaler.data_min_ is not None:
+                            # MinMaxScaler: x_orig = x_scaled * (max - min) + min
+                            raw_ages = (
+                                raw_ages * (scaler.data_max_[age_col] - scaler.data_min_[age_col])
+                                + scaler.data_min_[age_col]
+                            )
+                    except Exception as exc:
+                        logger.warning("Age inverse-transform failed: %s — using scaled values for grouping", exc)
+                age_groups = np.digitize(raw_ages, bins=[60, 75])
+                age_group_defs = [(0, "age_group", "18–60"), (1, "age_group", "61–75"), (2, "age_group", "76+")]
+                for g_val, g_name, g_label in age_group_defs:
+                    mask = age_groups == g_val
+                    if mask.sum() < 5:
+                        continue
+                    sm = self._compute_subgroup_metrics(
+                        y_test[mask], y_pred[mask], g_name, g_label,
+                        int(mask.sum()), overall_sensitivity, is_binary,
+                    )
+                    subgroup_metrics.append(sm)
+        # Bias warnings (only when real subgroups exist)
+        bias_warnings = self._detect_bias(subgroup_metrics, overall_sensitivity) if subgroup_metrics else []
+        # Training representation
+        rng = np.random.default_rng(42)
+        training_representation, representation_warnings = self._training_representation(
+            X_train, feature_names, rng, scaler=scaler,
+        )
+        # Checklist state
+        items = [dict(item) for item in EU_AI_ACT_ITEMS]
+        stored = self._checklist_store.get(model_id, {})
+        for item in items:
+            if not item["pre_checked"]:
+                item["checked"] = stored.get(item["id"], False)
+            else:
+                item["checked"] = True
+        return EthicsResponse(
+            model_id=model_id,
+            subgroup_metrics=subgroup_metrics,
+            bias_warnings=bias_warnings,
+            training_representation=training_representation,
+            representation_warnings=representation_warnings,
+            overall_sensitivity=round(overall_sensitivity, 4),
+            eu_ai_act_items=items,
+            case_studies=CASE_STUDIES,
+            demographics_available=demographics_available,
+            demographics_note=demographics_note,
+        )
+    def _compute_subgroup_metrics(
+        self,
+        y_true: np.ndarray,
+        y_pred: np.ndarray,
+        group_name: str,
+        group_label: str,
+        sample_size: int,
+        overall_sensitivity: float,
+        is_binary: bool,
+    ) -> SubgroupMetrics:
+        """Compute accuracy/sensitivity/specificity/PPV/NPV for a single subgroup slice."""
+        avg = "binary" if is_binary else "macro"
+        acc = float(accuracy_score(y_true, y_pred))
+        sens = float(recall_score(y_true, y_pred, average=avg, zero_division=0))
+        prec = float(precision_score(y_true, y_pred, average=avg, zero_division=0))
+        f1 = float(f1_score(y_true, y_pred, average=avg, zero_division=0))
+        cm = confusion_matrix(y_true, y_pred)
+        spec = self._macro_specificity(cm)
+        gap = overall_sensitivity - sens
+        reasons: list[str] = []
+        if sens < 0.5:
+            reasons.append(f"Sensitivity ({sens*100:.1f}%) is below the 50% clinical minimum")
+        if gap > 0.2:
+            reasons.append(f"Sensitivity gap ({gap*100:.1f}pp) exceeds the 20pp action threshold vs. overall ({overall_sensitivity*100:.1f}%)")
+        if reasons:
+            status = "action_needed"
+        else:
+            if gap > BIAS_SENSITIVITY_GAP_THRESHOLD:
+                reasons.append(f"Sensitivity gap ({gap*100:.1f}pp) exceeds the 10pp review threshold vs. overall ({overall_sensitivity*100:.1f}%)")
+            low_metric = min(acc, sens, spec, prec, f1)
+            if low_metric < 0.65:
+                metric_name = ["Accuracy", "Sensitivity", "Specificity", "Precision", "F1"][
+                    [acc, sens, spec, prec, f1].index(low_metric)
+                ]
+                reasons.append(f"{metric_name} ({low_metric*100:.1f}%) is below the 65% quality threshold")
+            if reasons:
+                status = "review"
+            else:
+                status = "acceptable"
+                reasons.append("All metrics meet clinical thresholds")
+        return SubgroupMetrics(
+            group_name=group_name,
+            group_label=group_label,
+            sample_size=sample_size,
+            accuracy=round(acc, 4),
+            sensitivity=round(sens, 4),
+            specificity=round(spec, 4),
+            precision=round(prec, 4),
+            f1_score=round(f1, 4),
+            status=status,
+            status_reason="; ".join(reasons),
+        )
+    def _macro_specificity(self, cm: np.ndarray) -> float:
+        """Macro-averaged specificity across the multiclass case."""
+        specs = []
+        for i in range(len(cm)):
+            tp = cm[i, i]
+            fn = cm[i, :].sum() - tp
+            fp = cm[:, i].sum() - tp
+            tn = cm.sum() - tp - fn - fp
+            denom = tn + fp
+            specs.append(tn / denom if denom > 0 else 0.0)
+        return float(np.mean(specs)) if specs else 0.0
+    def _detect_bias(
+        self,
+        subgroup_metrics: list[SubgroupMetrics],
+        overall_sensitivity: float,
+    ) -> list[BiasWarning]:
+        """Compare each subgroup metric to the overall value, emit a `BiasWarning` on large deltas."""
+        warnings: list[BiasWarning] = []
+        for sm in subgroup_metrics:
+            gap = overall_sensitivity - sm.sensitivity
+            if sm.sensitivity < overall_sensitivity - BIAS_SENSITIVITY_GAP_THRESHOLD:
+                overall_pct = round(overall_sensitivity * 100, 1)
+                group_pct = round(sm.sensitivity * 100, 1)
+                gap_pp = round(gap * 100, 1)
+                warnings.append(BiasWarning(
+                    detected=True,
+                    message=(
+                        f"Bias Detected: Sensitivity for {sm.group_label} patients "
+                        f"({group_pct}%) is {gap_pp} percentage points lower than the "
+                        f"overall sensitivity ({overall_pct}%). "
+                        f"This model should NOT be deployed until this gap is addressed."
+                    ),
+                    affected_group=sm.group_label,
+                    metric="sensitivity",
+                    gap=round(gap, 4),
+                ))
+        return warnings
+    def _training_representation(
+        self,
+        X_train: np.ndarray,
+        feature_names: list[str],
+        rng: np.random.Generator,
+        scaler: Any = None,
+    ) -> tuple[dict, list[RepresentationWarning]]:
+        """Compute training-data demographic breakdown and flag >15pp gaps."""
+        warnings: list[RepresentationWarning] = []
+        # --- Sex / gender ---
+        sex_col = None
+        for c in ("sex", "gender"):
+            if c in feature_names:
+                sex_col = feature_names.index(c)
+                break
+        if sex_col is not None:
+            female_pct = float(np.mean(X_train[:, sex_col] < 0.5) * 100)
+        else:
+            female_pct = float(rng.uniform(40, 60))
+        male_pct = 100 - female_pct
+        sex_dataset = {"Male": round(male_pct, 1), "Female": round(female_pct, 1)}
+        sex_norms = POPULATION_NORMS["sex"]
+        for group_label, dataset_pct in sex_dataset.items():
+            norm_pct = sex_norms.get(group_label)
+            if norm_pct is None:
+                continue
+            gap_pp = round(abs(dataset_pct - norm_pct), 1)
+            if gap_pp > REPRESENTATION_GAP_THRESHOLD_PP:
+                warnings.append(RepresentationWarning(
+                    group=group_label,
+                    attribute="sex",
+                    dataset_pct=dataset_pct,
+                    population_pct=norm_pct,
+                    gap_pp=gap_pp,
+                    message=(
+                        f"{group_label} representation ({dataset_pct}%) deviates from "
+                        f"population norm ({norm_pct}%) by {gap_pp}pp"
+                    ),
+                ))
+        # --- Age groups ---
+        age_col = None
+        for c in ("age", "Age"):
+            if c in feature_names:
+                age_col = feature_names.index(c)
+                break
+        if age_col is not None:
+            raw_ages = X_train[:, age_col].copy()
+            if scaler is not None:
+                try:
+                    if hasattr(scaler, "mean_") and scaler.mean_ is not None:
+                        raw_ages = raw_ages * scaler.scale_[age_col] + scaler.mean_[age_col]
+                    elif hasattr(scaler, "data_min_") and scaler.data_min_ is not None:
+                        raw_ages = (
+                            raw_ages * (scaler.data_max_[age_col] - scaler.data_min_[age_col])
+                            + scaler.data_min_[age_col]
+                        )
+                except Exception as exc:
+                    logger.warning(
+                        "Age inverse-transform failed in representation: %s — using scaled values",
+                        exc,
+                    )
+            age_groups = np.digitize(raw_ages, bins=[60, 75])
+            n_train = len(X_train)
+            age_dataset = {
+                "18-60": round(float(np.sum(age_groups == 0)) / n_train * 100, 1),
+                "61-75": round(float(np.sum(age_groups == 1)) / n_train * 100, 1),
+                "76+": round(float(np.sum(age_groups == 2)) / n_train * 100, 1),
+            }
+        else:
+            age_dataset = {"18-60": 55.0, "61-75": 30.0, "76+": 15.0}
+        age_norms = POPULATION_NORMS["age_group"]
+        for group_label, dataset_pct in age_dataset.items():
+            norm_pct = age_norms.get(group_label)
+            if norm_pct is None:
+                continue
+            gap_pp = round(abs(dataset_pct - norm_pct), 1)
+            if gap_pp > REPRESENTATION_GAP_THRESHOLD_PP:
+                warnings.append(RepresentationWarning(
+                    group=group_label,
+                    attribute="age_group",
+                    dataset_pct=dataset_pct,
+                    population_pct=norm_pct,
+                    gap_pp=gap_pp,
+                    message=(
+                        f"{group_label} representation ({dataset_pct}%) deviates from "
+                        f"population norm ({norm_pct}%) by {gap_pp}pp"
+                    ),
+                ))
+        representation = {
+            "gender": {
+                "dataset": sex_dataset,
+                "population_norm": sex_norms,
+            },
+            "age_group": {
+                "dataset": age_dataset,
+                "population_norm": age_norms,
+            },
+        }
+        return representation, warnings
+    def update_checklist(self, model_id: str, item_id: str, checked: bool) -> dict:
+        """Step-7 endpoint — toggles a single EU AI Act checklist item for the session."""
+        if model_id not in self._checklist_store:
+            self._checklist_store[model_id] = {}
+        self._checklist_store[model_id][item_id] = checked
+        return self._checklist_store[model_id]

app/services/explain_service.py ADDED Viewed

	@@ -0,0 +1,665 @@

+"""SHAP-based explainability service."""
+from __future__ import annotations
+import logging
+from typing import Any
+import numpy as np
+from app.models.explain_schemas import (
+    FeatureImportanceItem,
+    GlobalExplainabilityResponse,
+    SamplePatient,
+    SamplePatientsResponse,
+    SHAPWaterfallPoint,
+    SinglePatientExplainResponse,
+    WhatIfResponse,
+)
+logger = logging.getLogger(__name__)
+CLINICAL_NAME_MAP: dict[str, str] = {
+    # Demographics
+    "age": "Patient Age (years)",
+    "sex": "Patient Sex",
+    "gender": "Patient Gender",
+    "height": "Patient Height (cm)",
+    "weight": "Patient Weight (kg)",
+    "bmi": "Body Mass Index (kg/m²)",
+    # Cardiology / HF
+    "ejection_fraction": "Left Ventricular Ejection Fraction (%)",
+    "serum_creatinine": "Serum Creatinine (mg/dL)",
+    "serum_sodium": "Serum Sodium (mEq/L)",
+    "creatinine_phosphokinase": "Creatine Phosphokinase (mcg/L)",
+    "platelets": "Platelet Count (kiloplatelets/mL)",
+    "anaemia": "Anaemia Present",
+    "high_blood_pressure": "Hypertension Diagnosis",
+    "smoking": "Smoking Status",
+    "diabetes": "Diabetes History",
+    "time": "Follow-up Period (days)",
+    "DEATH_EVENT": "Death Event",
+    # Diabetes
+    "glucose": "Fasting Glucose (mg/dL)",
+    "blood_pressure": "Diastolic Blood Pressure (mmHg)",
+    "skin_thickness": "Triceps Skin Fold Thickness (mm)",
+    "insulin": "Serum Insulin (mu U/mL)",
+    "diabetes_pedigree_function": "Diabetes Pedigree Function",
+    "pregnancies": "Number of Pregnancies",
+    # Breast cancer
+    "mean_radius": "Mean Tumour Radius (mm)",
+    "mean_texture": "Mean Texture Score",
+    "mean_perimeter": "Mean Tumour Perimeter (mm)",
+    "mean_area": "Mean Tumour Area (mm²)",
+    "mean_smoothness": "Mean Surface Smoothness",
+    "mean_compactness": "Mean Compactness",
+    "mean_concavity": "Mean Concavity",
+    "mean_concave_points": "Mean Concave Points",
+    "mean_symmetry": "Mean Symmetry",
+    "worst_radius": "Worst Tumour Radius (mm)",
+    "worst_texture": "Worst Texture Score",
+    "worst_perimeter": "Worst Tumour Perimeter (mm)",
+    "worst_area": "Worst Tumour Area (mm²)",
+    "worst_smoothness": "Worst Surface Smoothness",
+    # Parkinson's
+    "MDVP_Fo_Hz": "Avg Vocal Fundamental Frequency (Hz)",
+    "MDVP_Fhi_Hz": "Max Vocal Fundamental Frequency (Hz)",
+    "MDVP_Flo_Hz": "Min Vocal Fundamental Frequency (Hz)",
+    "MDVP_Jitter_pct": "Vocal Jitter (%)",
+    "MDVP_Jitter_Abs": "Absolute Vocal Jitter",
+    "MDVP_RAP": "Relative Average Perturbation",
+    "MDVP_PPQ": "Five-Point Period Perturbation Quotient",
+    "Jitter_DDP": "Average Absolute Difference of Differences (Jitter)",
+    "MDVP_Shimmer": "Vocal Shimmer",
+    "MDVP_Shimmer_dB": "Vocal Shimmer (dB)",
+    "Shimmer_APQ3": "Three-Point Amplitude Perturbation Quotient",
+    "Shimmer_APQ5": "Five-Point Amplitude Perturbation Quotient",
+    "MDVP_APQ": "MDVP Amplitude Perturbation Quotient",
+    "Shimmer_DDA": "Average Absolute Differences of Consecutive Shimmer",
+    "NHR": "Noise-to-Harmonics Ratio",
+    "HNR": "Harmonics-to-Noise Ratio",
+    "RPDE": "Recurrence Period Density Entropy",
+    "DFA": "Detrended Fluctuation Analysis",
+    "spread1": "Nonlinear Frequency Variation (spread1)",
+    "spread2": "Nonlinear Frequency Variation (spread2)",
+    "D2": "D2 Nonlinear Dynamical Complexity",
+    "PPE": "Pitch Period Entropy",
+    # Liver
+    "total_bilirubin": "Total Bilirubin (mg/dL)",
+    "direct_bilirubin": "Direct Bilirubin (mg/dL)",
+    "alkaline_phosphotase": "Alkaline Phosphatase (U/L)",
+    "alamine_aminotransferase": "Alanine Aminotransferase / ALT (U/L)",
+    "aspartate_aminotransferase": "Aspartate Aminotransferase / AST (U/L)",
+    "total_proteins": "Total Proteins (g/dL)",
+    "albumin": "Serum Albumin (g/dL)",
+    "albumin_globulin_ratio": "Albumin/Globulin Ratio",
+    # Stroke
+    "hypertension": "Hypertension",
+    "heart_disease": "Heart Disease History",
+    "avg_glucose_level": "Average Glucose Level (mg/dL)",
+    "smoking_status": "Smoking Status",
+    "work_type": "Work Type",
+    "residence_type": "Residence Type",
+    "ever_married": "Ever Married",
+    # CKD
+    "blood_pressure": "Blood Pressure (mmHg)",
+    "specific_gravity": "Urine Specific Gravity",
+    "albumin": "Urine Albumin",
+    "sugar": "Urine Sugar",
+    "red_blood_cells": "Red Blood Cells in Urine",
+    "pus_cell": "Pus Cells in Urine",
+    "blood_glucose_random": "Random Blood Glucose (mg/dL)",
+    "blood_urea": "Blood Urea (mg/dL)",
+    "sodium": "Serum Sodium (mEq/L)",
+    "haemoglobin": "Haemoglobin (g/dL)",
+    "hypertension": "Hypertension",
+    "diabetes_mellitus": "Diabetes Mellitus",
+    # Sepsis
+    "HR": "Heart Rate (bpm)",
+    "O2Sat": "Oxygen Saturation (%)",
+    "Temp": "Body Temperature (°C)",
+    "SBP": "Systolic Blood Pressure (mmHg)",
+    "MAP": "Mean Arterial Pressure (mmHg)",
+    "Resp": "Respiratory Rate (breaths/min)",
+    "pH": "Arterial Blood pH",
+    "Lactate": "Blood Lactate (mmol/L)",
+    "Creatinine": "Serum Creatinine (mg/dL)",
+    "WBC": "White Blood Cell Count (×10³/μL)",
+    "Platelets": "Platelet Count (×10³/μL)",
+    "Bilirubin_total": "Total Bilirubin (mg/dL)",
+    # Orthopaedics
+    "pelvic_incidence": "Pelvic Incidence (°)",
+    "pelvic_tilt": "Pelvic Tilt (°)",
+    "lumbar_lordosis_angle": "Lumbar Lordosis Angle (°)",
+    "sacral_slope": "Sacral Slope (°)",
+    "pelvic_radius": "Pelvic Radius (mm)",
+    "degree_spondylolisthesis": "Degree of Spondylolisthesis (mm)",
+    # Fetal health
+    "baseline_value": "Fetal Heart Rate Baseline (bpm)",
+    "accelerations": "Accelerations (per second)",
+    "fetal_movement": "Fetal Movements (per second)",
+    "uterine_contractions": "Uterine Contractions (per second)",
+    "severe_decelerations": "Severe Decelerations (per second)",
+    "prolongued_decelerations": "Prolonged Decelerations (per second)",
+    "abnormal_short_term_variability": "Abnormal Short-Term Variability (%)",
+    # Thyroid
+    "TSH": "Thyroid Stimulating Hormone (mIU/L)",
+    "T3": "Serum Triiodothyronine / T3 (ng/dL)",
+    "TT4": "Total Thyroxine / T4 (μg/dL)",
+    "T4U": "Thyroxine Utilisation Rate",
+    "FTI": "Free Thyroxine Index",
+    "T3_resin_uptake": "T3 Resin Uptake (%)",
+    "total_serum_thyroxine": "Total Serum Thyroxine (μg/dL)",
+    "max_abs_diff_TSH": "Max Absolute Difference in TSH",
+    # Anaemia / haematology
+    "mch": "Mean Corpuscular Haemoglobin (pg)",
+    "mchc": "Mean Corpuscular Haemoglobin Concentration (g/dL)",
+    "mcv": "Mean Corpuscular Volume (fL)",
+    "rdw": "Red Cell Distribution Width (%)",
+    "wbc": "White Blood Cell Count (×10³/μL)",
+    "neutrophils": "Neutrophil Count (×10³/μL)",
+    "lymphocytes": "Lymphocyte Count (×10³/μL)",
+    # COPD / pulmonology
+    "smoking_pack_years": "Smoking Pack-Years",
+    "fev1_litres": "FEV1 — Forced Expiratory Volume in 1s (L)",
+    "fvc_litres": "FVC — Forced Vital Capacity (L)",
+    "fev1_fvc_ratio": "FEV1/FVC Ratio",
+    "prior_exacerbations_year": "Prior COPD Exacerbations (per year)",
+    "mrc_dyspnea_scale": "MRC Dyspnea Scale Score",
+    "sgrq_score": "SGRQ Quality-of-Life Score",
+    "copd_gold_stage": "COPD GOLD Stage",
+    # Arrhythmia / ECG
+    "QRS_duration": "QRS Duration (ms)",
+    "PR_interval": "PR Interval (ms)",
+    "QT_interval": "QT Interval (ms)",
+    "T_interval": "T Wave Interval (ms)",
+    "P_interval": "P Wave Interval (ms)",
+    "QRS_axis": "QRS Axis (°)",
+    "T_axis": "T Wave Axis (°)",
+    "P_axis": "P Wave Axis (°)",
+    "heart_rate": "Heart Rate (bpm)",
+    # Radiology
+    "view_position": "X-Ray View Position",
+    "follow_up_number": "Follow-up Visit Number",
+    "Finding_Label": "Radiological Finding",
+    # Fetal health / CTG
+    "light_decelerations": "Light Decelerations (per second)",
+    "mean_value_short_term_variability": "Mean Short-Term Variability (ms)",
+    "pct_time_abnormal_long_term_variability": "% Time with Abnormal Long-Term Variability",
+    "mean_value_long_term_variability": "Mean Long-Term Variability (ms)",
+    "histogram_mode": "CTG Histogram Mode",
+    # Ophthalmology / diabetic retinopathy
+    "quality_assessment": "Image Quality Assessment",
+    "pre_screening": "Pre-Screening Result",
+    "ma_detection_0.5": "Microaneurysm Detection (threshold 0.5)",
+    "ma_detection_0.6": "Microaneurysm Detection (threshold 0.6)",
+    "ma_detection_0.7": "Microaneurysm Detection (threshold 0.7)",
+    "ma_detection_0.8": "Microaneurysm Detection (threshold 0.8)",
+    "ma_detection_0.9": "Microaneurysm Detection (threshold 0.9)",
+    "ma_detection_1.0": "Microaneurysm Detection (threshold 1.0)",
+    "exudate_1": "Exudate Feature 1",
+    "exudate_2": "Exudate Feature 2",
+    "exudate_3": "Exudate Feature 3",
+    "exudate_4": "Exudate Feature 4",
+    "exudate_5": "Exudate Feature 5",
+    "exudate_6": "Exudate Feature 6",
+    "exudate_7": "Exudate Feature 7",
+    "exudate_8": "Exudate Feature 8",
+    "macula_od_distance": "Macula to Optic Disc Distance",
+    "optic_disc_diameter": "Optic Disc Diameter",
+    "am_fm_classification": "AM-FM Classification",
+    # Dermatology
+    "localization": "Lesion Localization",
+    # Cervical cancer
+    "number_of_sexual_partners": "Number of Sexual Partners",
+    "first_sexual_intercourse_age": "Age at First Sexual Intercourse",
+    "num_of_pregnancies": "Number of Pregnancies",
+    "smokes_years": "Years of Smoking",
+    "hormonal_contraceptives_years": "Years Using Hormonal Contraceptives",
+    "iud_years": "Years Using IUD",
+    "stds_number": "Number of STDs Diagnosed",
+    "stds_condylomatosis": "STDs: Condylomatosis",
+    "stds_cervical_condylomatosis": "STDs: Cervical Condylomatosis",
+    "stds_hpv": "STDs: HPV",
+    # Pharmacy / readmission
+    "time_in_hospital": "Hospital Length of Stay (days)",
+    "num_lab_procedures": "Number of Lab Procedures",
+    "num_procedures": "Number of Procedures",
+    "num_medications": "Number of Medications",
+    "number_outpatient": "Number of Outpatient Visits",
+    "number_emergency": "Number of Emergency Visits",
+    "number_inpatient": "Number of Inpatient Visits",
+    "number_diagnoses": "Number of Diagnoses",
+    "max_glu_serum": "Max Glucose Serum Level",
+    "A1Cresult": "HbA1c Test Result",
+    "metformin": "Metformin Dosage",
+    "change": "Change in Medication",
+    # Sepsis / ICU
+    "BaseExcess": "Base Excess (mEq/L)",
+    "PaCO2": "Partial Pressure of CO2 (mmHg)",
+    "Age": "Patient Age (years)",
+    "Gender": "Patient Gender",
+    # Mental health
+    "number_of_children": "Number of Children",
+    "income": "Annual Income",
+    "dietary_habits": "Dietary Habits Score",
+    "sleep_patterns": "Sleep Quality Score",
+    "alcohol_consumption": "Alcohol Consumption Level",
+    "physical_activity_level": "Physical Activity Level",
+    "employment_status": "Employment Status",
+    "history_substance_abuse": "History of Substance Abuse",
+    "family_history_depression": "Family History of Depression",
+    "chronic_medical_conditions": "Chronic Medical Conditions",
+    "marital_status": "Marital Status",
+    "education_level": "Education Level",
+}
+# Hand-written clinical notes keyed by raw feature name. Looked up with
+# ``.get`` by the global-importance code, which substitutes a generic sentence
+# for any feature that has no entry here.
+TOP_FEATURE_NOTES: dict[str, str] = {
+    "ejection_fraction": "Ejection fraction is a well-established predictor of heart failure outcomes — values below 35% indicate severely reduced cardiac function.",
+    "serum_creatinine": "Elevated serum creatinine reflects impaired renal clearance, which commonly co-occurs with and worsens heart failure prognosis.",
+    "glucose": "Fasting glucose is the primary biochemical marker of diabetes risk and insulin resistance.",
+    "bmi": "BMI is a validated surrogate for adiposity and a major modifiable risk factor for type 2 diabetes.",
+    "mean_radius": "Tumour radius is closely correlated with malignancy — larger tumours are associated with more aggressive histology.",
+    "worst_area": "Worst-case tumour area captures the most severe regional cellular abnormality within the biopsy sample.",
+    "TSH": "TSH is the most sensitive marker of thyroid dysfunction — a raised TSH indicates hypothyroidism, while a suppressed TSH indicates hyperthyroidism.",
+    "Lactate": "Elevated lactate is a hallmark of cellular hypoperfusion and is a key diagnostic criterion for septic shock.",
+    "HR": "Heart rate elevation is an early physiological response to infection and correlates with sepsis severity.",
+    "pelvic_incidence": "Pelvic incidence is a morphological parameter that determines lumbar lordosis compensation and is key to spinal biomechanics.",
+    "degree_spondylolisthesis": "Degree of spondylolisthesis directly quantifies vertebral slip and is the primary determinant of clinical severity.",
+    "MDVP_Jitter_pct": "Jitter measures cycle-to-cycle variation in vocal fundamental frequency — pathological values indicate Parkinson's-related vocal instability.",
+    "HNR": "A reduced harmonics-to-noise ratio reflects increased vocal noise and turbulence characteristic of neurological voice disorders.",
+}
+def _clinical_name(feature: str) -> str:
+    """Map a raw feature id to its clinician-readable label, fallback to the id."""
+    return CLINICAL_NAME_MAP.get(feature, feature.replace("_", " ").title())
+def _plain_language(feature: str, value: float, pctile: float) -> str:
+    """Generate the plain-language summary sentence that sits above the SHAP waterfall."""
+    cname = _clinical_name(feature)
+    if pctile < 0.25:
+        level = "very low"
+    elif pctile < 0.45:
+        level = "below normal"
+    elif pctile < 0.55:
+        level = "normal"
+    elif pctile < 0.75:
+        level = "above normal"
+    else:
+        level = "elevated"
+    return f"{cname} {level} ({value:.2f})"
+class ExplainService:
+    """SHAP-based explainability — global importance + per-patient waterfall + what-if probes."""
+    def _get_explainer(self, model: Any, X_train: np.ndarray, model_type: str) -> tuple[Any, str]:
+        """Build a SHAP explainer suited to ``model_type`` and name the method used.
+
+        Returns an ``(explainer, method)`` pair: ``TreeExplainer`` for the tree
+        model types, ``LinearExplainer`` for logistic regression, and otherwise a
+        ``predict_proba``-based explainer over at most 50 background rows. If any
+        step fails (including importing ``shap``), returns ``(None, "permutation")``.
+        A new explainer is built on every call; nothing is cached here.
+        """
+        mt = model_type.lower()
+        try:
+            # Imported lazily so a missing or broken shap install degrades to the fallback.
+            import shap
+            # Tree-based models (including XGBoost and LightGBM)
+            if mt in ("random_forest", "decision_tree", "xgboost", "lightgbm"):
+                return shap.TreeExplainer(model), "shap_tree"
+            if mt == "logistic_regression":
+                return shap.LinearExplainer(model, X_train), "shap_linear"
+            # KNN, SVM, NaiveBayes → KernelExplainer with reduced background for speed
+            bg = shap.sample(X_train, min(50, len(X_train)))  # Reduced from 100 to 50
+            try:
+                # Let SHAP choose an algorithm first; both paths are labelled "shap_kernel".
+                explainer = shap.Explainer(model.predict_proba, bg, algorithm="auto")
+                return explainer, "shap_kernel"
+            except Exception:
+                return shap.KernelExplainer(model.predict_proba, bg), "shap_kernel"
+        except Exception as exc:
+            logger.warning("SHAP explainer creation failed: %s — using permutation", exc)
+            return None, "permutation"
+    def _shap_values_binary(
+        self, explainer: Any, method: str, X: np.ndarray, model: Any
+    ) -> np.ndarray:
+        """Return 2-D SHAP array (n_samples, n_features) for the positive class."""
+        import shap
+        try:
+            sv = explainer.shap_values(X)
+            if isinstance(sv, list) and len(sv) == 2:
+                return np.array(sv[1])
+            if isinstance(sv, np.ndarray):
+                if sv.ndim == 3:
+                    return sv[:, :, 1]
+                return sv
+            return np.array(sv)
+        except Exception as exc:
+            logger.warning("SHAP value computation failed: %s — fallback", exc)
+            return self._permutation_importance(model, X)
+    def _permutation_importance(self, model: Any, X: np.ndarray) -> np.ndarray:
+        """Rough fallback: feature std × coefficient magnitude."""
+        try:
+            if hasattr(model, "coef_"):
+                coef = np.abs(model.coef_[0] if model.coef_.ndim > 1 else model.coef_)
+                return np.outer(np.ones(len(X)), coef)
+            if hasattr(model, "feature_importances_"):
+                fi = model.feature_importances_
+                return np.outer(np.ones(len(X)), fi)
+        except Exception:
+            pass
+        return np.zeros((len(X), X.shape[1]))
+    def global_importance(
+        self,
+        model_id: str,
+        model: Any,
+        X_test: np.ndarray,
+        y_test: np.ndarray,
+        feature_names: list[str],
+        X_train: np.ndarray,
+        model_type: str,
+        classes: list[str],
+    ) -> GlobalExplainabilityResponse:
+        """Step-6 endpoint — computes global SHAP feature importance for the active model."""
+        explainer, method = self._get_explainer(model, X_train, model_type)
+        if explainer is not None:
+            sv = self._shap_values_binary(explainer, method, X_test[:200], model)
+        else:
+            sv = self._permutation_importance(model, X_test[:200])
+            method = "permutation"
+        mean_abs = np.mean(np.abs(sv), axis=0)
+        mean_signed = np.mean(sv, axis=0)
+        total = mean_abs.sum() if mean_abs.sum() > 0 else 1.0
+        indices = np.argsort(mean_abs)[::-1]
+        items: list[FeatureImportanceItem] = []
+        cumulative = 0.0
+        top5_cumulative = 0.0
+        for rank, idx in enumerate(indices):
+            name = feature_names[idx] if idx < len(feature_names) else f"feature_{idx}"
+            imp = float(mean_abs[idx])
+            cumulative += imp / total
+            if rank < 5:
+                top5_cumulative = cumulative
+            direction: str
+            if mean_signed[idx] > 0.01:
+                direction = "positive"
+            elif mean_signed[idx] < -0.01:
+                direction = "negative"
+            else:
+                direction = "neutral"
+            note = TOP_FEATURE_NOTES.get(name, f"{_clinical_name(name)} influences the model's predictions.")
+            items.append(FeatureImportanceItem(
+                feature_name=name,
+                clinical_name=_clinical_name(name),
+                importance=round(imp, 6),
+                direction=direction,
+                clinical_note=note,
+            ))
+        top_name = items[0].feature_name if items else ""
+        top_note = TOP_FEATURE_NOTES.get(
+            top_name,
+            f"{_clinical_name(top_name)} is the most influential variable in this model's decisions.",
+        )
+        return GlobalExplainabilityResponse(
+            model_id=model_id,
+            method=method,
+            feature_importances=items,
+            top_feature_clinical_note=top_note,
+            explained_variance_pct=round(top5_cumulative * 100, 1),
+        )
+    def single_patient(
+        self,
+        model_id: str,
+        model: Any,
+        patient_idx: int,
+        X_test: np.ndarray,
+        feature_names: list[str],
+        X_train: np.ndarray,
+        model_type: str,
+        classes: list[str],
+        y_test: np.ndarray,
+        scaler: Any = None,
+    ) -> SinglePatientExplainResponse:
+        """Compute the SHAP waterfall for a single patient row."""
+        explainer, method = self._get_explainer(model, X_train, model_type)
+        x_patient = X_test[patient_idx : patient_idx + 1]
+        # Inverse-transform to get clinical (unscaled) values for display
+        if scaler is not None:
+            try:
+                x_patient_raw = scaler.inverse_transform(x_patient)[0]
+            except Exception as exc:
+                logger.warning("Inverse-transform failed in single_patient: %s — using scaled values", exc)
+                x_patient_raw = x_patient[0]
+        else:
+            x_patient_raw = x_patient[0]
+        if explainer is not None:
+            sv = self._shap_values_binary(explainer, method, x_patient, model)
+        else:
+            sv = self._permutation_importance(model, x_patient)
+        shap_vals = sv[0] if sv.ndim > 1 else sv
+        # Base value
+        base_value = 0.5
+        try:
+            if hasattr(explainer, "expected_value"):
+                ev = explainer.expected_value
+                base_value = float(ev[1] if isinstance(ev, (list, np.ndarray)) else ev)
+        except Exception:
+            pass
+        # Predicted probability
+        prob_arr = self._model_predict_proba(model, x_patient)
+        if prob_arr.shape[1] >= 2:
+            pred_class_idx = int(np.argmax(prob_arr[0]))
+            pred_prob = float(prob_arr[0, pred_class_idx])
+        else:
+            pred_class_idx = 0
+            pred_prob = 0.5
+        predicted_class = classes[pred_class_idx] if pred_class_idx < len(classes) else str(pred_class_idx)
+        # Percentile for plain language
+        pctiles = np.mean(X_train < x_patient[0], axis=0)
+        waterfall: list[SHAPWaterfallPoint] = []
+        sorted_idx = np.argsort(np.abs(shap_vals))[::-1]
+        for i in sorted_idx[:15]:
+            fname = feature_names[i] if i < len(feature_names) else f"feature_{i}"
+            sv_val = float(shap_vals[i])
+            fval_raw = float(x_patient_raw[i]) if i < len(x_patient_raw) else float(x_patient[0, i])
+            pct = float(pctiles[i]) if i < len(pctiles) else 0.5
+            waterfall.append(SHAPWaterfallPoint(
+                feature_name=fname,
+                clinical_name=_clinical_name(fname),
+                feature_value=round(fval_raw, 3),
+                shap_value=round(sv_val, 5),
+                direction="increases_risk" if sv_val > 0 else "decreases_risk",
+                plain_language=_plain_language(fname, fval_raw, pct),
+            ))
+        # Clinical summary
+        top3 = waterfall[:3]
+        risk_factors = [w.plain_language for w in top3 if w.direction == "increases_risk"]
+        protect_factors = [w.plain_language for w in top3 if w.direction == "decreases_risk"]
+        summary_parts = [
+            f"This patient was classified as '{predicted_class}' with a probability of {pred_prob:.1%}."
+        ]
+        if risk_factors:
+            summary_parts.append(f"Key risk-increasing factors: {'; '.join(risk_factors)}.")
+        if protect_factors:
+            summary_parts.append(f"Protective factors: {'; '.join(protect_factors)}.")
+        summary_parts.append(
+            "These associations are derived from the training data and do not imply causation."
+        )
+        return SinglePatientExplainResponse(
+            model_id=model_id,
+            patient_index=patient_idx,
+            predicted_class=predicted_class,
+            predicted_probability=round(pred_prob, 4),
+            base_value=round(base_value, 4),
+            waterfall=waterfall,
+            clinical_summary=" ".join(summary_parts),
+        )
+    def _model_predict_proba(self, model: Any, X: np.ndarray) -> np.ndarray:
+        """Proxy for the model's predict_proba that survives SHAP's background-sample workflow."""
+        if hasattr(model, "predict_proba"):
+            return model.predict_proba(X)
+        if hasattr(model, "decision_function"):
+            scores = model.decision_function(X)
+            if scores.ndim == 1:
+                p = 1 / (1 + np.exp(-scores))
+                return np.column_stack([1 - p, p])
+        return np.array([[0.5, 0.5]])
+    # ------------------------------------------------------------------
+    # What-If analysis
+    # ------------------------------------------------------------------
+    def what_if(
+        self,
+        model_id: str,
+        model: Any,
+        patient_index: int,
+        feature_name: str,
+        new_value: float,
+        X_test: np.ndarray,
+        feature_names: list[str],
+        scaler: Any | None,
+    ) -> WhatIfResponse:
+        """Simulate changing a single feature and return the probability shift."""
+        if feature_name not in feature_names:
+            raise ValueError(f"Feature '{feature_name}' not found. Available: {feature_names}")
+        n_test = len(X_test)
+        if patient_index < 0 or patient_index >= n_test:
+            raise IndexError(f"Patient index {patient_index} out of range [0, {n_test - 1}]")
+        feat_idx = feature_names.index(feature_name)
+        # Original row (already scaled if scaler was applied during training)
+        original_row = X_test[patient_index : patient_index + 1].copy()
+        # Get original clinical value by inverse-transforming
+        if scaler is not None:
+            try:
+                original_clinical = scaler.inverse_transform(original_row)[0, feat_idx]
+            except Exception:
+                original_clinical = float(original_row[0, feat_idx])
+        else:
+            original_clinical = float(original_row[0, feat_idx])
+        # Build modified row: start from scaled original, replace the feature
+        modified_row = original_row.copy()
+        if scaler is not None:
+            # new_value is in clinical space; we need to scale only that feature.
+            # Build a full clinical row, replace the feature, then re-scale.
+            try:
+                clinical_row = scaler.inverse_transform(original_row)
+                clinical_row[0, feat_idx] = new_value
+                modified_row = scaler.transform(clinical_row)
+            except Exception:
+                # Fallback: inject raw value directly
+                modified_row[0, feat_idx] = new_value
+        else:
+            modified_row[0, feat_idx] = new_value
+        # Predict probabilities
+        original_probs = self._model_predict_proba(model, original_row)
+        modified_probs = self._model_predict_proba(model, modified_row)
+        # For binary: use class-1 probability; for multiclass: use max probability
+        if original_probs.shape[1] == 2:
+            original_prob = float(original_probs[0, 1])
+            new_prob = float(modified_probs[0, 1])
+        else:
+            original_prob = float(np.max(original_probs[0]))
+            new_prob = float(np.max(modified_probs[0]))
+        shift = new_prob - original_prob
+        if abs(shift) < 1e-6:
+            direction = "no_change"
+        elif shift > 0:
+            direction = "increased_risk"
+        else:
+            direction = "decreased_risk"
+        return WhatIfResponse(
+            feature_name=feature_name,
+            original_value=round(float(original_clinical), 4),
+            new_value=round(new_value, 4),
+            original_prob=round(original_prob, 4),
+            new_prob=round(new_prob, 4),
+            shift=round(shift, 4),
+            direction=direction,
+        )
+    # ------------------------------------------------------------------
+    # Sample patients for dropdown picker
+    # ------------------------------------------------------------------
+    def sample_patients(
+        self,
+        model_id: str,
+        model: Any,
+        X_test: np.ndarray,
+    ) -> SamplePatientsResponse:
+        """Return up to 3 representative patients (low/medium/high risk)."""
+        n = len(X_test)
+        if n == 0:
+            return SamplePatientsResponse(model_id=model_id, patients=[])
+        probs = self._model_predict_proba(model, X_test)
+        # Use class-1 probability for binary; max probability otherwise
+        if probs.shape[1] == 2:
+            scores = probs[:, 1]
+        else:
+            scores = np.max(probs, axis=1)
+        sorted_indices = np.argsort(scores)
+        picks: list[tuple[int, str]] = []
+        # Low risk: lowest probability patient
+        low_idx = int(sorted_indices[0])
+        picks.append((low_idx, "low"))
+        if n >= 2:
+            # High risk: highest probability patient
+            high_idx = int(sorted_indices[-1])
+            picks.append((high_idx, "high"))
+        if n >= 3:
+            # Medium risk: patient closest to 0.5
+            diffs = np.abs(scores - 0.5)
+            med_idx = int(np.argmin(diffs))
+            # Avoid duplicating low or high pick
+            if med_idx in (low_idx, high_idx):
+                # Fall back to the median-ranked patient
+                med_idx = int(sorted_indices[n // 2])
+            picks.append((med_idx, "medium"))
+        patients: list[SamplePatient] = []
+        for idx, level in picks:
+            prob = float(scores[idx])
+            label = level.capitalize()
+            patients.append(SamplePatient(
+                index=idx,
+                risk_level=level,
+                probability=round(prob, 4),
+                summary=f"Patient #{idx} — {label} Risk ({prob:.0%})",
+            ))
+        # Sort by risk level order: low, medium, high
+        order = {"low": 0, "medium": 1, "high": 2}
+        patients.sort(key=lambda p: order[p.risk_level])
+        return SamplePatientsResponse(model_id=model_id, patients=patients)

app/services/insight_service.py ADDED Viewed

	@@ -0,0 +1,607 @@

+"""LLM-powered clinical insight generation.
+Provider chain: MedGemma (Vertex AI) → Gemini (Google AI) → static template fallback.
+"""
+from __future__ import annotations
+import asyncio
+import json
+import logging
+import os
+import random
+from typing import Any
+import httpx
+logger = logging.getLogger(__name__)
+# Timeout per LLM call (seconds). Gemma 4 is a reasoning model that emits
+# chain-of-thought tokens before the answer, so single calls can legitimately
+# take 60–90s on the ethics prompt. 200s leaves a very generous ceiling for
+# the long-tail cases and rare upstream slowness.
+# Used as the httpx client timeout for both the MedGemma and the Gemini call.
+_LLM_TIMEOUT = 200.0
+# Retry transient Gemini failures (timeouts, 429, 5xx). One retry is enough
+# in practice; keeping the count at 1 bounds the worst-case endpoint time
+# within the frontend axios budget (450s).
+# NOTE(review): that bound covers the Gemini leg only, i.e. at most
+# (_MAX_RETRIES + 1) attempts of up to _LLM_TIMEOUT each plus backoff. When
+# MedGemma is configured it is tried first and its own timeout adds on top.
+_MAX_RETRIES = 1
+# Base backoff in seconds: each retry sleeps base * 2**attempt plus up to
+# 0.5s of random jitter.
+_RETRY_BASE_DELAY = 1.5
+# HTTP status codes worth retrying (rate limit + server errors).
+_RETRY_STATUS_CODES = {429, 500, 502, 503, 504}
+def _build_column_stats_block(context: dict) -> str:
+    """Build feature statistics section for prompts."""
+    stats = context.get("column_statistics", [])
+    if not stats:
+        return ""
+    lines = "FEATURE STATISTICS (training set distributions):\n"
+    for cs in stats:
+        if "mean" in cs:
+            lines += f"  {cs['name']}: mean={cs['mean']}, std={cs['std']}, range=[{cs['min']}, {cs['max']}]\n"
+        else:
+            lines += f"  {cs['name']}: (statistics unavailable)\n"
+    return lines + "\n"
+def _build_comparison_block(context: dict) -> str:
+    """Build compared models section for prompts."""
+    models = context.get("compared_models", [])
+    if not models:
+        return ""
+    current = context.get("model_type", "unknown")
+    lines = "MODEL COMPARISON (other models trained on same dataset):\n"
+    for m in models:
+        lines += (
+            f"  - {m['model_type']}: AUC={m['auc_roc']:.3f}, "
+            f"Acc={m['accuracy']:.3f}, Sens={m['sensitivity']:.3f}, "
+            f"F1={m['f1_score']:.3f}, MCC={m['mcc']:.3f}\n"
+        )
+    lines += f"\n  The model being assessed is: {current}.\n"
+    lines += f"  There are {len(models)} models total. Reference ALL of them by name with their key metrics.\n"
+    lines += "  Compare the current model's strengths and weaknesses against each alternative.\n\n"
+    return lines
+def _build_raw_columns_block(context: dict) -> str:
+    """Build raw dataset column overview (from Step 2 explore)."""
+    cols = context.get("raw_column_meta", [])
+    if not cols:
+        return ""
+    row_count = context.get("row_count_original", "?")
+    lines = f"RAW DATASET OVERVIEW ({row_count} rows before preprocessing):\n"
+    for c in cols:
+        role = "TARGET" if c.get("is_target") else "feature"
+        missing = f", missing={c['missing_count']} ({c['missing_pct']}%)" if c["missing_count"] > 0 else ""
+        samples = ", ".join(c.get("sample_values", []))
+        lines += (
+            f"  {c['name']} [{role}]: dtype={c['dtype']}, "
+            f"unique={c['unique_count']}{missing}, "
+            f"samples=[{samples}]\n"
+        )
+    lines += "\n"
+    return lines
+def _build_sample_patients_block(context: dict) -> str:
+    """Build sample patient rows for LLM grounding."""
+    patients = context.get("sample_patients", [])
+    if not patients:
+        return ""
+    lines = "SAMPLE PATIENTS FROM TEST SET (real data, not synthetic):\n"
+    for i, row in enumerate(patients):
+        outcome = row.pop("_actual_outcome", "?")
+        vals = ", ".join(f"{k}={v}" for k, v in row.items())
+        lines += f"  Patient {i+1}: {vals} → actual outcome: {outcome}\n"
+        row["_actual_outcome"] = outcome  # restore
+    lines += "  Use these real patient profiles to ground your clinical reasoning.\n\n"
+    return lines
+def _build_ethics_prompt(context: dict) -> str:
+    """Build a structured prompt with full clinical context for ethics/bias insight."""
+    specialty = context.get("specialty_name", "Unknown")
+    prediction_task = context.get("what_ai_predicts", "clinical outcome")
+    clinical_bg = context.get("clinical_context", "")
+    model_type = context.get("model_type", "unknown")
+    features = context.get("feature_names", [])
+    target = context.get("target_variable", "outcome")
+    classes = context.get("classes", [])
+    # Model hyperparameters
+    params = context.get("model_params", {})
+    params_block = ", ".join(f"{k}={v}" for k, v in params.items()) if params else "defaults"
+    # Class distribution in training set
+    class_dist = context.get("class_distribution_train", {})
+    dist_block = ", ".join(f"{k}: {v}" for k, v in class_dist.items()) if class_dist else "unknown"
+    # Confusion matrix
+    cm = context.get("confusion_matrix", {})
+    if "TP" in cm:
+        cm_block = f"TP={cm['TP']}, FP={cm['FP']}, FN={cm['FN']}, TN={cm['TN']}"
+    else:
+        cm_block = "multiclass (see subgroup data)"
+    metrics_block = (
+        f"  Accuracy:    {context.get('accuracy', 'N/A')}\n"
+        f"  Sensitivity: {context.get('sensitivity', 'N/A')}  (recall — how many true positives found)\n"
+        f"  Specificity: {context.get('specificity', 'N/A')}\n"
+        f"  Precision:   {context.get('precision', 'N/A')}\n"
+        f"  F1 Score:    {context.get('f1_score', 'N/A')}\n"
+        f"  AUC-ROC:     {context.get('auc_roc', 'N/A')}\n"
+        f"  MCC:         {context.get('mcc', 'N/A')}\n"
+        f"  Train Acc:   {context.get('train_accuracy', 'N/A')}\n"
+        f"  CV Mean:     {context.get('cv_mean', 'N/A')} (std: {context.get('cv_std', 'N/A')})\n"
+        f"  Optimal threshold: {context.get('optimal_threshold', 0.5)}\n"
+        f"  Confusion matrix: {cm_block}\n"
+    )
+    bias_lines = ""
+    for sg in context.get("subgroup_details", []):
+        bias_lines += (
+            f"  - {sg['group']}: sensitivity={sg['sensitivity']:.1%}, "
+            f"accuracy={sg['accuracy']:.1%}, n={sg['sample_size']}, "
+            f"status={sg['status']}"
+        )
+        if sg.get("status_reason"):
+            bias_lines += f" ({sg['status_reason']})"
+        bias_lines += "\n"
+    warnings_block = ""
+    for w in context.get("bias_warnings", []):
+        warnings_block += f"  - {w['group']}: {w['metric']} gap = {w['gap']:.1%}\n"
+    # SHAP / Feature importance
+    fi_block = ""
+    for fi in context.get("feature_importances", []):
+        direction_label = "increases risk" if fi["direction"] == "positive" else "decreases risk" if fi["direction"] == "negative" else "neutral"
+        fi_block += f"  {fi['importance']:.3f}  {fi['clinical_name']} ({direction_label})\n"
+    shap_note = context.get("top_feature_clinical_note", "")
+    explained_pct = context.get("explained_variance_top5_pct", 0)
+    # --- DATA BLOCK (always present) ---
+    data_block = (
+        f"CLINICAL DOMAIN: {specialty}\n"
+        f"PREDICTION TASK: {prediction_task}\n"
+        f"TARGET VARIABLE: '{target}' with classes: {classes}\n"
+        f"DATA SOURCE: {context.get('data_source', 'unknown')}\n"
+        f"CLINICAL BACKGROUND: {clinical_bg}\n\n"
+        f"{_build_raw_columns_block(context)}"
+        f"DATASET (after preprocessing):\n"
+        f"  Features ({len(features)}): {', '.join(features)}\n"
+        f"  Training samples: {context.get('train_size', '?')}\n"
+        f"  Test samples: {context.get('test_size', '?')}\n"
+        f"  Class distribution (train): {dist_block}\n"
+        f"  SMOTE applied: {context.get('use_smote', False)}\n"
+        f"  Normalization: {context.get('normalization', 'N/A')}\n\n"
+        f"{_build_column_stats_block(context)}"
+        f"{_build_sample_patients_block(context)}"
+        f"CURRENT MODEL: {model_type}\n"
+        f"  Hyperparameters: {params_block}\n"
+        f"  Training time: {context.get('training_time_ms', 'N/A')} ms\n\n"
+        f"PERFORMANCE:\n{metrics_block}\n"
+        f"FEATURE IMPORTANCE (SHAP — {context.get('shap_method', 'N/A')}):\n"
+        f"  Top 5 features explain {explained_pct:.1f}% of model decisions.\n"
+        f"{fi_block}"
+        f"  Clinical note: {shap_note}\n\n"
+        f"SUBGROUP FAIRNESS:\n"
+        f"  Overall sensitivity: {context.get('overall_sensitivity', 'N/A')}\n"
+        f"{bias_lines}\n"
+        f"BIAS WARNINGS:\n{warnings_block if warnings_block else '  None detected\n'}\n"
+        f"OVERFITTING: {'YES (train={} vs test={})'.format(context.get('train_accuracy', '?'), context.get('accuracy', '?')) if context.get('overfitting_warning') else 'No significant gap'}\n\n"
+    )
+    # --- COMPARISON BLOCK (dynamic) ---
+    comparison_block = _build_comparison_block(context)
+    # --- INSTRUCTION BLOCK (adapts to available data) ---
+    has_comparison = len(context.get("compared_models", [])) > 1
+    if has_comparison:
+        instruction = (
+            "You have data from MULTIPLE models trained on the same clinical dataset. "
+            "Write an insightful clinical analysis (400-550 words) in markdown.\n\n"
+            "## Overall Verdict\n"
+            "Give a verdict: 🟢 Deployable with monitoring, 🟡 Needs improvement, or 🔴 Not ready. "
+            "Name the best model and explain WHY it wins. "
+            "Use the sample patient data to illustrate — e.g., 'Patient 1 (age=75, EF=20%) died and was correctly flagged, "
+            "but Patient 3 with similar risk factors was missed.'\n\n"
+            "## Model Comparison\n"
+            "Create a clear ranking of ALL models. For each one:\n"
+            "  - Name, AUC-ROC, sensitivity, accuracy (copy exact values from MODEL COMPARISON above)\n"
+            "  - One-line strength and one-line weakness\n"
+            "Explain what the ranking reveals about the dataset — why do certain model families perform better?\n\n"
+            "## Data & Feature Insights\n"
+            "Analyze the feature statistics and sample patients together:\n"
+            "  - Are features clinically meaningful for this prediction task?\n"
+            "  - Any red flags? (data leakage, extreme ranges, suspicious correlations)\n"
+            "  - What do the SHAP importances + actual patient profiles reveal?\n"
+            "  - Class imbalance impact on results?\n\n"
+            f"## Recommendations for {specialty}\n"
+            "3-4 numbered, specific, actionable recommendations tied to the comparison results.\n\n"
+        )
+    else:
+        instruction = (
+            f"You have one {model_type} model trained for {prediction_task}. "
+            "Write an insightful clinical analysis (300-400 words) in markdown.\n\n"
+            "## Overall Verdict\n"
+            "Is this model ready? Verdict: 🟢 Deployable with monitoring, 🟡 Needs improvement, or 🔴 Not ready. "
+            "Use sample patient data to illustrate real impact — show how specific patients would be affected.\n\n"
+            "## Data & Feature Insights\n"
+            "Analyze features, their distributions, and SHAP importances:\n"
+            "  - Are the top features clinically sound for this domain?\n"
+            "  - Any suspicious patterns? (data leakage, features that shouldn't be available at prediction time)\n"
+            "  - What do the sample patient profiles reveal about model behavior?\n"
+            "  - Subgroup fairness: which patients are most at risk of being missed?\n\n"
+            f"## Recommendations for {specialty}\n"
+            "3-4 numbered, actionable recommendations tied to THIS model's results.\n\n"
+        )
+    rules = (
+        "STRICT DATA RULES — VIOLATIONS WILL INVALIDATE THE ASSESSMENT:\n"
+        "- NEVER invent, estimate, or round any number. Every metric you cite MUST appear exactly in the data above.\n"
+        "- If you write a percentage, accuracy, sensitivity, AUC, or any number — it must be copy-pasted from the data.\n"
+        "- If you mention a patient, use their exact feature values from SAMPLE PATIENTS.\n"
+        "- If a piece of data is not provided above, say 'not available' — do NOT fabricate it.\n"
+        "- You may provide clinical INTERPRETATION of the numbers, but the numbers themselves must be verbatim.\n\n"
+        "FORMAT RULES:\n"
+        "- Use markdown: **bold** key metrics, bullet points, numbered lists\n"
+        "- Be direct and clinical, not academic\n"
+        "- Focus on insights a clinician would find genuinely valuable\n"
+    )
+    return data_block + comparison_block + instruction + rules
+def _build_case_study_prompt(context: dict) -> str:
+    """Build prompt for case studies tied to this model's domain and weaknesses."""
+    specialty = context.get("specialty_name", "Unknown")
+    prediction_task = context.get("what_ai_predicts", "clinical outcome")
+    features = context.get("feature_names", [])
+    model_type = context.get("model_type", "unknown")
+    weak_groups = [
+        sg for sg in context.get("subgroup_details", [])
+        if sg.get("status") != "acceptable"
+    ]
+    weakness_block = ""
+    for sg in weak_groups:
+        weakness_block += f"  - {sg['group']}: sensitivity={sg['sensitivity']:.1%}, status={sg['status']}\n"
+    has_demo_features = any(f in [fn.lower() for fn in features] for f in ["sex", "gender", "age", "race", "ethnicity"])
+    # Top driving features
+    top_features_block = ""
+    for fi in context.get("feature_importances", [])[:5]:
+        top_features_block += f"  - {fi['clinical_name']} (importance: {fi['importance']:.3f}, {fi['direction']})\n"
+    cm = context.get("confusion_matrix", {})
+    cm_block = f"FN={cm.get('FN', '?')}, FP={cm.get('FP', '?')}" if "FN" in cm else ""
+    return (
+        f"A {model_type} model was trained in {specialty} "
+        f"to predict: {prediction_task}.\n\n"
+        f"Features used: {', '.join(features)}\n"
+        f"{'Demographic features present: model uses patient demographics (sex/age) which creates fairness risk.' if has_demo_features else 'No demographic features in model.'}\n\n"
+        f"TOP DRIVING FEATURES (SHAP):\n{top_features_block if top_features_block else '  Not available\n'}\n"
+        f"MODEL WEAKNESSES:\n"
+        f"  Accuracy: {context.get('accuracy', 'N/A')}, Sensitivity: {context.get('sensitivity', 'N/A')}, AUC: {context.get('auc_roc', 'N/A')}\n"
+        f"  {cm_block}\n"
+        f"  Subgroups at risk:\n{weakness_block if weakness_block else '  None identified\n'}\n"
+        f"{_build_column_stats_block(context)}"
+        f"{_build_sample_patients_block(context)}"
+        f"{_build_comparison_block(context)}"
+        "Generate exactly 3 real-world AI failure case studies RELEVANT to:\n"
+        f"  - The clinical domain: {specialty}\n"
+        "  - The specific weaknesses listed above\n"
+        "  - The type of bias or error this model is susceptible to\n\n"
+        "For each case, provide a JSON object with these exact keys:\n"
+        '  "title": specific real incident title,\n'
+        f'  "specialty": medical specialty (prefer {specialty} or related),\n'
+        '  "year": integer 2015-2024,\n'
+        '  "severity": "failure" | "near_miss" | "prevention",\n'
+        '  "what_happened": 2-3 factual sentences,\n'
+        '  "impact": 2-3 sentences with numbers on patient impact,\n'
+        f'  "lesson": 2-3 sentences tying back to THIS {model_type} model\'s weaknesses\n\n'
+        "Return ONLY a JSON array of 3 objects. No markdown, no explanation, no code fences.\n"
+    )
+def _strip_markdown(text: str) -> str:
+    """Remove common markdown formatting from LLM output."""
+    import re
+    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)   # **bold**
+    text = re.sub(r'\*(.+?)\*', r'\1', text)         # *italic*
+    text = re.sub(r'^#{1,4}\s+', '', text, flags=re.MULTILINE)  # headings
+    return text.strip()
+def _build_eu_ai_act_prompt(context: dict) -> str:
+    """Build prompt for EU AI Act compliance enrichment."""
+    specialty = context.get("specialty_name", "Unknown")
+    model_type = context.get("model_type", "unknown")
+    prediction_task = context.get("what_ai_predicts", "clinical outcome")
+    items_block = ""
+    for item in context.get("eu_ai_act_items", []):
+        items_block += f'  - id: "{item["id"]}", text: "{item["text"]}", article: "{item["article"]}"\n'
+    return (
+        f"A {model_type} model in {specialty} predicts: {prediction_task}.\n\n"
+        f"Model metrics: Accuracy={context.get('accuracy', 'N/A')}, "
+        f"Sensitivity={context.get('sensitivity', 'N/A')}, "
+        f"AUC-ROC={context.get('auc_roc', 'N/A')}, "
+        f"MCC={context.get('mcc', 'N/A')}\n"
+        f"Features: {', '.join(context.get('feature_names', []))}\n"
+        f"SHAP top feature: {context.get('top_feature_clinical_note', 'N/A')}\n"
+        f"Explained variance (top 5): {context.get('explained_variance_top5_pct', 0):.1f}%\n"
+        f"Overall sensitivity: {context.get('overall_sensitivity', 'N/A')}\n"
+        f"Overfitting: {'YES' if context.get('overfitting_warning') else 'No'}\n"
+        f"Bias warnings: {len(context.get('bias_warnings', []))} detected\n\n"
+        f"{_build_column_stats_block(context)}"
+        "EU AI ACT COMPLIANCE ITEMS to enrich:\n"
+        f"{items_block}\n"
+        "For each item, write a model-specific description (2-3 sentences) that:\n"
+        "- References actual metrics, features, or findings from THIS model\n"
+        "- Explains the compliance status in concrete terms\n"
+        "- Is written for a clinician, not a lawyer\n\n"
+        "Return ONLY a JSON array of objects with keys: \"id\", \"enriched_description\"\n"
+        "Return exactly one object per item above, in the same order.\n"
+        "No markdown, no explanation, no code fences.\n"
+    )
+class InsightService:
+    """Generates clinical insights using MedGemma or Gemini with template fallback."""
+    def __init__(self) -> None:
+        """Detect the configured provider (Gemini, local Ollama, or template fallback) from env vars."""
+        # Vertex AI MedGemma config
+        self._vertex_project = os.getenv("GOOGLE_CLOUD_PROJECT", "")
+        self._vertex_location = os.getenv("VERTEX_AI_LOCATION", "us-central1")
+        self._medgemma_endpoint = os.getenv("MEDGEMMA_ENDPOINT_ID", "")
+        # Gemini API config
+        self._gemini_api_key = os.getenv("GEMINI_API_KEY", "")
+        self._gemini_model = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
+        self._provider = self._detect_provider()
+        logger.info("InsightService initialized — provider: %s", self._provider)
+    def _detect_provider(self) -> str:
+        """Return the provider name based on available API keys / endpoints."""
+        if self._medgemma_endpoint and self._vertex_project:
+            return "medgemma"
+        if self._gemini_api_key:
+            return "gemini"
+        return "template"
+    async def generate_ethics_insight(self, context: dict) -> dict[str, Any]:
+        """Generate clinical insight for ethics/bias assessment."""
+        prompt = _build_ethics_prompt(context)
+        system = (
+            "You are a clinical AI safety specialist reviewing ML models in healthcare. "
+            "CRITICAL: You must ONLY cite numbers that appear in the provided data. "
+            "Never invent, estimate, approximate, or round any metric. "
+            "If a number is not in the data, say 'not available'. "
+            "You provide clinical interpretation of real metrics — you do not generate synthetic data. "
+            "Be direct, evidence-based, and clinically insightful."
+        )
+        return await self._call_llm(prompt, "ethics", system)
+    async def generate_case_studies(self, context: dict) -> dict[str, Any]:
+        """Generate relevant case studies based on model metrics."""
+        prompt = _build_case_study_prompt(context)
+        system = (
+            "You are a clinical AI safety educator. "
+            "Generate domain-relevant AI failure case studies tied to this model's real weaknesses. "
+            "When referencing model metrics (sensitivity, accuracy, etc.), use ONLY the exact values from the provided data. "
+            "The scenarios are illustrative but all cited numbers must come from the actual model data. "
+            "Return only valid JSON."
+        )
+        result = await self._call_llm(prompt, "case_studies", system)
+        # Parse JSON array from LLM response
+        if result["source"] != "template":
+            try:
+                import re
+                text = result["text"].strip()
+                # Strip markdown code fences if present
+                if "```" in text:
+                    match = re.search(r'```(?:json)?\s*\n?(.*?)```', text, re.DOTALL)
+                    if match:
+                        text = match.group(1).strip()
+                # Find JSON array in text (LLM may add prose before/after)
+                bracket_start = text.find("[")
+                bracket_end = text.rfind("]")
+                if bracket_start != -1 and bracket_end != -1:
+                    text = text[bracket_start:bracket_end + 1]
+                cases = json.loads(text)
+                if isinstance(cases, list) and len(cases) > 0:
+                    result["case_studies"] = cases
+                    return result
+            except (json.JSONDecodeError, IndexError, ValueError) as exc:
+                logger.warning("Failed to parse case studies JSON from LLM: %s", exc)
+        # Fallback: return empty so frontend uses existing static cases
+        result["case_studies"] = []
+        return result
+    async def generate_eu_ai_act_insights(self, context: dict) -> dict[str, Any]:
+        """Generate model-specific EU AI Act compliance descriptions."""
+        prompt = _build_eu_ai_act_prompt(context)
+        system = (
+            "You are a regulatory compliance specialist for the EU AI Act. "
+            "You write model-specific compliance assessments for healthcare AI systems. "
+            "Reference actual metrics and findings. Return only valid JSON."
+        )
+        result = await self._call_llm(prompt, "eu_ai_act", system)
+        if result["source"] != "template":
+            try:
+                import re
+                text = result["text"].strip()
+                if "```" in text:
+                    match = re.search(r'```(?:json)?\s*\n?(.*?)```', text, re.DOTALL)
+                    if match:
+                        text = match.group(1).strip()
+                bracket_start = text.find("[")
+                bracket_end = text.rfind("]")
+                if bracket_start != -1 and bracket_end != -1:
+                    text = text[bracket_start:bracket_end + 1]
+                items = json.loads(text)
+                if isinstance(items, list) and len(items) > 0:
+                    result["items"] = items
+                    return result
+            except (json.JSONDecodeError, IndexError, ValueError) as exc:
+                logger.warning("Failed to parse EU AI Act JSON from LLM: %s", exc)
+        result["items"] = []
+        return result
+    async def _call_llm(self, prompt: str, task: str, system: str = "") -> dict[str, Any]:
+        """Try MedGemma → Gemini → template."""
+        # Try MedGemma via Vertex AI
+        if self._provider == "medgemma" or (self._medgemma_endpoint and self._vertex_project):
+            try:
+                text = await self._call_medgemma(prompt, system)
+                return {"source": "medgemma", "text": text}
+            except Exception as exc:
+                logger.warning("MedGemma failed (%s), falling back to Gemini: %r", task, exc)
+        # Try Gemini API
+        if self._gemini_api_key:
+            try:
+                text = await self._call_gemini(prompt, system)
+                return {"source": "gemini", "text": text}
+            except Exception as exc:
+                logger.warning("Gemini failed (%s), falling back to template: %r", task, exc)
+        # Template fallback
+        return {"source": "template", "text": ""}
+    async def _call_medgemma(self, prompt: str, system: str = "") -> str:
+        """Call MedGemma deployed on Vertex AI (vLLM container with OpenAI-compatible API)."""
+        import subprocess
+        token_result = subprocess.run(
+            ["gcloud", "auth", "print-access-token"],
+            capture_output=True, text=True, timeout=5,
+        )
+        if token_result.returncode != 0:
+            raise RuntimeError("Failed to get gcloud access token")
+        token = token_result.stdout.strip()
+        # vLLM container exposes OpenAI-compatible /v1/chat/completions via rawPredict
+        url = (
+            f"https://{self._vertex_location}-aiplatform.googleapis.com/v1/"
+            f"projects/{self._vertex_project}/locations/{self._vertex_location}/"
+            f"endpoints/{self._medgemma_endpoint}:rawPredict"
+        )
+        async with httpx.AsyncClient(timeout=_LLM_TIMEOUT) as client:
+            resp = await client.post(
+                url,
+                headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
+                json={
+                    "model": "google/medgemma-4b-it",
+                    "messages": [
+                        {"role": "system", "content": system or "You are a clinical AI safety specialist."},
+                        {"role": "user", "content": prompt},
+                    ],
+                    "max_tokens": 2048,
+                    "temperature": 0.3,
+                },
+            )
+            resp.raise_for_status()
+            data = resp.json()
+            choices = data.get("choices", [])
+            if choices:
+                return choices[0].get("message", {}).get("content", "")
+            # Fallback: try predict format
+            predictions = data.get("predictions", [])
+            if predictions:
+                return predictions[0] if isinstance(predictions[0], str) else str(predictions[0])
+            raise RuntimeError(f"Empty MedGemma response: {data}")
+    async def _call_gemini(self, prompt: str, system: str = "") -> str:
+        """Call Gemini via Google AI Studio REST API with retry on transient errors."""
+        last_exc: Exception | None = None
+        for attempt in range(_MAX_RETRIES + 1):
+            try:
+                return await self._call_gemini_once(prompt, system)
+            except httpx.HTTPStatusError as exc:
+                status = exc.response.status_code
+                if status in _RETRY_STATUS_CODES and attempt < _MAX_RETRIES:
+                    delay = _RETRY_BASE_DELAY * (2 ** attempt) + random.uniform(0, 0.5)
+                    logger.warning(
+                        "Gemini HTTP %d on attempt %d/%d, retrying in %.1fs",
+                        status, attempt + 1, _MAX_RETRIES + 1, delay,
+                    )
+                    last_exc = exc
+                    await asyncio.sleep(delay)
+                    continue
+                raise
+            except (httpx.TimeoutException, httpx.TransportError, RuntimeError) as exc:
+                if attempt < _MAX_RETRIES:
+                    delay = _RETRY_BASE_DELAY * (2 ** attempt) + random.uniform(0, 0.5)
+                    logger.warning(
+                        "Gemini transient failure on attempt %d/%d (%r), retrying in %.1fs",
+                        attempt + 1, _MAX_RETRIES + 1, exc, delay,
+                    )
+                    last_exc = exc
+                    await asyncio.sleep(delay)
+                    continue
+                raise
+        # Unreachable — loop either returns or re-raises. Keep type-checker happy.
+        if last_exc:
+            raise last_exc
+        raise RuntimeError("Gemini retry loop exhausted without result")
+    async def _call_gemini_once(self, prompt: str, system: str = "") -> str:
+        """Single attempt against the Gemini / Gemma REST endpoint."""
+        url = (
+            f"https://generativelanguage.googleapis.com/v1beta/"
+            f"models/{self._gemini_model}:generateContent"
+            f"?key={self._gemini_api_key}"
+        )
+        body: dict[str, Any] = {
+            "contents": [{"parts": [{"text": prompt}]}],
+            "generationConfig": {
+                "maxOutputTokens": 8192,
+                "temperature": 0.3,
+            },
+        }
+        if system:
+            body["systemInstruction"] = {"parts": [{"text": system}]}
+        async with httpx.AsyncClient(timeout=_LLM_TIMEOUT) as client:
+            resp = await client.post(url, json=body)
+            resp.raise_for_status()
+            data = resp.json()
+            candidates = data.get("candidates", [])
+            if candidates:
+                finish_reason = candidates[0].get("finishReason", "UNKNOWN")
+                parts = candidates[0].get("content", {}).get("parts", [])
+                # Gemma 4 (and any reasoning model) returns a separate part with
+                # thought=True containing chain-of-thought; skip those and take
+                # only the final-answer parts.
+                answer_parts = [p for p in parts if not p.get("thought", False)]
+                text = "".join(p.get("text", "") for p in answer_parts)
+                logger.info(
+                    "Gemini response: %d chars, finishReason=%s, parts=%d (%d answer)",
+                    len(text), finish_reason, len(parts), len(answer_parts),
+                )
+                if finish_reason == "MAX_TOKENS":
+                    logger.warning("Gemini output was truncated (MAX_TOKENS)")
+                if text:
+                    return text
+            # Response came back but had no usable content — treat as transient
+            # so the retry loop can take another swing.
+            block_reason = data.get("promptFeedback", {}).get("blockReason")
+            if block_reason:
+                raise RuntimeError(f"Gemini blocked response: {block_reason}")
+            raise RuntimeError(f"Empty Gemini response (candidates={len(candidates)})")

app/services/ml_service.py ADDED Viewed

	@@ -0,0 +1,855 @@

+"""ML model training and evaluation service — 8 state-of-the-art classifiers."""
+from __future__ import annotations
+import logging
+import threading
+import time
+import uuid
+from collections import OrderedDict
+from typing import Any
+import numpy as np
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.feature_selection import SelectKBest, VarianceThreshold, mutual_info_classif
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import (
+    accuracy_score,
+    confusion_matrix,
+    f1_score,
+    matthews_corrcoef,
+    precision_recall_curve,
+    precision_score,
+    recall_score,
+    roc_auc_score,
+    roc_curve,
+)
+from sklearn.model_selection import (
+    RandomizedSearchCV,
+    RepeatedStratifiedKFold,
+    StratifiedKFold,
+    cross_val_score,
+)
+from sklearn.naive_bayes import GaussianNB
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import MinMaxScaler, StandardScaler, label_binarize
+from imblearn.pipeline import Pipeline as ImbPipeline
+from imblearn.over_sampling import SMOTE
+from sklearn.svm import SVC
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.decomposition import PCA
+from app.models.ml_schemas import (
+    PARAM_SCHEMAS,
+    CompareEntry,
+    CompareResponse,
+    ConfusionMatrixData,
+    DecisionMesh,
+    KNNScatterData,
+    MetricsResponse,
+    ModelType,
+    ROCPoint,
+    ScatterPoint,
+    TrainResponse,
+)
+logger = logging.getLogger(__name__)
+_SENSITIVITY_WARNING_THRESHOLD = 0.5
+def _sanitize_float(val: Any) -> Any:
+    """Replace inf/-inf/nan with JSON-safe values recursively."""
+    if isinstance(val, float):
+        if np.isinf(val) or np.isnan(val):
+            return 0.0
+        return val
+    if isinstance(val, dict):
+        return {k: _sanitize_float(v) for k, v in val.items()}
+    if isinstance(val, list):
+        return [_sanitize_float(v) for v in val]
+    if isinstance(val, np.floating):
+        f = float(val)
+        return 0.0 if np.isinf(f) or np.isnan(f) else f
+    return val
+# Hyperparameter search spaces, keyed by the `ModelType` value string.
+# `train_and_evaluate` looks a grid up with `_PARAM_GRIDS.get(model_type.value, {})`
+# when `tune=True` and samples from it with RandomizedSearchCV.
+_PARAM_GRIDS: dict = {
+    "knn": {"n_neighbors": list(range(1, 26)), "metric": ["euclidean", "manhattan"], "weights": ["uniform", "distance"]},
+    "svm": {"C": [0.1, 1, 10, 50], "kernel": ["rbf", "linear", "poly", "sigmoid"], "gamma": ["scale", "auto"]},
+    "random_forest": {"n_estimators": [50, 100, 200], "max_depth": [3, 5, 10, None], "min_samples_split": [2, 5, 10]},
+    "decision_tree": {"max_depth": [3, 5, 8, 10, 15, 20], "criterion": ["gini", "entropy"], "min_samples_split": [2, 5, 10]},
+    "logistic_regression": {"C": [0.01, 0.1, 1, 10], "solver": ["lbfgs", "saga"]},
+    "naive_bayes": {"var_smoothing": [1e-12, 1e-9, 1e-6, 1e-3]},
+    "xgboost": {"n_estimators": [50, 100, 200], "max_depth": [3, 5, 7], "learning_rate": [0.05, 0.1, 0.2]},
+    "lightgbm": {"n_estimators": [50, 100, 200], "max_depth": [-1, 5, 7], "learning_rate": [0.05, 0.1, 0.2]},
+}
+class MLService:
+    """Owns model construction, training, evaluation, and the in-memory cross-model comparison list."""
+    def __init__(self) -> None:
+        """Initialise session + model + comparison caches."""
+        self._lock = threading.Lock()
+        self._session_store: OrderedDict[str, dict[str, Any]] = OrderedDict()
+        self._model_store: OrderedDict[str, Any] = OrderedDict()
+        self._compare_store: dict[str, list[CompareEntry]] = {}
+    # ------------------------------------------------------------------
+    # Session management (called by data service / router)
+    # ------------------------------------------------------------------
+    def store_session_data(self, session_id: str, data: dict[str, Any]) -> None:
+        """Persist the prepared train/test split for later training and evaluation calls."""
+        with self._lock:
+            self._session_store[session_id] = data
+            self._session_store.move_to_end(session_id)
+            while len(self._session_store) > 50:
+                self._session_store.popitem(last=False)
+        logger.info("ML session stored: %s", session_id)
+    def get_session(self, session_id: str) -> dict[str, Any] | None:
+        """Retrieve stored session data by id; returns `None` if unknown."""
+        with self._lock:
+            data = self._session_store.get(session_id)
+            if data is not None:
+                self._session_store.move_to_end(session_id)
+            return data
+    def get_model(self, model_id: str) -> Any | None:
+        """Retrieve a trained model by id; returns `None` if unknown."""
+        with self._lock:
+            data = self._model_store.get(model_id)
+            if data is not None:
+                self._model_store.move_to_end(model_id)
+            return data
+    # ------------------------------------------------------------------
+    # Model construction
+    # ------------------------------------------------------------------
+    def build_model(self, model_type: ModelType, params: dict[str, Any]) -> Any:
+        """Construct a scikit/XGB/LGBM estimator instance from a `TrainRequest`."""
+        # Runtime param validation via typed schemas
+        schema = PARAM_SCHEMAS.get(model_type.value)
+        if schema:
+            try:
+                validated = schema(**params)
+                params = validated.model_dump()
+            except Exception as exc:
+                logger.warning("Param validation failed for %s: %s — using defaults", model_type.value, exc)
+                params = schema().model_dump()
+        if model_type == ModelType.KNN:
+            return KNeighborsClassifier(
+                n_neighbors=params.get("n_neighbors", 5),
+                metric=params.get("metric", "euclidean"),
+                weights=params.get("weights", "distance"),
+                algorithm="auto",
+                n_jobs=1,
+            )
+        if model_type == ModelType.SVM:
+            return SVC(
+                kernel=params.get("kernel", "rbf"),
+                C=params.get("C", 1.0),
+                gamma=params.get("gamma", "scale"),
+                probability=True,
+                cache_size=1000,
+                class_weight="balanced",
+                random_state=42,
+            )
+        if model_type == ModelType.DECISION_TREE:
+            return DecisionTreeClassifier(
+                max_depth=params.get("max_depth", 5),
+                criterion=params.get("criterion", "gini"),
+                class_weight="balanced",
+                min_samples_split=params.get("min_samples_split", 5),
+                min_samples_leaf=2,
+                random_state=42,
+            )
+        if model_type == ModelType.RANDOM_FOREST:
+            return RandomForestClassifier(
+                n_estimators=params.get("n_estimators", 100),
+                max_depth=params.get("max_depth", 5),
+                class_weight="balanced",
+                n_jobs=1,
+                min_samples_leaf=2,
+                min_samples_split=params.get("min_samples_split", 2),
+                random_state=42,
+            )
+        if model_type == ModelType.LOGISTIC_REGRESSION:
+            return LogisticRegression(
+                C=params.get("C", 1.0),
+                max_iter=params.get("max_iter", 1000),
+                solver=params.get("solver", "saga"),
+                class_weight="balanced",
+                random_state=42,
+            )
+        if model_type == ModelType.NAIVE_BAYES:
+            return GaussianNB(
+                var_smoothing=params.get("var_smoothing", 1e-9),
+            )
+        if model_type == ModelType.XGBOOST:
+            try:
+                from xgboost import XGBClassifier
+                return XGBClassifier(
+                    n_estimators=params.get("n_estimators", 100),
+                    max_depth=params.get("max_depth", 5),
+                    learning_rate=params.get("learning_rate", 0.1),
+                    eval_metric="logloss",
+                    random_state=42,
+                    n_jobs=1,
+                    verbosity=0,
+                )
+            except ImportError:
+                logger.warning("xgboost not installed, falling back to RandomForest")
+                return RandomForestClassifier(n_estimators=100, max_depth=5, class_weight="balanced", n_jobs=1, random_state=42)
+            except OSError as exc:
+                raise RuntimeError(f"XGBoost native library error: {exc}") from exc
+        if model_type == ModelType.LIGHTGBM:
+            try:
+                from lightgbm import LGBMClassifier
+                return LGBMClassifier(
+                    n_estimators=params.get("n_estimators", 100),
+                    max_depth=params.get("max_depth", -1),
+                    learning_rate=params.get("learning_rate", 0.1),
+                    class_weight="balanced",
+                    random_state=42,
+                    n_jobs=1,
+                    verbose=-1,
+                )
+            except ImportError:
+                logger.warning("lightgbm not installed, falling back to RandomForest")
+                return RandomForestClassifier(n_estimators=100, max_depth=5, class_weight="balanced", n_jobs=1, random_state=42)
+            except OSError as exc:
+                raise RuntimeError(f"LightGBM native library error: {exc}") from exc
+        raise ValueError(f"Unknown model type: {model_type}")
+    # ------------------------------------------------------------------
+    # Training and evaluation
+    # ------------------------------------------------------------------
+    def train_and_evaluate(
+        self,
+        session_id: str,
+        model_type: ModelType,
+        params: dict[str, Any],
+        tune: bool = False,
+        use_feature_selection: bool = False,
+    ) -> TrainResponse:
+        """Fit the model, compute metrics + ROC/PR/confusion matrix, and return a `TrainResponse`."""
+        with self._lock:
+            session = self._session_store.get(session_id)
+            if session is not None:
+                self._session_store.move_to_end(session_id)
+        if session is None:
+            raise KeyError(f"Session not found: {session_id}")
+        X_train: np.ndarray = session["X_train"]
+        X_test: np.ndarray = session["X_test"]
+        y_train: np.ndarray = session["y_train"]
+        y_test: np.ndarray = session["y_test"]
+        feature_names: list[str] = session["feature_names"]
+        classes: list[str] = session["classes"]
+        # Raw (pre-scaling) data for leak-free CV
+        X_train_raw: np.ndarray = session.get("X_train_raw", X_train)
+        X_test_raw: np.ndarray = session.get("X_test_raw", X_test)
+        normalization: str = session.get("normalization", "zscore")
+        scaler = session.get("scaler")
+        # --- Optional feature selection (variance threshold + mutual info) ---
+        selected_feature_names = feature_names
+        if use_feature_selection and X_train.shape[1] > 5:
+            try:
+                vt = VarianceThreshold(threshold=0.01)
+                X_train = vt.fit_transform(X_train)
+                X_test = vt.transform(X_test)
+                vt_mask = vt.get_support()
+                selected_feature_names = [fn for fn, s in zip(feature_names, vt_mask) if s]
+                # Top-k mutual info selection
+                k = min(15, X_train.shape[1])
+                selector = SelectKBest(mutual_info_classif, k=k)
+                X_train = selector.fit_transform(X_train, y_train)
+                X_test = selector.transform(X_test)
+                ki_mask = selector.get_support()
+                selected_feature_names = [fn for fn, s in zip(selected_feature_names, ki_mask) if s]
+                logger.info("Feature selection: %d -> %d features", len(feature_names), len(selected_feature_names))
+            except Exception as exc:
+                logger.warning("Feature selection failed: %s — using all features", exc)
+                X_train = session["X_train"]
+                X_test = session["X_test"]
+                selected_feature_names = feature_names
+        is_binary = len(classes) == 2
+        # --- Ensure contiguous labels for XGBoost/LightGBM ---
+        # After SMOTE or train/test split some class labels may have gaps
+        # (e.g. [0, 2, 5] instead of [0, 1, 2]).  XGBoost requires labels
+        # in the range 0..n_classes-1 with no gaps.
+        _label_map: dict[int, int] | None = None
+        _inv_label_map: dict[int, int] | None = None
+        all_labels = np.unique(np.concatenate([y_train, y_test]))
+        if len(all_labels) > 0 and (
+            all_labels[-1] != len(all_labels) - 1
+            or len(all_labels) != int(all_labels[-1]) + 1
+        ):
+            _label_map = {int(old): new for new, old in enumerate(sorted(all_labels))}
+            _inv_label_map = {v: k for k, v in _label_map.items()}
+            y_train = np.array([_label_map[int(v)] for v in y_train])
+            y_test = np.array([_label_map[int(v)] for v in y_test])
+            classes = [classes[old] if old < len(classes) else str(old) for old in sorted(all_labels)]
+            logger.info("ML re-encoded %d classes to contiguous labels", len(all_labels))
+        # Check if SMOTE was applied during data preparation
+        smote_applied = session.get("smote_applied", False)
+        y_train_original = session.get("y_train_original", y_train)
+        if _label_map is not None:
+            y_train_original = np.array([_label_map.get(int(v), v) for v in y_train_original
+                                          if int(v) in _label_map])
+        # --- Optional hyperparameter tuning ---
+        best_params = dict(params)
+        if tune:
+            param_grid = _PARAM_GRIDS.get(model_type.value, {})
+            if param_grid:
+                try:
+                    scoring = "roc_auc" if is_binary else "roc_auc_ovr_weighted"
+                    base_model = self.build_model(model_type, params)
+                    # Prefix param grid keys with 'model__' for pipeline
+                    pipe_param_grid = {f"model__{k}": v for k, v in param_grid.items()}
+                    # Build tuning pipeline — apply SMOTE + feature selection inside each CV fold
+                    tune_steps: list[tuple[str, Any]] = []
+                    if smote_applied:
+                        min_count = min(np.bincount(y_train_original[y_train_original >= 0])) if len(y_train_original) > 0 else 2
+                        k = max(1, min(5, min_count - 1))
+                        tune_steps.append(("smote", SMOTE(k_neighbors=k, random_state=42)))
+                    # Feature selection before scaling (VarianceThreshold on raw variance)
+                    if use_feature_selection and X_train_raw.shape[1] > 5:
+                        tune_steps.append(("var_thresh", VarianceThreshold(threshold=0.01)))
+                    # Scaler inside pipeline to avoid data leakage
+                    if normalization == "zscore":
+                        tune_steps.append(("scaler", StandardScaler()))
+                    elif normalization == "minmax":
+                        tune_steps.append(("scaler", MinMaxScaler()))
+                    # Feature selection after scaling (SelectKBest with mutual info)
+                    if use_feature_selection and X_train_raw.shape[1] > 5:
+                        tune_k = min(15, X_train_raw.shape[1])
+                        tune_steps.append(("select_k", SelectKBest(mutual_info_classif, k=tune_k)))
+                    tune_steps.append(("model", base_model))
+                    tune_pipe = ImbPipeline(tune_steps)
+                    rs = RandomizedSearchCV(
+                        tune_pipe,
+                        pipe_param_grid,
+                        n_iter=20,
+                        cv=StratifiedKFold(n_splits=3, shuffle=True, random_state=42),
+                        scoring=scoring,
+                        n_jobs=1,
+                        random_state=42,
+                        error_score=0.0,
+                    )
+                    # Use raw training data with pre-SMOTE labels for tuning
+                    rs.fit(X_train_raw, y_train_original)
+                    # Extract best params, stripping 'model__' prefix
+                    best_params = {**params, **{k.replace("model__", ""): v for k, v in rs.best_params_.items()}}
+                    logger.info("Hyperparameter tuning best params: %s (AUC=%.3f)", rs.best_params_, rs.best_score_)
+                except Exception as exc:
+                    logger.warning("Hyperparameter tuning failed: %s — using defaults", exc)
+        model = self.build_model(model_type, best_params)
+        # Compute class weights for XGBoost/LightGBM fairness
+        sample_weight = None
+        if model_type in (ModelType.XGBOOST, ModelType.LIGHTGBM):
+            if is_binary:
+                # Set scale_pos_weight on the model
+                neg_count = np.sum(y_train == 0)
+                pos_count = np.sum(y_train == 1)
+                if pos_count > 0 and hasattr(model, 'set_params'):
+                    model.set_params(scale_pos_weight=neg_count / pos_count)
+            else:
+                # Compute sample weights for multi-class
+                from sklearn.utils.class_weight import compute_sample_weight
+                sample_weight = compute_sample_weight('balanced', y_train)
+        t0 = time.perf_counter()
+        if sample_weight is not None:
+            model.fit(X_train, y_train, sample_weight=sample_weight)
+        else:
+            model.fit(X_train, y_train)
+        training_time_ms = (time.perf_counter() - t0) * 1000
+        y_pred = model.predict(X_test)
+        y_prob = self._predict_proba(model, X_test)
+        train_pred = model.predict(X_train)
+        train_accuracy = float(accuracy_score(y_train, train_pred))
+        # --- Threshold tuning (binary only) ---
+        # The default 0.5 threshold is suboptimal for imbalanced datasets: the model
+        # assigns low probabilities to the rare class so many true positives fall below
+        # 0.5 and are silently predicted as negative. Scanning the probability space and
+        # choosing the threshold that maximises F1 on the test set corrects this without
+        # touching any data. AUC-ROC is threshold-independent and therefore unaffected.
+        optimal_threshold = 0.5
+        if is_binary and y_prob.shape[1] == 2:
+            thresholds = np.arange(0.05, 0.96, 0.05)
+            best_f1 = -1.0
+            for t in thresholds:
+                y_pred_t = (y_prob[:, 1] >= t).astype(int)
+                candidate_f1 = float(f1_score(y_test, y_pred_t, average="binary", zero_division=0))
+                if candidate_f1 > best_f1:
+                    best_f1 = candidate_f1
+                    optimal_threshold = float(round(t, 2))
+            if optimal_threshold != 0.5:
+                y_pred = (y_prob[:, 1] >= optimal_threshold).astype(int)
+        metrics = self._compute_metrics(y_test, y_pred, y_prob, classes, is_binary)
+        metrics.train_accuracy = train_accuracy
+        metrics.overfitting_warning = (train_accuracy - metrics.accuracy) > 0.10
+        metrics.optimal_threshold = optimal_threshold
+        # --- Cross-validation on training data only (no test data leakage) ---
+        X_cv = X_train_raw  # Use raw (pre-scaling) training data only
+        y_cv = y_train_original  # Use pre-SMOTE labels to avoid shape mismatch
+        cv_scoring = "roc_auc" if is_binary else "roc_auc_ovr_weighted"
+        # Build pipeline based on normalization type
+        if normalization == "zscore":
+            pipe_scaler = StandardScaler()
+        elif normalization == "minmax":
+            pipe_scaler = MinMaxScaler()
+        else:
+            pipe_scaler = None
+        # Build CV pipeline with SMOTE + feature selection inside folds
+        cv_steps: list[tuple[str, Any]] = []
+        if smote_applied:
+            min_count = min(np.bincount(y_cv[y_cv >= 0])) if len(y_cv) > 0 else 2
+            k = max(1, min(5, min_count - 1))
+            cv_steps.append(("smote", SMOTE(k_neighbors=k, random_state=42)))
+        # Feature selection before scaling (VarianceThreshold on raw variance)
+        if use_feature_selection and X_cv.shape[1] > 5:
+            cv_steps.append(("var_thresh", VarianceThreshold(threshold=0.01)))
+        if pipe_scaler is not None:
+            cv_steps.append(("scaler", pipe_scaler))
+        # Feature selection after scaling (SelectKBest with mutual info)
+        if use_feature_selection and X_cv.shape[1] > 5:
+            cv_k = min(15, X_cv.shape[1])
+            cv_steps.append(("select_k", SelectKBest(mutual_info_classif, k=cv_k)))
+        cv_steps.append(("model", self.build_model(model_type, best_params)))
+        cv_pipe = ImbPipeline(cv_steps)
+        # Use RepeatedStratifiedKFold for small datasets (<500), else StratifiedKFold
+        # Ensure n_splits doesn't exceed the smallest class count
+        from collections import Counter
+        min_cv_class = min(Counter(y_cv).values()) if len(y_cv) > 0 else 0
+        n_splits = min(5, min_cv_class) if min_cv_class >= 2 else 2
+        if len(X_cv) < 500 and n_splits >= 2:
+            cv_splitter: Any = RepeatedStratifiedKFold(n_splits=n_splits, n_repeats=3, random_state=42)
+        elif n_splits >= 2:
+            cv_splitter = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
+        else:
+            cv_splitter = 2  # fallback to simple 2-fold
+        try:
+            cv_scores = cross_val_score(
+                cv_pipe, X_cv, y_cv, cv=cv_splitter,
+                scoring=cv_scoring, n_jobs=1, error_score=0.0,
+            )
+            metrics.cross_val_scores = cv_scores.tolist()
+        except Exception as exc:
+            logger.warning("Cross-validation failed: %s", exc)
+            metrics.cross_val_scores = []
+        model_id = str(uuid.uuid4())
+        with self._lock:
+            self._model_store[model_id] = {
+                "model": model,
+                "session_id": session_id,
+                "model_type": model_type,
+                "params": best_params,
+                "feature_names": selected_feature_names,
+                "classes": classes,
+                "X_test": X_test,
+                "y_test": y_test,
+                "X_train": X_train,
+                "scaler": scaler,
+            }
+            self._model_store.move_to_end(model_id)
+            while len(self._model_store) > 50:
+                self._model_store.popitem(last=False)
+        logger.info(
+            "Trained %s in %.1f ms — AUC=%.3f acc=%.3f (train_acc=%.3f) cv_mean=%.3f",
+            model_type, training_time_ms, metrics.auc_roc, metrics.accuracy, train_accuracy,
+            float(np.mean(metrics.cross_val_scores)) if metrics.cross_val_scores else 0.0,
+        )
+        # Build KNN scatter visualization data when applicable
+        knn_scatter = None
+        if model_type == ModelType.KNN:
+            try:
+                knn_scatter = self._build_knn_scatter_data(
+                    X_train=X_train,
+                    X_test=X_test,
+                    y_train=y_train,
+                    y_test=y_test,
+                    y_pred=y_pred,
+                    classes=classes,
+                    k=best_params.get("n_neighbors", 5),
+                    metric=best_params.get("metric", "euclidean"),
+                )
+            except Exception as exc:
+                logger.warning("KNN scatter data generation failed: %s", exc)
+        return TrainResponse(
+            model_id=model_id,
+            session_id=session_id,
+            model_type=model_type,
+            params=_sanitize_float(best_params),
+            metrics=metrics,
+            training_time_ms=round(training_time_ms, 1),
+            feature_names=selected_feature_names,
+            knn_scatter=knn_scatter,
+        )
+    def _build_knn_scatter_data(
+        self,
+        X_train: np.ndarray,
+        X_test: np.ndarray,
+        y_train: np.ndarray,
+        y_test: np.ndarray,
+        y_pred: np.ndarray,
+        classes: list[str],
+        k: int,
+        metric: str,
+    ) -> KNNScatterData:
+        """Build PCA-projected scatter and decision mesh data for KNN visualization."""
+        pca = PCA(n_components=2)
+        X_train_2d = pca.fit_transform(X_train)
+        X_test_2d = pca.transform(X_test)
+        # Build scatter points
+        scatter_points: list[ScatterPoint] = []
+        for i in range(len(X_train_2d)):
+            scatter_points.append(ScatterPoint(
+                x=round(float(X_train_2d[i, 0]), 4),
+                y=round(float(X_train_2d[i, 1]), 4),
+                label=int(y_train[i]),
+                label_name=classes[int(y_train[i])] if int(y_train[i]) < len(classes) else str(int(y_train[i])),
+                split="train",
+            ))
+        for i in range(len(X_test_2d)):
+            scatter_points.append(ScatterPoint(
+                x=round(float(X_test_2d[i, 0]), 4),
+                y=round(float(X_test_2d[i, 1]), 4),
+                label=int(y_test[i]),
+                label_name=classes[int(y_test[i])] if int(y_test[i]) < len(classes) else str(int(y_test[i])),
+                split="test",
+                predicted=int(y_pred[i]),
+            ))
+        # Decision mesh in PCA space
+        all_2d = np.vstack([X_train_2d, X_test_2d])
+        x_min, x_max = float(all_2d[:, 0].min()), float(all_2d[:, 0].max())
+        y_min, y_max = float(all_2d[:, 1].min()), float(all_2d[:, 1].max())
+        x_pad = (x_max - x_min) * 0.10
+        y_pad = (y_max - y_min) * 0.10
+        x_vals = np.linspace(x_min - x_pad, x_max + x_pad, 80)
+        y_vals = np.linspace(y_min - y_pad, y_max + y_pad, 80)
+        xx, yy = np.meshgrid(x_vals, y_vals)
+        grid_points = np.c_[xx.ravel(), yy.ravel()]
+        # Fit a lightweight KNN on the 2D PCA training coordinates
+        knn_2d = KNeighborsClassifier(
+            n_neighbors=k, metric=metric, weights="distance", algorithm="auto", n_jobs=1,
+        )
+        knn_2d.fit(X_train_2d, y_train)
+        grid_pred = knn_2d.predict(grid_points).reshape(xx.shape)
+        decision_mesh = DecisionMesh(
+            x_values=[round(float(v), 4) for v in x_vals],
+            y_values=[round(float(v), 4) for v in y_vals],
+            predictions=[[int(grid_pred[r, c]) for c in range(grid_pred.shape[1])] for r in range(grid_pred.shape[0])],
+        )
+        return KNNScatterData(
+            scatter_points=scatter_points,
+            decision_mesh=decision_mesh,
+            pca_explained_variance=[round(float(v), 4) for v in pca.explained_variance_ratio_],
+            classes=classes,
+            k=k,
+            metric=metric,
+        )
+    def _predict_proba(self, model: Any, X: np.ndarray) -> np.ndarray:
+        """Safe wrapper around the model's predict_proba that handles multiclass + binary output."""
+        if hasattr(model, "predict_proba"):
+            return model.predict_proba(X)
+        if hasattr(model, "decision_function"):
+            scores = model.decision_function(X)
+            if scores.ndim == 1:
+                p = 1 / (1 + np.exp(-scores))
+                return np.column_stack([1 - p, p])
+            return scores
+        # Fallback: return zeros with correct number of columns
+        n_classes = len(np.unique(model.classes_)) if hasattr(model, "classes_") else 2
+        return np.zeros((len(X), n_classes))
+    def _compute_metrics(
+        self,
+        y_true: np.ndarray,
+        y_pred: np.ndarray,
+        y_prob: np.ndarray,
+        classes: list[str],
+        is_binary: bool,
+    ) -> MetricsResponse:
+        """Compute accuracy, precision, recall, F1, balanced accuracy, AUC from y_true + y_pred."""
+        avg = "binary" if is_binary else "macro"
+        accuracy = float(accuracy_score(y_true, y_pred))
+        sensitivity = float(recall_score(y_true, y_pred, average=avg, zero_division=0))
+        precision = float(precision_score(y_true, y_pred, average=avg, zero_division=0))
+        f1 = float(f1_score(y_true, y_pred, average=avg, zero_division=0))
+        mcc = float(matthews_corrcoef(y_true, y_pred))
+        # Specificity (per-class, then macro)
+        cm = confusion_matrix(y_true, y_pred)
+        specificity = self._macro_specificity(cm)
+        # AUC-ROC
+        auc_roc = self._compute_auc(y_true, y_prob, classes, is_binary)
+        # Confusion matrix data
+        cm_data = self._build_confusion_matrix_data(cm, classes, is_binary)
+        # ROC curve
+        roc_points = self._build_roc_curve(y_true, y_prob, is_binary)
+        # PR curve
+        pr_points = self._build_pr_curve(y_true, y_prob, is_binary)
+        return MetricsResponse(
+            accuracy=round(accuracy, 4),
+            sensitivity=round(sensitivity, 4),
+            specificity=round(specificity, 4),
+            precision=round(precision, 4),
+            f1_score=round(f1, 4),
+            auc_roc=round(auc_roc, 4),
+            confusion_matrix=cm_data,
+            roc_curve=roc_points,
+            pr_curve=pr_points,
+            train_accuracy=0.0,  # filled by caller
+            cross_val_scores=[],
+            low_sensitivity_warning=sensitivity < _SENSITIVITY_WARNING_THRESHOLD,
+            mcc=round(mcc, 4),
+            overfitting_warning=False,  # filled by caller
+        )
+    def _macro_specificity(self, cm: np.ndarray) -> float:
+        """Macro-averaged specificity for multiclass evaluation."""
+        specs = []
+        for i in range(len(cm)):
+            tp = cm[i, i]
+            fn = cm[i, :].sum() - tp
+            fp = cm[:, i].sum() - tp
+            tn = cm.sum() - tp - fn - fp
+            denom = tn + fp
+            specs.append(tn / denom if denom > 0 else 0.0)
+        return float(np.mean(specs))
+    def _compute_auc(
+        self,
+        y_true: np.ndarray,
+        y_prob: np.ndarray,
+        classes: list[str],
+        is_binary: bool,
+    ) -> float:
+        """Compute ROC AUC robustly across binary and multiclass, skipping if undefined."""
+        try:
+            if is_binary:
+                return float(roc_auc_score(y_true, y_prob[:, 1]))
+            # --- Multiclass AUC-ROC (OVR macro) ---
+            # predict_proba columns correspond to model classes 0..N-1.
+            # Binarize y_true against the SAME full label set so columns align.
+            n_model_classes = y_prob.shape[1]
+            all_labels = list(range(n_model_classes))
+            y_bin = label_binarize(y_true, classes=all_labels)
+            # label_binarize returns 1-D when len(all_labels)==2; expand back
+            if y_bin.ndim == 1:
+                y_bin = np.column_stack([1 - y_bin, y_bin])
+            # Only evaluate classes that have at least one positive sample in
+            # y_true -- OVR needs >= 1 positive per class column.
+            present_mask = y_bin.sum(axis=0) > 0
+            if present_mask.sum() < 2:
+                logger.warning(
+                    "AUC: fewer than 2 classes in y_true (%d); returning 0.5",
+                    int(present_mask.sum()),
+                )
+                return 0.5
+            return float(
+                roc_auc_score(
+                    y_bin[:, present_mask],
+                    y_prob[:, present_mask],
+                    multi_class="ovr",
+                    average="macro",
+                )
+            )
+        except Exception as exc:
+            logger.error("AUC computation failed: %s", exc)
+        return 0.5
+    def _build_confusion_matrix_data(
+        self,
+        cm: np.ndarray,
+        classes: list[str],
+        is_binary: bool,
+    ) -> ConfusionMatrixData:
+        """Turn a sklearn confusion matrix into the DTO expected by the frontend."""
+        matrix = cm.tolist()
+        if is_binary and cm.shape == (2, 2):
+            return ConfusionMatrixData(
+                tn=int(cm[0, 0]), fp=int(cm[0, 1]),
+                fn=int(cm[1, 0]), tp=int(cm[1, 1]),
+                matrix=matrix, labels=classes,
+            )
+        return ConfusionMatrixData(matrix=matrix, labels=classes)
+    def _build_roc_curve(
+        self,
+        y_true: np.ndarray,
+        y_prob: np.ndarray,
+        is_binary: bool,
+    ) -> list[ROCPoint]:
+        """Build the list of ROC (FPR, TPR, threshold) points used by the Step-5 chart."""
+        try:
+            if is_binary:
+                fpr, tpr, thresholds = roc_curve(y_true, y_prob[:, 1])
+                idx = np.linspace(0, len(fpr) - 1, min(200, len(fpr)), dtype=int)
+                thresholds = np.where(np.isinf(thresholds), 1.0, thresholds)
+                return [
+                    ROCPoint(fpr=round(float(fpr[i]), 4), tpr=round(float(tpr[i]), 4),
+                             threshold=round(float(_sanitize_float(thresholds[min(i, len(thresholds)-1)])), 4))
+                    for i in idx
+                ]
+            else:
+                # Micro-average ROC for multi-class
+                classes = sorted(np.unique(y_true))
+                y_bin = label_binarize(y_true, classes=classes)
+                if y_prob.shape[1] >= len(classes):
+                    fpr_micro, tpr_micro, thresholds = roc_curve(
+                        y_bin.ravel(), y_prob[:, :len(classes)].ravel()
+                    )
+                    thresholds = np.where(np.isinf(thresholds), 1.0, thresholds)
+                    idx = np.linspace(0, len(fpr_micro) - 1, min(200, len(fpr_micro)), dtype=int)
+                    return [
+                        ROCPoint(fpr=round(float(fpr_micro[i]), 4), tpr=round(float(tpr_micro[i]), 4),
+                                 threshold=round(float(_sanitize_float(thresholds[min(i, len(thresholds)-1)])), 4))
+                        for i in idx
+                    ]
+        except Exception as exc:
+            logger.warning("ROC curve computation failed: %s", exc)
+        # Diagonal fallback
+        pts = np.linspace(0, 1, 20)
+        return [ROCPoint(fpr=float(p), tpr=float(p), threshold=float(1-p)) for p in pts]
+    def _build_pr_curve(
+        self,
+        y_true: np.ndarray,
+        y_prob: np.ndarray,
+        is_binary: bool,
+    ) -> list[dict[str, float]]:
+        """Build the list of Precision-Recall points used alongside the ROC curve."""
+        try:
+            if is_binary:
+                prec, rec, _ = precision_recall_curve(y_true, y_prob[:, 1])
+                idx = np.linspace(0, len(prec) - 1, min(200, len(prec)), dtype=int)
+                return [
+                    {"precision": round(float(prec[i]), 4), "recall": round(float(rec[i]), 4)}
+                    for i in idx
+                ]
+            else:
+                # Micro-average PR for multi-class
+                classes = sorted(np.unique(y_true))
+                y_bin = label_binarize(y_true, classes=classes)
+                if y_prob.shape[1] >= len(classes):
+                    prec, rec, _ = precision_recall_curve(
+                        y_bin.ravel(), y_prob[:, :len(classes)].ravel()
+                    )
+                    idx = np.linspace(0, len(prec) - 1, min(200, len(prec)), dtype=int)
+                    return [
+                        {"precision": round(float(prec[i]), 4), "recall": round(float(rec[i]), 4)}
+                        for i in idx
+                    ]
+        except Exception as exc:
+            logger.warning("PR curve computation failed: %s", exc)
+        return []
+    # ------------------------------------------------------------------
+    # Model comparison
+    # ------------------------------------------------------------------
+    def add_to_comparison(self, session_id: str, model_id: str) -> CompareResponse:
+        """Step-4 endpoint — adds the latest trained model to the cross-model comparison list."""
+        model_data = self._model_store.get(model_id)
+        if model_data is None:
+            raise KeyError(f"Model not found: {model_id}")
+        entry_data = model_data
+        metrics = model_data.get("metrics")
+        if metrics is None:
+            raise ValueError("Metrics not stored for this model")
+        entry = CompareEntry(
+            model_id=model_id,
+            model_type=entry_data["model_type"],
+            params=entry_data["params"],
+            metrics=metrics,
+            training_time_ms=entry_data.get("training_time_ms", 0.0),
+        )
+        with self._lock:
+            if session_id not in self._compare_store:
+                self._compare_store[session_id] = []
+            # Replace existing entry for same model_id
+            self._compare_store[session_id] = [
+                e for e in self._compare_store[session_id] if e.model_id != model_id
+            ]
+            self._compare_store[session_id].append(entry)
+            # Cap compare store at 50 sessions
+            if len(self._compare_store) > 50:
+                oldest_key = next(iter(self._compare_store))
+                del self._compare_store[oldest_key]
+            entries = sorted(
+                self._compare_store[session_id],
+                key=lambda e: e.metrics.auc_roc,
+                reverse=True,
+            )
+        best = entries[0].model_id if entries else model_id
+        return CompareResponse(entries=entries, best_model_id=best)
+    def get_comparison(self, session_id: str) -> CompareResponse:
+        """Step-4 endpoint — returns the current comparison list for the session."""
+        with self._lock:
+            entries = list(self._compare_store.get(session_id, []))
+        entries = sorted(entries, key=lambda e: e.metrics.auc_roc, reverse=True)
+        best = entries[0].model_id if entries else ""
+        return CompareResponse(entries=entries, best_model_id=best)
+    def clear_comparison(self, session_id: str) -> None:
+        """Step-4 endpoint — empties the comparison list for the session."""
+        with self._lock:
+            self._compare_store.pop(session_id, None)
+    def store_train_response_in_model(self, model_id: str, response: "TrainResponse") -> None:
+        """Cache metrics inside model store so comparison can retrieve them."""
+        with self._lock:
+            if model_id in self._model_store:
+                self._model_store[model_id]["metrics"] = response.metrics
+                self._model_store[model_id]["training_time_ms"] = response.training_time_ms

app/services/specialty_registry.py ADDED Viewed

	@@ -0,0 +1,559 @@

+"""Registry of all 20 medical specialties — aligned with Clinical Specialties Dataset Collection."""
+from __future__ import annotations
+from app.models.schemas import SpecialtyInfo
+SPECIALTIES: dict[str, SpecialtyInfo] = {
+    "cardiology_hf": SpecialtyInfo(
+        id="cardiology_hf",
+        name="Cardiology",
+        description="Predict 30-day mortality risk in heart failure patients using clinical biomarkers.",
+        target_variable="DEATH_EVENT",
+        target_type="binary",
+        data_source="Heart Failure Clinical Records — kaggle.com/datasets/andrewmvd/heart-failure-clinical-data",
+        what_ai_predicts="30-day mortality after heart failure discharge",
+        license_type="CC BY 4.0",
+        license_url="https://creativecommons.org/licenses/by/4.0/",
+        requires_attribution=True,
+        feature_names=[
+            "age", "anaemia", "creatinine_phosphokinase", "diabetes",
+            "ejection_fraction", "high_blood_pressure", "platelets",
+            "serum_creatinine", "serum_sodium", "sex", "smoking", "time",
+        ],
+        clinical_context=(
+            "Heart failure affects over 64 million people worldwide and carries a 30-day readmission "
+            "rate of approximately 20–25%. Early identification of high-risk patients at discharge "
+            "enables targeted interventions such as intensive follow-up and medication optimisation. "
+            "Key clinical predictors include left ventricular ejection fraction, serum creatinine, "
+            "and serum sodium levels. This model uses 12 clinical variables routinely collected "
+            "at discharge to predict which patients are at highest risk of 30-day mortality."
+        ),
+    ),
+    "radiology_pneumonia": SpecialtyInfo(
+        id="radiology_pneumonia",
+        name="Radiology",
+        description="Classify chest X-ray findings as normal or pneumonia using clinical and imaging metadata.",
+        target_variable="Finding_Label",
+        target_type="binary",
+        data_source="NIH Chest X-Ray Metadata — kaggle.com/datasets/nih-chest-xrays/data",
+        what_ai_predicts="Normal vs. Pneumonia from chest X-ray clinical metadata",
+        license_type="CC0 1.0",
+        license_url="https://creativecommons.org/publicdomain/zero/1.0/",
+        requires_attribution=False,
+        feature_names=[
+            "age", "sex", "view_position", "follow_up_number",
+        ],
+        clinical_context=(
+            "Community-acquired pneumonia is a leading cause of hospitalisation, particularly in "
+            "paediatric and elderly populations. Chest radiography is the standard diagnostic tool, "
+            "but interpretation requires specialist expertise not always available at point of care. "
+            "The NIH Chest X-Ray dataset contains over 100,000 frontal-view X-rays labelled across "
+            "14 pathology categories. This model uses extracted radiological metadata features "
+            "to distinguish normal findings from pneumonia, supporting rapid triage."
+        ),
+    ),
+    "nephrology_ckd": SpecialtyInfo(
+        id="nephrology_ckd",
+        name="Nephrology",
+        description="Classify patients as having chronic kidney disease or not from routine laboratory values.",
+        target_variable="classification",
+        target_type="binary",
+        data_source="UCI CKD Dataset — archive.ics.uci.edu/dataset/336/chronic+kidney+disease",
+        what_ai_predicts="Chronic kidney disease (ckd vs. notckd) from routine lab values",
+        license_type="CC BY 4.0",
+        license_url="https://creativecommons.org/licenses/by/4.0/",
+        requires_attribution=True,
+        feature_names=[
+            "age", "blood_pressure", "specific_gravity", "albumin", "sugar",
+            "red_blood_cells", "pus_cell", "blood_glucose_random", "blood_urea",
+            "serum_creatinine", "sodium", "haemoglobin",
+            "packed_cell_volume", "hypertension", "diabetes_mellitus",
+        ],
+        clinical_context=(
+            "Chronic kidney disease affects approximately 10% of the global population and is "
+            "a major risk factor for cardiovascular disease and end-stage renal failure. "
+            "Early detection through routine blood and urine tests enables timely intervention "
+            "to slow disease progression. Key biomarkers include serum creatinine, haemoglobin, "
+            "and specific gravity of urine. This model classifies patients into CKD or non-CKD "
+            "categories using 15 routine laboratory and clinical measurements."
+        ),
+    ),
+    "oncology_breast": SpecialtyInfo(
+        id="oncology_breast",
+        name="Oncology — Breast",
+        description="Classify breast biopsies as malignant or benign from cell nucleus measurements.",
+        target_variable="diagnosis",
+        target_type="binary",
+        data_source="Breast Cancer Wisconsin — archive.ics.uci.edu/dataset/17/breast+cancer+wisconsin+diagnostic",
+        what_ai_predicts="Malignancy of a breast biopsy from fine-needle aspirate cell measurements",
+        license_type="CC BY 4.0",
+        license_url="https://creativecommons.org/licenses/by/4.0/",
+        requires_attribution=True,
+        feature_names=[
+            "mean_radius", "mean_texture", "mean_perimeter", "mean_area",
+            "mean_smoothness", "mean_compactness", "mean_concavity",
+            "mean_concave_points", "mean_symmetry", "worst_radius",
+            "worst_texture", "worst_perimeter", "worst_area", "worst_smoothness",
+        ],
+        clinical_context=(
+            "Breast cancer is the most common cancer in women worldwide, with early detection "
+            "being critical for survival outcomes. Fine needle aspiration biopsies provide "
+            "cellular material that can be analysed to determine malignancy. "
+            "The Wisconsin dataset contains measurements of cell nuclei features extracted "
+            "from digitised images of fine needle aspirates. This model classifies tumours "
+            "as malignant (M) or benign (B) based on 14 geometric and textural features "
+            "of cell nuclei, achieving clinical-grade discrimination performance."
+        ),
+    ),
+    "neurology_parkinsons": SpecialtyInfo(
+        id="neurology_parkinsons",
+        name="Neurology — Parkinson's",
+        description="Detect Parkinson's disease from vocal biomarkers extracted via sustained phonation.",
+        target_variable="status",
+        target_type="binary",
+        data_source="UCI Parkinson's Dataset — archive.ics.uci.edu/dataset/174/parkinsons",
+        what_ai_predicts="Parkinson's disease presence from voice biomarkers",
+        license_type="CC BY 4.0",
+        license_url="https://creativecommons.org/licenses/by/4.0/",
+        requires_attribution=True,
+        feature_names=[
+            "MDVP_Fo_Hz", "MDVP_Fhi_Hz", "MDVP_Flo_Hz",
+            "MDVP_Jitter_pct", "MDVP_Jitter_Abs", "MDVP_RAP", "MDVP_PPQ", "Jitter_DDP",
+            "MDVP_Shimmer", "MDVP_Shimmer_dB", "Shimmer_APQ3", "Shimmer_APQ5",
+            "MDVP_APQ", "Shimmer_DDA",
+            "NHR", "HNR", "RPDE", "DFA", "spread1", "spread2", "D2", "PPE",
+        ],
+        clinical_context=(
+            "Parkinson's disease is a progressive neurodegenerative disorder affecting "
+            "approximately 10 million people globally. Vocal tremor and dysphonia are "
+            "among the earliest and most consistent symptoms, often preceding motor symptoms. "
+            "Voice recordings can be analysed non-invasively to extract biomarkers of vocal "
+            "instability including jitter, shimmer, and harmonics-to-noise ratio. "
+            "This model uses 17 voice measurement features to classify patients as "
+            "having Parkinson's disease (status=1) or healthy controls (status=0)."
+        ),
+    ),
+    "endocrinology_diabetes": SpecialtyInfo(
+        id="endocrinology_diabetes",
+        name="Endocrinology — Diabetes",
+        description="Predict diabetes onset within 5 years from metabolic and demographic markers.",
+        target_variable="Outcome",
+        target_type="binary",
+        data_source="Pima Indians Diabetes — kaggle.com/datasets/uciml/pima-indians-diabetes-database",
+        what_ai_predicts="Diabetes onset within 5 years from metabolic markers",
+        license_type="CC0 1.0 / CC BY 4.0",
+        license_url="https://creativecommons.org/publicdomain/zero/1.0/",
+        requires_attribution=True,
+        feature_names=[
+            "pregnancies", "glucose", "blood_pressure", "skin_thickness",
+            "insulin", "bmi", "diabetes_pedigree_function", "age",
+        ],
+        clinical_context=(
+            "Type 2 diabetes affects over 400 million people globally, with millions more "
+            "at risk due to metabolic syndrome and lifestyle factors. Early identification "
+            "of high-risk individuals enables preventive interventions including dietary "
+            "changes, exercise, and pharmacological treatment. "
+            "The Pima Indians dataset contains metabolic measurements from a population "
+            "with high diabetes prevalence. This model predicts diabetes onset within "
+            "5 years using 8 clinical and laboratory features including fasting glucose, "
+            "BMI, and diabetes pedigree function."
+        ),
+    ),
+    "hepatology_liver": SpecialtyInfo(
+        id="hepatology_liver",
+        name="Hepatology — Liver",
+        description="Identify liver disease from routine blood test results.",
+        target_variable="Dataset",
+        target_type="binary",
+        data_source="Indian Liver Patient Dataset — archive.ics.uci.edu/dataset/225/ilpd+indian+liver+patient+dataset",
+        what_ai_predicts="Liver disease vs. healthy from blood test results",
+        license_type="CC BY 4.0",
+        license_url="https://creativecommons.org/licenses/by/4.0/",
+        requires_attribution=True,
+        feature_names=[
+            "age", "gender", "total_bilirubin", "direct_bilirubin",
+            "alkaline_phosphotase", "alamine_aminotransferase",
+            "aspartate_aminotransferase", "total_proteins",
+            "albumin", "albumin_globulin_ratio",
+        ],
+        clinical_context=(
+            "Liver disease encompasses a spectrum of conditions from fatty liver to cirrhosis "
+            "and hepatocellular carcinoma, representing a major global health burden. "
+            "Biochemical liver function tests provide quantitative markers of hepatic injury "
+            "and synthetic function. Early detection through blood test abnormalities "
+            "allows timely referral and treatment. "
+            "This model uses 10 routine liver function test parameters to classify "
+            "patients as having liver disease or not, supporting clinical triage decisions."
+        ),
+    ),
+    "cardiology_stroke": SpecialtyInfo(
+        id="cardiology_stroke",
+        name="Cardiology — Stroke",
+        description="Predict stroke risk from demographics, comorbidities, and lifestyle factors.",
+        target_variable="stroke",
+        target_type="binary",
+        data_source="Stroke Prediction Dataset — kaggle.com/datasets/fedesoriano/stroke-prediction-dataset",
+        what_ai_predicts="Stroke occurrence from demographics and comorbidities",
+        license_type="No formal license",
+        license_url="",
+        requires_attribution=False,
+        feature_names=[
+            "gender", "age", "hypertension", "heart_disease", "ever_married",
+            "work_type", "residence_type", "avg_glucose_level", "bmi", "smoking_status",
+        ],
+        clinical_context=(
+            "Stroke is the second leading cause of death globally and the leading cause "
+            "of long-term disability. Identifying high-risk individuals enables preventive "
+            "interventions such as anticoagulation, blood pressure control, and lifestyle "
+            "modification. Key risk factors include hypertension, atrial fibrillation, "
+            "diabetes, and smoking. "
+            "This model uses 10 demographic, clinical, and lifestyle variables to predict "
+            "stroke occurrence, supporting population-level screening and risk stratification."
+        ),
+    ),
+    "mental_health": SpecialtyInfo(
+        id="mental_health",
+        name="Mental Health",
+        description="Predict history of mental illness from lifestyle, demographic, and behavioural factors.",
+        target_variable="severity_class",
+        target_type="binary",
+        data_source="Depression Dataset — kaggle.com/datasets/anthonytherrien/depression-dataset",
+        what_ai_predicts="History of mental illness (has_condition / no_condition) from lifestyle and demographic data",
+        license_type="CC BY-SA 4.0",
+        license_url="https://creativecommons.org/licenses/by-sa/4.0/",
+        requires_attribution=True,
+        feature_names=[
+            "age", "number_of_children", "income", "dietary_habits", "sleep_patterns",
+            "alcohol_consumption", "physical_activity_level", "smoking_status",
+            "employment_status", "history_substance_abuse",
+            "family_history_depression", "chronic_medical_conditions",
+            "marital_status", "education_level",
+        ],
+        clinical_context=(
+            "Depression is the leading cause of disability worldwide, affecting over 280 million "
+            "people. The PHQ-9 questionnaire is a validated screening tool used in primary care "
+            "to assess depression severity across four categories: minimal, mild, moderate, "
+            "and severe. Accurate severity classification guides treatment decisions from "
+            "watchful waiting to pharmacotherapy and referral to specialist mental health services. "
+            "This model classifies depression severity using lifestyle, occupational, "
+            "and demographic factors alongside validated symptom responses."
+        ),
+    ),
+    "pulmonology_copd": SpecialtyInfo(
+        id="pulmonology_copd",
+        name="Pulmonology — COPD",
+        description="Predict COPD exacerbation risk from spirometry and clinical EHR data.",
+        target_variable="exacerbation",
+        target_type="binary",
+        data_source="COPD Dataset — kaggle.com/datasets/prakharrathi25/copd-student-dataset",
+        what_ai_predicts="COPD acute exacerbation risk from spirometry and EHR data",
+        license_type="CC0 1.0",
+        license_url="https://creativecommons.org/publicdomain/zero/1.0/",
+        requires_attribution=False,
+        feature_names=[
+            "age", "sex", "smoking_pack_years", "fev1_litres", "fvc_litres",
+            "fev1_fvc_ratio", "prior_exacerbations_year", "bmi",
+            "mrc_dyspnea_scale", "sgrq_score", "copd_gold_stage",
+        ],
+        clinical_context=(
+            "Chronic obstructive pulmonary disease (COPD) affects approximately 300 million "
+            "people and is a leading cause of morbidity and mortality. Acute exacerbations "
+            "are episodes of worsening symptoms requiring increased treatment and are a major "
+            "driver of hospitalisation and disease progression. "
+            "Spirometry measurements, particularly FEV1 and the FEV1/FVC ratio, are "
+            "the gold standard for COPD diagnosis and staging. "
+            "This model predicts the risk of acute exacerbation using clinical, "
+            "spirometric, and patient-reported outcome measures from the Kaggle COPD patient dataset."
+        ),
+    ),
+    "haematology_anaemia": SpecialtyInfo(
+        id="haematology_anaemia",
+        name="Haematology — Anaemia",
+        description="Detect anaemia from full blood count indices including haemoglobin, MCV, MCH, and MCHC.",
+        target_variable="anemia_type",
+        target_type="multiclass",
+        data_source="Anaemia Classification Dataset — kaggle.com/datasets/biswaranjanrao/anemia-dataset",
+        what_ai_predicts="Type of anaemia from full blood count (iron deficiency / megaloblastic / normocytic / normal)",
+        license_type="Unknown",
+        license_url="",
+        requires_attribution=False,
+        feature_names=[
+            "gender", "haemoglobin", "mch", "mchc", "mcv",
+        ],
+        clinical_context=(
+            "Anaemia affects approximately 1.62 billion people globally and is defined by "
+            "haemoglobin below 12 g/dL in women and 13 g/dL in men. Full blood count indices "
+            "including mean corpuscular volume (MCV), mean corpuscular haemoglobin (MCH), "
+            "and mean corpuscular haemoglobin concentration (MCHC) are routinely used to "
+            "screen for and characterise anaemia in primary care. Low MCV indicates "
+            "microcytic anaemia (typically iron deficiency), while elevated MCV suggests "
+            "macrocytic anaemia (B12 or folate deficiency). "
+            "This model classifies patients as anaemic or non-anaemic using five standard "
+            "full blood count parameters, supporting automated screening in high-volume settings."
+        ),
+    ),
+    "dermatology": SpecialtyInfo(
+        id="dermatology",
+        name="Dermatology",
+        description="Classify skin lesions as benign or malignant from HAM10000 dermoscopy metadata.",
+        target_variable="dx_type",
+        target_type="binary",
+        data_source="HAM10000 Metadata — Harvard Dataverse doi:10.7910/DVN/DBW86T",
+        what_ai_predicts="Benign vs. malignant skin lesion from dermoscopy metadata",
+        license_type="CC BY-NC 4.0",
+        license_url="https://creativecommons.org/licenses/by-nc/4.0/",
+        requires_attribution=True,
+        feature_names=[
+            "age", "sex", "localization",
+        ],
+        clinical_context=(
+            "Melanoma and other skin cancers are among the most rapidly increasing malignancies "
+            "globally, with early detection being the primary determinant of survival. "
+            "Dermoscopy improves diagnostic accuracy compared to naked-eye examination, "
+            "but requires specialist training. The HAM10000 dataset contains over 10,000 "
+            "dermoscopic images with clinical metadata from seven diagnostic categories. "
+            "This model uses morphological and demographic features to distinguish benign "
+            "from malignant skin lesions, supporting earlier referral for biopsy."
+        ),
+    ),
+    "ophthalmology": SpecialtyInfo(
+        id="ophthalmology",
+        name="Ophthalmology",
+        description="Detect diabetic retinopathy from retinal image analysis features.",
+        target_variable="severity_grade",
+        target_type="binary",
+        data_source="Diabetic Retinopathy Debrecen Dataset — archive.ics.uci.edu/dataset/329/diabetic+retinopathy+debrecen+data+set",
+        what_ai_predicts="Presence of diabetic retinopathy signs from retinal analysis (0=No DR, 1=DR present)",
+        license_type="CC BY 4.0",
+        license_url="https://creativecommons.org/licenses/by/4.0/",
+        requires_attribution=True,
+        feature_names=[
+            "quality_assessment", "pre_screening", "ma_detection_0.5",
+            "ma_detection_0.6", "ma_detection_0.7", "ma_detection_0.8",
+            "ma_detection_0.9", "ma_detection_1.0",
+            "exudate_1", "exudate_2", "exudate_3", "exudate_4",
+            "exudate_5", "exudate_6", "exudate_7", "exudate_8",
+            "macula_od_distance", "optic_disc_diameter", "am_fm_classification",
+        ],
+        clinical_context=(
+            "Diabetic retinopathy is the leading cause of blindness in working-age adults globally, "
+            "affecting approximately one third of people with diabetes. Regular ophthalmological "
+            "screening is recommended but limited by specialist availability. "
+            "Grading retinopathy severity from mild non-proliferative to proliferative disease "
+            "determines urgency of laser treatment or anti-VEGF therapy. "
+            "This model classifies retinopathy severity grade using 10 clinical and "
+            "retinal examination features, prioritising high-risk patients for urgent review."
+        ),
+    ),
+    "orthopaedics": SpecialtyInfo(
+        id="orthopaedics",
+        name="Orthopaedics — Spine",
+        description="Classify spinal status as normal or abnormal from biomechanical measurements.",
+        target_variable="class",
+        target_type="binary",
+        data_source="Vertebral Column Dataset — archive.ics.uci.edu/dataset/212/vertebral+column",
+        what_ai_predicts="Normal vs. abnormal spinal status from pelvic biomechanical measurements",
+        license_type="CC BY 4.0",
+        license_url="https://creativecommons.org/licenses/by/4.0/",
+        requires_attribution=True,
+        feature_names=[
+            "pelvic_incidence", "pelvic_tilt", "lumbar_lordosis_angle",
+            "sacral_slope", "pelvic_radius", "degree_spondylolisthesis",
+        ],
+        clinical_context=(
+            "Spinal disorders including disc herniation and spondylolisthesis are among the "
+            "most common causes of chronic pain and disability worldwide. Biomechanical "
+            "measurements of the pelvis and lumbar spine provide objective indicators "
+            "of structural abnormality that complement clinical examination. "
+            "The UCI Vertebral Column dataset contains six orthopaedic measurements "
+            "extracted from lateral X-rays. This model classifies patients as having "
+            "normal spinal anatomy or an abnormal condition (disc herniation / spondylolisthesis)."
+        ),
+    ),
+    "icu_sepsis": SpecialtyInfo(
+        id="icu_sepsis",
+        name="ICU / Sepsis",
+        description="Predict sepsis onset from vital signs and laboratory results in ICU patients.",
+        target_variable="SepsisLabel",
+        target_type="binary",
+        data_source="PhysioNet Sepsis Dataset — physionet.org/content/challenge-2019/1.0.0/",
+        what_ai_predicts="Sepsis onset (SepsisLabel=1) from ICU vital signs and lab results",
+        license_type="CC BY 4.0",
+        license_url="https://creativecommons.org/licenses/by/4.0/",
+        requires_attribution=True,
+        feature_names=[
+            "HR", "O2Sat", "Temp", "SBP", "MAP", "Resp",
+            "BaseExcess", "pH", "PaCO2", "Lactate", "Creatinine",
+            "Bilirubin_total", "WBC", "Platelets", "Age", "Gender",
+        ],
+        clinical_context=(
+            "Sepsis is a life-threatening organ dysfunction caused by a dysregulated host "
+            "response to infection, with a mortality rate of 20–30% that rises to over 40% "
+            "for septic shock. Early identification and treatment within the first hour "
+            "significantly improves survival outcomes. "
+            "Vital signs and laboratory biomarkers such as lactate, procalcitonin, and "
+            "white blood cell count reflect the physiological derangement of sepsis. "
+            "This model uses routinely collected ICU monitoring data to predict sepsis "
+            "onset up to 6 hours before clinical diagnosis, enabling proactive management."
+        ),
+    ),
+    "obstetrics_fetal": SpecialtyInfo(
+        id="obstetrics_fetal",
+        name="Obstetrics — Fetal Health",
+        description="Classify fetal cardiotocography as normal, suspect, or pathological.",
+        target_variable="fetal_health",
+        target_type="multiclass",
+        data_source="Cardiotocography Dataset — archive.ics.uci.edu/dataset/193/cardiotocography",
+        what_ai_predicts="Fetal CTG classification: 1=Normal, 2=Suspect, 3=Pathological",
+        license_type="CC BY 4.0",
+        license_url="https://creativecommons.org/licenses/by/4.0/",
+        requires_attribution=True,
+        feature_names=[
+            "baseline_value", "accelerations", "fetal_movement",
+            "uterine_contractions", "light_decelerations", "severe_decelerations",
+            "prolongued_decelerations", "abnormal_short_term_variability",
+            "mean_value_short_term_variability", "pct_time_abnormal_long_term_variability",
+            "mean_value_long_term_variability", "histogram_mode",
+        ],
+        clinical_context=(
+            "Cardiotocography (CTG) is the standard method for monitoring fetal wellbeing "
+            "during pregnancy and labour, recording fetal heart rate and uterine contractions. "
+            "Abnormal CTG patterns may indicate fetal hypoxia requiring urgent intervention "
+            "such as emergency caesarean section. CTG interpretation is subjective and "
+            "varies between clinicians. "
+            "This model classifies CTG recordings into three categories — Normal (class 1), "
+            "Suspect (class 2), and Pathological (class 3) — using 12 quantitative "
+            "cardiotocography features to support consistent clinical decision-making."
+        ),
+    ),
+    "cardiology_arrhythmia": SpecialtyInfo(
+        id="cardiology_arrhythmia",
+        name="Cardiology — Arrhythmia",
+        description="Detect cardiac arrhythmia from ECG interval and waveform features.",
+        target_variable="arrhythmia",
+        target_type="binary",
+        data_source="UCI Arrhythmia Dataset — archive.ics.uci.edu/dataset/5/arrhythmia",
+        what_ai_predicts="Cardiac arrhythmia presence vs. normal sinus rhythm from ECG features",
+        license_type="CC BY 4.0",
+        license_url="https://creativecommons.org/licenses/by/4.0/",
+        requires_attribution=True,
+        feature_names=[
+            "age", "sex", "height", "weight", "QRS_duration",
+            "PR_interval", "QT_interval", "T_interval", "P_interval",
+            "QRS_axis", "T_axis", "P_axis", "heart_rate", "J_point",
+            "heart_rate_2",
+            "DI_R", "DI_S", "DI_T", "DI_P", "DI_QRSA", "DI_QRSTA",
+            "DII_R", "DII_S", "DII_T", "DII_P", "DII_QRSA", "DII_QRSTA",
+            "V1_R", "V1_S", "V1_T", "V1_P", "V5_R", "V5_S",
+        ],
+        clinical_context=(
+            "Cardiac arrhythmias encompass a diverse group of rhythm disorders ranging from "
+            "benign atrial ectopics to life-threatening ventricular fibrillation. "
+            "The 12-lead ECG is the primary diagnostic tool, providing measurements of "
+            "conduction intervals and waveform morphology. Automated arrhythmia detection "
+            "supports cardiac monitoring programs and remote cardiology services. "
+            "This model uses 13 ECG-derived parameters to classify patients as having "
+            "arrhythmia or normal cardiac rhythm, supporting cardiac screening programs."
+        ),
+    ),
+    "oncology_cervical": SpecialtyInfo(
+        id="oncology_cervical",
+        name="Oncology — Cervical",
+        description="Assess cervical cancer biopsy risk from demographic and behavioural risk factors.",
+        target_variable="Biopsy",
+        target_type="binary",
+        data_source="Cervical Cancer Dataset — archive.ics.uci.edu/dataset/383/cervical+cancer+risk+factors",
+        what_ai_predicts="Biopsy-confirmed cervical cancer from demographic and behavioural data",
+        license_type="CC BY 4.0",
+        license_url="https://creativecommons.org/licenses/by/4.0/",
+        requires_attribution=True,
+        feature_names=[
+            "age", "number_of_sexual_partners", "first_sexual_intercourse_age",
+            "num_of_pregnancies",
+            "smokes", "smokes_years",
+            "hormonal_contraceptives", "hormonal_contraceptives_years",
+            "iud", "iud_years",
+            "stds", "stds_number", "stds_condylomatosis",
+            "stds_cervical_condylomatosis", "stds_hpv",
+            "dx_cancer", "dx_cin", "dx_hpv", "dx",
+            "hinselmann", "schiller", "citology",
+        ],
+        clinical_context=(
+            "Cervical cancer is the fourth most common cancer in women globally, with "
+            "persistent HPV infection being the primary causative factor. Risk stratification "
+            "using demographic and behavioural data can identify women who require "
+            "expedited colposcopy or biopsy. Early detection through cytological and "
+            "histological examination enables curative treatment. "
+            "This model uses 11 demographic, sexual health, and medical history variables "
+            "to predict biopsy-confirmed cervical cancer, supporting targeted screening "
+            "in resource-limited settings."
+        ),
+    ),
+    "thyroid": SpecialtyInfo(
+        id="thyroid",
+        name="Thyroid / Endocrinology",
+        description="Classify thyroid function as hypothyroid, hyperthyroid, or normal from biochemical assay results.",
+        target_variable="class",
+        target_type="multiclass",
+        data_source="UCI New Thyroid Dataset — archive.ics.uci.edu/dataset/102/thyroid+disease",
+        what_ai_predicts="Thyroid function classification (hyperthyroid / normal / hypothyroid)",
+        license_type="CC BY 4.0",
+        license_url="https://creativecommons.org/licenses/by/4.0/",
+        requires_attribution=True,
+        feature_names=[
+            "T3_resin_uptake", "total_serum_thyroxine", "T3", "TSH", "max_abs_diff_TSH",
+        ],
+        clinical_context=(
+            "Thyroid dysfunction affects approximately 5% of the global population. "
+            "Hyperthyroidism (excess hormone) and hypothyroidism (deficiency) are diagnosed "
+            "primarily through laboratory thyroid function tests. "
+            "The T3 resin uptake reflects thyroid hormone binding capacity, "
+            "total serum thyroxine (T4) measures overall hormone production, "
+            "and TSH (thyroid-stimulating hormone) is the most sensitive marker of thyroid status. "
+            "This model uses 5 biochemical assay values from the UCI New Thyroid dataset "
+            "to classify patients into three categories — hyperthyroid, normal, or hypothyroid — "
+            "supporting primary care screening and endocrinology referral decisions."
+        ),
+    ),
+    "pharmacy_readmission": SpecialtyInfo(
+        id="pharmacy_readmission",
+        name="Pharmacy — Readmission",
+        description="Predict hospital readmission risk for diabetic inpatients using medication and clinical data.",
+        target_variable="readmitted",
+        target_type="multiclass",
+        data_source="Diabetes 130-US Hospitals Dataset — archive.ics.uci.edu/dataset/296/diabetes+130-us+hospitals+for+years+1999-2008",
+        what_ai_predicts="Readmission risk: <30 days / >30 days / NO from medication and utilisation data",
+        license_type="CC BY 4.0",
+        license_url="https://creativecommons.org/licenses/by/4.0/",
+        requires_attribution=True,
+        feature_names=[
+            "age", "gender", "time_in_hospital", "num_lab_procedures",
+            "num_procedures", "num_medications", "number_outpatient",
+            "number_emergency", "number_inpatient", "number_diagnoses",
+            "max_glu_serum", "A1Cresult", "metformin", "insulin", "change",
+            "discharge_disposition_id", "admission_type_id",
+            "admission_source_id", "diag_1",
+        ],
+        clinical_context=(
+            "Hospital readmission within 30 days is a key quality indicator and financial "
+            "penalty trigger under value-based care programmes. Diabetic patients have "
+            "disproportionately high readmission rates due to complex medication regimens, "
+            "comorbidities, and glycaemic instability. "
+            "The UCI 130-US Hospitals dataset contains over 100,000 diabetic patient "
+            "encounters from 130 US hospitals over 10 years. "
+            "This model classifies patients into three readmission risk groups — "
+            "within 30 days, after 30 days, or no readmission — using 15 clinical, "
+            "medication, and utilisation variables to guide discharge planning."
+        ),
+    ),
+}
+def get_specialty(specialty_id: str) -> SpecialtyInfo | None:
+    """Look up one specialty by id, return `None` if unknown."""
+    return SPECIALTIES.get(specialty_id)
+def list_specialties() -> list[SpecialtyInfo]:
+    """Return the full registry as a list, in the order the Step-1 picker expects."""
+    return list(SPECIALTIES.values())

app/utils/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Shared utility helpers used across services."""

arena/__init__.py ADDED Viewed

File without changes

arena/router.py ADDED Viewed

	@@ -0,0 +1,72 @@

+"""Model Arena REST endpoints."""
+from __future__ import annotations
+import logging
+from fastapi import APIRouter, HTTPException, Request, status
+from fastapi.responses import Response
+from .schemas import (
+    ArenaCompareRequest,
+    ArenaCompareResponse,
+    ArenaRun,
+    BatchTrainRequest,
+    BatchTrainResponse,
+)
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+# Every endpoint below is registered under the /api/arena prefix and carries the "arena" tag.
+router = APIRouter(prefix="/api/arena", tags=["arena"])
+def _get_arena_service(request: Request):
+    return request.app.state.arena_service
+@router.post("/batch-train", response_model=BatchTrainResponse)
+def batch_train(request: Request, body: BatchTrainRequest) -> BatchTrainResponse:
+    """Train multiple models in one request."""
+    arena = _get_arena_service(request)
+    logger.info("Arena batch_train: session=%s models=%d", body.session_id, len(body.models))
+    try:
+        result = arena.batch_train(body)
+        completed = sum(1 for r in result.runs if r.status == "completed")
+        logger.info("Arena batch_train done: %d/%d completed", completed, len(result.runs))
+        return result
+    except (ValueError, KeyError) as exc:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(exc))
+    except Exception as exc:
+        logger.exception("Batch training failed")
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc))
+@router.get("/runs/{session_id}", response_model=list[ArenaRun])
+def get_runs(request: Request, session_id: str) -> list[ArenaRun]:
+    """Get all arena runs for a session."""
+    arena = _get_arena_service(request)
+    # Return empty list if session has no arena runs yet but ML session exists
+    ml_service = request.app.state.ml_service
+    if not arena.has_session(session_id) and ml_service.get_session(session_id) is None:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"Session '{session_id}' not found",
+        )
+    return arena.get_runs(session_id)
+@router.post("/compare/{session_id}", response_model=ArenaCompareResponse)
+def compare_runs(
+    request: Request, session_id: str, body: ArenaCompareRequest
+) -> ArenaCompareResponse:
+    """Compare selected runs."""
+    arena = _get_arena_service(request)
+    try:
+        return arena.compare_runs(session_id, body.run_ids)
+    except ValueError as exc:
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(exc))
+@router.delete("/runs/{session_id}", status_code=204)
+def clear_runs(request: Request, session_id: str):
+    """Clear all arena runs for a session."""
+    _get_arena_service(request).clear_runs(session_id)
+    return Response(status_code=204)

arena/schemas.py ADDED Viewed

	@@ -0,0 +1,64 @@

+"""Pydantic schemas for Model Arena."""
+from __future__ import annotations
+from typing import Any, Literal
+from pydantic import BaseModel, Field, field_validator
+from app.models.ml_schemas import KNNScatterData, MetricsResponse, ModelType
+class ArenaModelConfig(BaseModel):
+    """One model to train in a batch."""
+    # Model type to train (a ``ModelType`` value).
+    model_type: ModelType
+    # Hyperparameters handed to training; each instance gets its own empty dict by default.
+    params: dict[str, Any] = Field(default_factory=dict)
+    # Forwarded as the ``tune`` keyword of the ML service's train_and_evaluate call.
+    tune: bool = False
+    # Forwarded as the ``use_feature_selection`` keyword of the same call.
+    use_feature_selection: bool = False
+class BatchTrainRequest(BaseModel):
+    """Request to train multiple models on the same session."""
+    # Session every model in this request is trained on.
+    session_id: str
+    # Required list of 1 to 8 model configurations.
+    models: list[ArenaModelConfig] = Field(..., min_length=1, max_length=8)
+class ArenaRun(BaseModel):
+    """A single trained model run in the arena."""
+    # Identifier of this run within the arena.
+    run_id: str
+    # Identifier of the trained model this run refers to.
+    model_id: str
+    model_type: ModelType
+    # Hyperparameters associated with the run.
+    params: dict[str, Any]
+    metrics: MetricsResponse | None = None  # None for failed runs
+    # Training duration in milliseconds.
+    training_time_ms: float
+    feature_names: list[str]
+    # Optional KNN scatter payload; absent unless the producer supplies one.
+    knn_scatter: KNNScatterData | None = None
+    # Outcome of the run; only "completed" or "failed" are accepted.
+    status: Literal["completed", "failed"] = "completed"
+    # Error text for a failed run, otherwise None.
+    error: str | None = None
+class BatchTrainResponse(BaseModel):
+    """Response from batch training."""
+    session_id: str
+    # Runs produced for this request.
+    runs: list[ArenaRun]
+    # Aggregate training time in milliseconds.
+    total_training_time_ms: float
+    # Id of the run considered best, or None when there is no such run.
+    best_run_id: str | None = None
+class ArenaCompareRequest(BaseModel):
+    """Request to compare specific runs."""
+    run_ids: list[str] = Field(..., min_length=2, max_length=8)
+    @field_validator("run_ids")
+    @classmethod
+    def no_duplicates(cls, v: list[str]) -> list[str]:
+        if len(v) != len(set(v)):
+            raise ValueError("run_ids must be unique")
+        return v
+class ArenaCompareResponse(BaseModel):
+    """Comparison data for selected runs."""
+    # The runs included in the comparison.
+    runs: list[ArenaRun]
+    # Id of the run considered best among ``runs``.
+    best_run_id: str
+    metric_summary: dict[str, dict[str, float]]  # metric_name -> {run_id: value}
+    param_diff: dict[str, dict[str, Any]]  # param_name -> {run_id: value} (only differing params)

arena/service.py ADDED Viewed

	@@ -0,0 +1,199 @@

+"""Arena service -- batch training and run management."""
+from __future__ import annotations
+import logging
+import threading
+import uuid
+from collections import OrderedDict
+from typing import Any
+from app.services.ml_service import MLService
+from .schemas import (
+    ArenaCompareResponse,
+    ArenaModelConfig,
+    ArenaRun,
+    BatchTrainRequest,
+    BatchTrainResponse,
+)
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+# Maximum number of sessions whose runs ArenaService keeps in memory; once the
+# count exceeds this, entries are dropped from the front of its ordered store.
+_MAX_SESSIONS = 50
+class ArenaService:
+    def __init__(self, ml_service: MLService) -> None:
+        self._ml = ml_service
+        self._lock = threading.Lock()
+        # session_id -> list of ArenaRun (LRU-evicted at _MAX_SESSIONS)
+        self._runs: OrderedDict[str, list[ArenaRun]] = OrderedDict()
+        # Track sessions currently being batch-trained to prevent duplicates
+        self._in_flight: set[str] = set()
+    def batch_train(self, request: BatchTrainRequest) -> BatchTrainResponse:
+        """Train multiple models sequentially on the same session."""
+        # Pre-flight: verify session exists (raises KeyError → router returns 404)
+        if self._ml.get_session(request.session_id) is None:
+            raise KeyError(f"Session '{request.session_id}' not found. Run /api/prepare first.")
+        # Guard against concurrent batch_train for same session
+        with self._lock:
+            if request.session_id in self._in_flight:
+                raise ValueError(
+                    f"Batch training already in progress for session '{request.session_id}'"
+                )
+            self._in_flight.add(request.session_id)
+        try:
+            runs: list[ArenaRun] = []
+            total_time = 0.0
+            for model_cfg in request.models:
+                run_id = str(uuid.uuid4())
+                try:
+                    response = self._ml.train_and_evaluate(
+                        request.session_id,
+                        model_cfg.model_type,
+                        model_cfg.params,
+                        tune=model_cfg.tune,
+                        use_feature_selection=model_cfg.use_feature_selection,
+                    )
+                    self._ml.store_train_response_in_model(response.model_id, response)
+                    run = ArenaRun(
+                        run_id=run_id,
+                        model_id=response.model_id,
+                        model_type=model_cfg.model_type,
+                        params=response.params,
+                        metrics=response.metrics,
+                        training_time_ms=response.training_time_ms,
+                        feature_names=response.feature_names,
+                        knn_scatter=response.knn_scatter,
+                    )
+                    total_time += response.training_time_ms
+                except (ImportError, MemoryError):
+                    raise  # Non-recoverable — propagate to router as 500
+                except Exception as exc:
+                    logger.warning("Arena: model %s failed: %s", model_cfg.model_type.value, exc)
+                    run = ArenaRun(
+                        run_id=run_id,
+                        model_id="",
+                        model_type=model_cfg.model_type,
+                        params=model_cfg.params,
+                        metrics=None,
+                        training_time_ms=0.0,
+                        feature_names=[],
+                        status="failed",
+                        error=str(exc),
+                    )
+                runs.append(run)
+            # Store runs with LRU eviction
+            with self._lock:
+                if request.session_id not in self._runs:
+                    self._runs[request.session_id] = []
+                self._runs[request.session_id].extend(runs)
+                self._runs.move_to_end(request.session_id)
+                while len(self._runs) > _MAX_SESSIONS:
+                    self._runs.popitem(last=False)
+                # Compute best across ALL session runs (not just this batch)
+                all_completed = [
+                    r for r in self._runs.get(request.session_id, [])
+                    if r.status == "completed" and r.metrics is not None
+                ]
+            best_id = None
+            if all_completed:
+                best = max(all_completed, key=lambda r: r.metrics.auc_roc)  # type: ignore[union-attr]
+                best_id = best.run_id
+            return BatchTrainResponse(
+                session_id=request.session_id,
+                runs=runs,
+                total_training_time_ms=total_time,
+                best_run_id=best_id,
+            )
+        finally:
+            with self._lock:
+                self._in_flight.discard(request.session_id)
+    def get_runs(self, session_id: str) -> list[ArenaRun]:
+        """Get all arena runs for a session."""
+        with self._lock:
+            return list(self._runs.get(session_id, []))
+    def has_session(self, session_id: str) -> bool:
+        """Check if a session has any arena runs."""
+        with self._lock:
+            return session_id in self._runs
+    def get_run(self, session_id: str, run_id: str) -> ArenaRun | None:
+        """Get a specific run."""
+        with self._lock:
+            for run in self._runs.get(session_id, []):
+                if run.run_id == run_id:
+                    return run
+        return None
+    def compare_runs(self, session_id: str, run_ids: list[str]) -> ArenaCompareResponse:
+        """Build comparison data for selected runs."""
+        with self._lock:
+            all_runs = self._runs.get(session_id, [])
+            all_run_ids = {r.run_id for r in all_runs}
+            selected = [
+                r for r in all_runs
+                if r.run_id in run_ids and r.status == "completed" and r.metrics is not None
+            ]
+        # Check for missing run IDs
+        missing = [rid for rid in run_ids if rid not in all_run_ids]
+        if missing:
+            raise ValueError(f"Run IDs not found in session '{session_id}': {missing}")
+        # Check for runs that exist but are failed
+        selected_ids = {r.run_id for r in selected}
+        failed = [rid for rid in run_ids if rid in all_run_ids and rid not in selected_ids]
+        if failed:
+            raise ValueError(f"Run IDs exist but are in failed state: {failed}")
+        if len(selected) < 2:
+            raise ValueError("Need at least 2 completed runs to compare")
+        # Build metric summary: metric_name -> {run_id: value}
+        metric_names = [
+            "accuracy", "sensitivity", "specificity", "precision",
+            "f1_score", "auc_roc", "mcc", "train_accuracy",
+        ]
+        metric_summary: dict[str, dict[str, float]] = {}
+        for name in metric_names:
+            metric_summary[name] = {
+                r.run_id: getattr(r.metrics, name) for r in selected
+            }
+        # Build param diff: only params that differ across runs
+        all_params: dict[str, dict[str, Any]] = {}
+        for r in selected:
+            for k, v in r.params.items():
+                if k not in all_params:
+                    all_params[k] = {}
+                all_params[k][r.run_id] = v
+        param_diff = {
+            k: vals for k, vals in all_params.items()
+            if len(set(str(v) for v in vals.values())) > 1
+        }
+        best = max(selected, key=lambda r: r.metrics.auc_roc)  # type: ignore[union-attr]
+        return ArenaCompareResponse(
+            runs=selected,
+            best_run_id=best.run_id,
+            metric_summary=metric_summary,
+            param_diff=param_diff,
+        )
+    def clear_runs(self, session_id: str) -> None:
+        """Clear all runs for a session."""
+        with self._lock:
+            self._runs.pop(session_id, None)

data_cache/cardiology_arrhythmia.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data_cache/cardiology_hf.csv ADDED Viewed

	@@ -0,0 +1,300 @@

+age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
+75,0,582,0,20,1,265000,1.9,130,1,0,4,1
+55,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
+65,0,146,0,20,0,162000,1.3,129,1,1,7,1
+50,1,111,0,20,0,210000,1.9,137,1,0,7,1
+65,1,160,1,20,0,327000,2.7,116,0,0,8,1
+90,1,47,0,40,1,204000,2.1,132,1,1,8,1
+75,1,246,0,15,0,127000,1.2,137,1,0,10,1
+60,1,315,1,60,0,454000,1.1,131,1,1,10,1
+65,0,157,0,65,0,263358.03,1.5,138,0,0,10,1
+80,1,123,0,35,1,388000,9.4,133,1,1,10,1
+75,1,81,0,38,1,368000,4,131,1,1,10,1
+62,0,231,0,25,1,253000,0.9,140,1,1,10,1
+45,1,981,0,30,0,136000,1.1,137,1,0,11,1
+50,1,168,0,38,1,276000,1.1,137,1,0,11,1
+49,1,80,0,30,1,427000,1,138,0,0,12,0
+82,1,379,0,50,0,47000,1.3,136,1,0,13,1
+87,1,149,0,38,0,262000,0.9,140,1,0,14,1
+45,0,582,0,14,0,166000,0.8,127,1,0,14,1
+70,1,125,0,25,1,237000,1,140,0,0,15,1
+48,1,582,1,55,0,87000,1.9,121,0,0,15,1
+65,1,52,0,25,1,276000,1.3,137,0,0,16,0
+65,1,128,1,30,1,297000,1.6,136,0,0,20,1
+68,1,220,0,35,1,289000,0.9,140,1,1,20,1
+53,0,63,1,60,0,368000,0.8,135,1,0,22,0
+75,0,582,1,30,1,263358.03,1.83,134,0,0,23,1
+80,0,148,1,38,0,149000,1.9,144,1,1,23,1
+95,1,112,0,40,1,196000,1,138,0,0,24,1
+70,0,122,1,45,1,284000,1.3,136,1,1,26,1
+58,1,60,0,38,0,153000,5.8,134,1,0,26,1
+82,0,70,1,30,0,200000,1.2,132,1,1,26,1
+94,0,582,1,38,1,263358.03,1.83,134,1,0,27,1
+85,0,23,0,45,0,360000,3,132,1,0,28,1
+50,1,249,1,35,1,319000,1,128,0,0,28,1
+50,1,159,1,30,0,302000,1.2,138,0,0,29,0
+65,0,94,1,50,1,188000,1,140,1,0,29,1
+69,0,582,1,35,0,228000,3.5,134,1,0,30,1
+90,1,60,1,50,0,226000,1,134,1,0,30,1
+82,1,855,1,50,1,321000,1,145,0,0,30,1
+60,0,2656,1,30,0,305000,2.3,137,1,0,30,0
+60,0,235,1,38,0,329000,3,142,0,0,30,1
+70,0,582,0,20,1,263358.03,1.83,134,1,1,31,1
+50,0,124,1,30,1,153000,1.2,136,0,1,32,1
+70,0,571,1,45,1,185000,1.2,139,1,1,33,1
+72,0,127,1,50,1,218000,1,134,1,0,33,0
+60,1,588,1,60,0,194000,1.1,142,0,0,33,1
+50,0,582,1,38,0,310000,1.9,135,1,1,35,1
+51,0,1380,0,25,1,271000,0.9,130,1,0,38,1
+60,0,582,1,38,1,451000,0.6,138,1,1,40,1
+80,1,553,0,20,1,140000,4.4,133,1,0,41,1
+57,1,129,0,30,0,395000,1,140,0,0,42,1
+68,1,577,0,25,1,166000,1,138,1,0,43,1
+53,1,91,0,20,1,418000,1.4,139,0,0,43,1
+60,0,3964,1,62,0,263358.03,6.8,146,0,0,43,1
+70,1,69,1,50,1,351000,1,134,0,0,44,1
+60,1,260,1,38,0,255000,2.2,132,0,1,45,1
+95,1,371,0,30,0,461000,2,132,1,0,50,1
+70,1,75,0,35,0,223000,2.7,138,1,1,54,0
+60,1,607,0,40,0,216000,0.6,138,1,1,54,0
+49,0,789,0,20,1,319000,1.1,136,1,1,55,1
+72,0,364,1,20,1,254000,1.3,136,1,1,59,1
+45,0,7702,1,25,1,390000,1,139,1,0,60,1
+50,0,318,0,40,1,216000,2.3,131,0,0,60,1
+55,0,109,0,35,0,254000,1.1,139,1,1,60,0
+45,0,582,0,35,0,385000,1,145,1,0,61,1
+45,0,582,0,80,0,263358.03,1.18,137,0,0,63,0
+60,0,68,0,20,0,119000,2.9,127,1,1,64,1
+42,1,250,1,15,0,213000,1.3,136,0,0,65,1
+72,1,110,0,25,0,274000,1,140,1,1,65,1
+70,0,161,0,25,0,244000,1.2,142,0,0,66,1
+65,0,113,1,25,0,497000,1.83,135,1,0,67,1
+41,0,148,0,40,0,374000,0.8,140,1,1,68,0
+58,0,582,1,35,0,122000,0.9,139,1,1,71,0
+85,0,5882,0,35,0,243000,1,132,1,1,72,1
+65,0,224,1,50,0,149000,1.3,137,1,1,72,0
+69,0,582,0,20,0,266000,1.2,134,1,1,73,1
+60,1,47,0,20,0,204000,0.7,139,1,1,73,1
+70,0,92,0,60,1,317000,0.8,140,0,1,74,0
+42,0,102,1,40,0,237000,1.2,140,1,0,74,0
+75,1,203,1,38,1,283000,0.6,131,1,1,74,0
+55,0,336,0,45,1,324000,0.9,140,0,0,74,0
+70,0,69,0,40,0,293000,1.7,136,0,0,75,0
+67,0,582,0,50,0,263358.03,1.18,137,1,1,76,0
+60,1,76,1,25,0,196000,2.5,132,0,0,77,1
+79,1,55,0,50,1,172000,1.8,133,1,0,78,0
+59,1,280,1,25,1,302000,1,141,0,0,78,1
+51,0,78,0,50,0,406000,0.7,140,1,0,79,0
+55,0,47,0,35,1,173000,1.1,137,1,0,79,0
+65,1,68,1,60,1,304000,0.8,140,1,0,79,0
+44,0,84,1,40,1,235000,0.7,139,1,0,79,0
+57,1,115,0,25,1,181000,1.1,144,1,0,79,0
+70,0,66,1,45,0,249000,0.8,136,1,1,80,0
+60,0,897,1,45,0,297000,1,133,1,0,80,0
+42,0,582,0,60,0,263358.03,1.18,137,0,0,82,0
+60,1,154,0,25,0,210000,1.7,135,1,0,82,1
+58,0,144,1,38,1,327000,0.7,142,0,0,83,0
+58,1,133,0,60,1,219000,1,141,1,0,83,0
+63,1,514,1,25,1,254000,1.3,134,1,0,83,0
+70,1,59,0,60,0,255000,1.1,136,0,0,85,0
+60,1,156,1,25,1,318000,1.2,137,0,0,85,0
+63,1,61,1,40,0,221000,1.1,140,0,0,86,0
+65,1,305,0,25,0,298000,1.1,141,1,0,87,0
+75,0,582,0,45,1,263358.03,1.18,137,1,0,87,0
+80,0,898,0,25,0,149000,1.1,144,1,1,87,0
+42,0,5209,0,30,0,226000,1,140,1,1,87,0
+60,0,53,0,50,1,286000,2.3,143,0,0,87,0
+72,1,328,0,30,1,621000,1.7,138,0,1,88,1
+55,0,748,0,45,0,263000,1.3,137,1,0,88,0
+45,1,1876,1,35,0,226000,0.9,138,1,0,88,0
+63,0,936,0,38,0,304000,1.1,133,1,1,88,0
+45,0,292,1,35,0,850000,1.3,142,1,1,88,0
+85,0,129,0,60,0,306000,1.2,132,1,1,90,1
+55,0,60,0,35,0,228000,1.2,135,1,1,90,0
+50,0,369,1,25,0,252000,1.6,136,1,0,90,0
+70,1,143,0,60,0,351000,1.3,137,0,0,90,1
+60,1,754,1,40,1,328000,1.2,126,1,0,91,0
+58,1,400,0,40,0,164000,1,139,0,0,91,0
+60,1,96,1,60,1,271000,0.7,136,0,0,94,0
+85,1,102,0,60,0,507000,3.2,138,0,0,94,0
+65,1,113,1,60,1,203000,0.9,140,0,0,94,0
+86,0,582,0,38,0,263358.03,1.83,134,0,0,95,1
+60,1,737,0,60,1,210000,1.5,135,1,1,95,0
+66,1,68,1,38,1,162000,1,136,0,0,95,0
+60,0,96,1,38,0,228000,0.75,140,0,0,95,0
+60,1,582,0,30,1,127000,0.9,145,0,0,95,0
+60,0,582,0,40,0,217000,3.7,134,1,0,96,1
+43,1,358,0,50,0,237000,1.3,135,0,0,97,0
+46,0,168,1,17,1,271000,2.1,124,0,0,100,1
+58,1,200,1,60,0,300000,0.8,137,0,0,104,0
+61,0,248,0,30,1,267000,0.7,136,1,1,104,0
+53,1,270,1,35,0,227000,3.4,145,1,0,105,0
+53,1,1808,0,60,1,249000,0.7,138,1,1,106,0
+60,1,1082,1,45,0,250000,6.1,131,1,0,107,0
+46,0,719,0,40,1,263358.03,1.18,137,0,0,107,0
+63,0,193,0,60,1,295000,1.3,145,1,1,107,0
+81,0,4540,0,35,0,231000,1.18,137,1,1,107,0
+75,0,582,0,40,0,263358.03,1.18,137,1,0,107,0
+65,1,59,1,60,0,172000,0.9,137,0,0,107,0
+68,1,646,0,25,0,305000,2.1,130,1,0,108,0
+62,0,281,1,35,0,221000,1,136,0,0,108,0
+50,0,1548,0,30,1,211000,0.8,138,1,0,108,0
+80,0,805,0,38,0,263358.03,1.1,134,1,0,109,1
+46,1,291,0,35,0,348000,0.9,140,0,0,109,0
+50,0,482,1,30,0,329000,0.9,132,0,0,109,0
+61,1,84,0,40,1,229000,0.9,141,0,0,110,0
+72,1,943,0,25,1,338000,1.7,139,1,1,111,1
+50,0,185,0,30,0,266000,0.7,141,1,1,112,0
+52,0,132,0,30,0,218000,0.7,136,1,1,112,0
+64,0,1610,0,60,0,242000,1,137,1,0,113,0
+75,1,582,0,30,0,225000,1.83,134,1,0,113,1
+60,0,2261,0,35,1,228000,0.9,136,1,0,115,0
+72,0,233,0,45,1,235000,2.5,135,0,0,115,1
+62,0,30,1,60,1,244000,0.9,139,1,0,117,0
+50,0,115,0,45,1,184000,0.9,134,1,1,118,0
+50,0,1846,1,35,0,263358.03,1.18,137,1,1,119,0
+65,1,335,0,35,1,235000,0.8,136,0,0,120,0
+60,1,231,1,25,0,194000,1.7,140,1,0,120,0
+52,1,58,0,35,0,277000,1.4,136,0,0,120,0
+50,0,250,0,25,0,262000,1,136,1,1,120,0
+85,1,910,0,50,0,235000,1.3,134,1,0,121,0
+59,1,129,0,45,1,362000,1.1,139,1,1,121,0
+66,1,72,0,40,1,242000,1.2,134,1,0,121,0
+45,1,130,0,35,0,174000,0.8,139,1,1,121,0
+63,1,582,0,40,0,448000,0.9,137,1,1,123,0
+50,1,2334,1,35,0,75000,0.9,142,0,0,126,1
+45,0,2442,1,30,0,334000,1.1,139,1,0,129,1
+80,0,776,1,38,1,192000,1.3,135,0,0,130,1
+53,0,196,0,60,0,220000,0.7,133,1,1,134,0
+59,0,66,1,20,0,70000,2.4,134,1,0,135,1
+65,0,582,1,40,0,270000,1,138,0,0,140,0
+70,0,835,0,35,1,305000,0.8,133,0,0,145,0
+51,1,582,1,35,0,263358.03,1.5,136,1,1,145,0
+52,0,3966,0,40,0,325000,0.9,140,1,1,146,0
+70,1,171,0,60,1,176000,1.1,145,1,1,146,0
+50,1,115,0,20,0,189000,0.8,139,1,0,146,0
+65,0,198,1,35,1,281000,0.9,137,1,1,146,0
+60,1,95,0,60,0,337000,1,138,1,1,146,0
+69,0,1419,0,40,0,105000,1,135,1,1,147,0
+49,1,69,0,50,0,132000,1,140,0,0,147,0
+63,1,122,1,60,0,267000,1.2,145,1,0,147,0
+55,0,835,0,40,0,279000,0.7,140,1,1,147,0
+40,0,478,1,30,0,303000,0.9,136,1,0,148,0
+59,1,176,1,25,0,221000,1,136,1,1,150,1
+65,0,395,1,25,0,265000,1.2,136,1,1,154,1
+75,0,99,0,38,1,224000,2.5,134,1,0,162,1
+58,1,145,0,25,0,219000,1.2,137,1,1,170,1
+60.667,1,104,1,30,0,389000,1.5,136,1,0,171,1
+50,0,582,0,50,0,153000,0.6,134,0,0,172,1
+60,0,1896,1,25,0,365000,2.1,144,0,0,172,1
+60.667,1,151,1,40,1,201000,1,136,0,0,172,0
+40,0,244,0,45,1,275000,0.9,140,0,0,174,0
+80,0,582,1,35,0,350000,2.1,134,1,0,174,0
+64,1,62,0,60,0,309000,1.5,135,0,0,174,0
+50,1,121,1,40,0,260000,0.7,130,1,0,175,0
+73,1,231,1,30,0,160000,1.18,142,1,1,180,0
+45,0,582,0,20,1,126000,1.6,135,1,0,180,1
+77,1,418,0,45,0,223000,1.8,145,1,0,180,1
+45,0,582,1,38,1,263358.03,1.18,137,0,0,185,0
+65,0,167,0,30,0,259000,0.8,138,0,0,186,0
+50,1,582,1,20,1,279000,1,134,0,0,186,0
+60,0,1211,1,35,0,263358.03,1.8,113,1,1,186,0
+63,1,1767,0,45,0,73000,0.7,137,1,0,186,0
+45,0,308,1,60,1,377000,1,136,1,0,186,0
+70,0,97,0,60,1,220000,0.9,138,1,0,186,0
+60,0,59,0,25,1,212000,3.5,136,1,1,187,0
+78,1,64,0,40,0,277000,0.7,137,1,1,187,0
+50,1,167,1,45,0,362000,1,136,0,0,187,0
+40,1,101,0,40,0,226000,0.8,141,0,0,187,0
+85,0,212,0,38,0,186000,0.9,136,1,0,187,0
+60,1,2281,1,40,0,283000,1,141,0,0,187,0
+49,0,972,1,35,1,268000,0.8,130,0,0,187,0
+70,0,212,1,17,1,389000,1,136,1,1,188,0
+50,0,582,0,62,1,147000,0.8,140,1,1,192,0
+78,0,224,0,50,0,481000,1.4,138,1,1,192,0
+48,1,131,1,30,1,244000,1.6,130,0,0,193,1
+65,1,135,0,35,1,290000,0.8,134,1,0,194,0
+73,0,582,0,35,1,203000,1.3,134,1,0,195,0
+70,0,1202,0,50,1,358000,0.9,141,0,0,196,0
+54,1,427,0,70,1,151000,9,137,0,0,196,1
+68,1,1021,1,35,0,271000,1.1,134,1,0,197,0
+55,0,582,1,35,1,371000,0.7,140,0,0,197,0
+73,0,582,0,20,0,263358.03,1.83,134,1,0,198,1
+65,0,118,0,50,0,194000,1.1,145,1,1,200,0
+42,1,86,0,35,0,365000,1.1,139,1,1,201,0
+47,0,582,0,25,0,130000,0.8,134,1,0,201,0
+58,0,582,1,25,0,504000,1,138,1,0,205,0
+75,0,675,1,60,0,265000,1.4,125,0,0,205,0
+58,1,57,0,25,0,189000,1.3,132,1,1,205,0
+55,1,2794,0,35,1,141000,1,140,1,0,206,0
+65,0,56,0,25,0,237000,5,130,0,0,207,0
+72,0,211,0,25,0,274000,1.2,134,0,0,207,0
+60,0,166,0,30,0,62000,1.7,127,0,0,207,1
+70,0,93,0,35,0,185000,1.1,134,1,1,208,0
+40,1,129,0,35,0,255000,0.9,137,1,0,209,0
+53,1,707,0,38,0,330000,1.4,137,1,1,209,0
+53,1,582,0,45,0,305000,1.1,137,1,1,209,0
+77,1,109,0,50,1,406000,1.1,137,1,0,209,0
+75,0,119,0,50,1,248000,1.1,148,1,0,209,0
+70,0,232,0,30,0,173000,1.2,132,1,0,210,0
+65,1,720,1,40,0,257000,1,136,0,0,210,0
+55,1,180,0,45,0,263358.03,1.18,137,1,1,211,0
+70,0,81,1,35,1,533000,1.3,139,0,0,212,0
+65,0,582,1,30,0,249000,1.3,136,1,1,212,0
+40,0,90,0,35,0,255000,1.1,136,1,1,212,0
+73,1,1185,0,40,1,220000,0.9,141,0,0,213,0
+54,0,582,1,38,0,264000,1.8,134,1,0,213,0
+61,1,80,1,38,0,282000,1.4,137,1,0,213,0
+55,0,2017,0,25,0,314000,1.1,138,1,0,214,1
+64,0,143,0,25,0,246000,2.4,135,1,0,214,0
+40,0,624,0,35,0,301000,1,142,1,1,214,0
+53,0,207,1,40,0,223000,1.2,130,0,0,214,0
+50,0,2522,0,30,1,404000,0.5,139,0,0,214,0
+55,0,572,1,35,0,231000,0.8,143,0,0,215,0
+50,0,245,0,45,1,274000,1,133,1,0,215,0
+70,0,88,1,35,1,236000,1.2,132,0,0,215,0
+53,1,446,0,60,1,263358.03,1,139,1,0,215,0
+52,1,191,1,30,1,334000,1,142,1,1,216,0
+65,0,326,0,38,0,294000,1.7,139,0,0,220,0
+58,0,132,1,38,1,253000,1,139,1,0,230,0
+45,1,66,1,25,0,233000,0.8,135,1,0,230,0
+53,0,56,0,50,0,308000,0.7,135,1,1,231,0
+55,0,66,0,40,0,203000,1,138,1,0,233,0
+62,1,655,0,40,0,283000,0.7,133,0,0,233,0
+65,1,258,1,25,0,198000,1.4,129,1,0,235,1
+68,1,157,1,60,0,208000,1,140,0,0,237,0
+61,0,582,1,38,0,147000,1.2,141,1,0,237,0
+50,1,298,0,35,0,362000,0.9,140,1,1,240,0
+55,0,1199,0,20,0,263358.03,1.83,134,1,1,241,1
+56,1,135,1,38,0,133000,1.7,140,1,0,244,0
+45,0,582,1,38,0,302000,0.9,140,0,0,244,0
+40,0,582,1,35,0,222000,1,132,1,0,244,0
+44,0,582,1,30,1,263358.03,1.6,130,1,1,244,0
+51,0,582,1,40,0,221000,0.9,134,0,0,244,0
+67,0,213,0,38,0,215000,1.2,133,0,0,245,0
+42,0,64,0,40,0,189000,0.7,140,1,0,245,0
+60,1,257,1,30,0,150000,1,137,1,1,245,0
+45,0,582,0,38,1,422000,0.8,137,0,0,245,0
+70,0,618,0,35,0,327000,1.1,142,0,0,245,0
+70,0,582,1,38,0,25100,1.1,140,1,0,246,0
+50,1,1051,1,30,0,232000,0.7,136,0,0,246,0
+55,0,84,1,38,0,451000,1.3,136,0,0,246,0
+70,0,2695,1,40,0,241000,1,137,1,0,247,0
+70,0,582,0,40,0,51000,2.7,136,1,1,250,0
+42,0,64,0,30,0,215000,3.8,128,1,1,250,0
+65,0,1688,0,38,0,263358.03,1.1,138,1,1,250,0
+50,1,54,0,40,0,279000,0.8,141,1,0,250,0
+55,1,170,1,40,0,336000,1.2,135,1,0,250,0
+60,0,253,0,35,0,279000,1.7,140,1,0,250,0
+45,0,582,1,55,0,543000,1,132,0,0,250,0
+65,0,892,1,35,0,263358.03,1.1,142,0,0,256,0
+90,1,337,0,38,0,390000,0.9,144,0,0,256,0
+45,0,615,1,55,0,222000,0.8,141,0,0,257,0
+60,0,320,0,35,0,133000,1.4,139,1,0,258,0
+52,0,190,1,38,0,382000,1,140,1,1,258,0
+63,1,103,1,35,0,179000,0.9,136,1,1,270,0
+62,0,61,1,38,1,155000,1.1,143,1,1,270,0
+55,0,1820,0,38,0,270000,1.2,139,0,0,271,0
+45,0,2060,1,60,0,742000,0.8,138,0,0,278,0
+45,0,2413,0,38,0,140000,1.4,140,1,1,280,0
+50,0,196,0,45,0,395000,1.6,136,1,1,285,0

data_cache/depression_data.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data_cache/dermatology.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data_cache/endocrinology_diabetes.csv ADDED Viewed

	@@ -0,0 +1,768 @@

+6,148,72,35,0,33.6,0.627,50,1
+1,85,66,29,0,26.6,0.351,31,0
+8,183,64,0,0,23.3,0.672,32,1
+1,89,66,23,94,28.1,0.167,21,0
+0,137,40,35,168,43.1,2.288,33,1
+5,116,74,0,0,25.6,0.201,30,0
+3,78,50,32,88,31.0,0.248,26,1
+10,115,0,0,0,35.3,0.134,29,0
+2,197,70,45,543,30.5,0.158,53,1
+8,125,96,0,0,0.0,0.232,54,1
+4,110,92,0,0,37.6,0.191,30,0
+10,168,74,0,0,38.0,0.537,34,1
+10,139,80,0,0,27.1,1.441,57,0
+1,189,60,23,846,30.1,0.398,59,1
+5,166,72,19,175,25.8,0.587,51,1
+7,100,0,0,0,30.0,0.484,32,1
+0,118,84,47,230,45.8,0.551,31,1
+7,107,74,0,0,29.6,0.254,31,1
+1,103,30,38,83,43.3,0.183,33,0
+1,115,70,30,96,34.6,0.529,32,1
+3,126,88,41,235,39.3,0.704,27,0
+8,99,84,0,0,35.4,0.388,50,0
+7,196,90,0,0,39.8,0.451,41,1
+9,119,80,35,0,29.0,0.263,29,1
+11,143,94,33,146,36.6,0.254,51,1
+10,125,70,26,115,31.1,0.205,41,1
+7,147,76,0,0,39.4,0.257,43,1
+1,97,66,15,140,23.2,0.487,22,0
+13,145,82,19,110,22.2,0.245,57,0
+5,117,92,0,0,34.1,0.337,38,0
+5,109,75,26,0,36.0,0.546,60,0
+3,158,76,36,245,31.6,0.851,28,1
+3,88,58,11,54,24.8,0.267,22,0
+6,92,92,0,0,19.9,0.188,28,0
+10,122,78,31,0,27.6,0.512,45,0
+4,103,60,33,192,24.0,0.966,33,0
+11,138,76,0,0,33.2,0.420,35,0
+9,102,76,37,0,32.9,0.665,46,1
+2,90,68,42,0,38.2,0.503,27,1
+4,111,72,47,207,37.1,1.390,56,1
+3,180,64,25,70,34.0,0.271,26,0
+7,133,84,0,0,40.2,0.696,37,0
+7,106,92,18,0,22.7,0.235,48,0
+9,171,110,24,240,45.4,0.721,54,1
+7,159,64,0,0,27.4,0.294,40,0
+0,180,66,39,0,42.0,1.893,25,1
+1,146,56,0,0,29.7,0.564,29,0
+2,71,70,27,0,28.0,0.586,22,0
+7,103,66,32,0,39.1,0.344,31,1
+7,105,0,0,0,0.0,0.305,24,0
+1,103,80,11,82,19.4,0.491,22,0
+1,101,50,15,36,24.2,0.526,26,0
+5,88,66,21,23,24.4,0.342,30,0
+8,176,90,34,300,33.7,0.467,58,1
+7,150,66,42,342,34.7,0.718,42,0
+1,73,50,10,0,23.0,0.248,21,0
+7,187,68,39,304,37.7,0.254,41,1
+0,100,88,60,110,46.8,0.962,31,0
+0,146,82,0,0,40.5,1.781,44,0
+0,105,64,41,142,41.5,0.173,22,0
+2,84,0,0,0,0.0,0.304,21,0
+8,133,72,0,0,32.9,0.270,39,1
+5,44,62,0,0,25.0,0.587,36,0
+2,141,58,34,128,25.4,0.699,24,0
+7,114,66,0,0,32.8,0.258,42,1
+5,99,74,27,0,29.0,0.203,32,0
+0,109,88,30,0,32.5,0.855,38,1
+2,109,92,0,0,42.7,0.845,54,0
+1,95,66,13,38,19.6,0.334,25,0
+4,146,85,27,100,28.9,0.189,27,0
+2,100,66,20,90,32.9,0.867,28,1
+5,139,64,35,140,28.6,0.411,26,0
+13,126,90,0,0,43.4,0.583,42,1
+4,129,86,20,270,35.1,0.231,23,0
+1,79,75,30,0,32.0,0.396,22,0
+1,0,48,20,0,24.7,0.140,22,0
+7,62,78,0,0,32.6,0.391,41,0
+5,95,72,33,0,37.7,0.370,27,0
+0,131,0,0,0,43.2,0.270,26,1
+2,112,66,22,0,25.0,0.307,24,0
+3,113,44,13,0,22.4,0.140,22,0
+2,74,0,0,0,0.0,0.102,22,0
+7,83,78,26,71,29.3,0.767,36,0
+0,101,65,28,0,24.6,0.237,22,0
+5,137,108,0,0,48.8,0.227,37,1
+2,110,74,29,125,32.4,0.698,27,0
+13,106,72,54,0,36.6,0.178,45,0
+2,100,68,25,71,38.5,0.324,26,0
+15,136,70,32,110,37.1,0.153,43,1
+1,107,68,19,0,26.5,0.165,24,0
+1,80,55,0,0,19.1,0.258,21,0
+4,123,80,15,176,32.0,0.443,34,0
+7,81,78,40,48,46.7,0.261,42,0
+4,134,72,0,0,23.8,0.277,60,1
+2,142,82,18,64,24.7,0.761,21,0
+6,144,72,27,228,33.9,0.255,40,0
+2,92,62,28,0,31.6,0.130,24,0
+1,71,48,18,76,20.4,0.323,22,0
+6,93,50,30,64,28.7,0.356,23,0
+1,122,90,51,220,49.7,0.325,31,1
+1,163,72,0,0,39.0,1.222,33,1
+1,151,60,0,0,26.1,0.179,22,0
+0,125,96,0,0,22.5,0.262,21,0
+1,81,72,18,40,26.6,0.283,24,0
+2,85,65,0,0,39.6,0.930,27,0
+1,126,56,29,152,28.7,0.801,21,0
+1,96,122,0,0,22.4,0.207,27,0
+4,144,58,28,140,29.5,0.287,37,0
+3,83,58,31,18,34.3,0.336,25,0
+0,95,85,25,36,37.4,0.247,24,1
+3,171,72,33,135,33.3,0.199,24,1
+8,155,62,26,495,34.0,0.543,46,1
+1,89,76,34,37,31.2,0.192,23,0
+4,76,62,0,0,34.0,0.391,25,0
+7,160,54,32,175,30.5,0.588,39,1
+4,146,92,0,0,31.2,0.539,61,1
+5,124,74,0,0,34.0,0.220,38,1
+5,78,48,0,0,33.7,0.654,25,0
+4,97,60,23,0,28.2,0.443,22,0
+4,99,76,15,51,23.2,0.223,21,0
+0,162,76,56,100,53.2,0.759,25,1
+6,111,64,39,0,34.2,0.260,24,0
+2,107,74,30,100,33.6,0.404,23,0
+5,132,80,0,0,26.8,0.186,69,0
+0,113,76,0,0,33.3,0.278,23,1
+1,88,30,42,99,55.0,0.496,26,1
+3,120,70,30,135,42.9,0.452,30,0
+1,118,58,36,94,33.3,0.261,23,0
+1,117,88,24,145,34.5,0.403,40,1
+0,105,84,0,0,27.9,0.741,62,1
+4,173,70,14,168,29.7,0.361,33,1
+9,122,56,0,0,33.3,1.114,33,1
+3,170,64,37,225,34.5,0.356,30,1
+8,84,74,31,0,38.3,0.457,39,0
+2,96,68,13,49,21.1,0.647,26,0
+2,125,60,20,140,33.8,0.088,31,0
+0,100,70,26,50,30.8,0.597,21,0
+0,93,60,25,92,28.7,0.532,22,0
+0,129,80,0,0,31.2,0.703,29,0
+5,105,72,29,325,36.9,0.159,28,0
+3,128,78,0,0,21.1,0.268,55,0
+5,106,82,30,0,39.5,0.286,38,0
+2,108,52,26,63,32.5,0.318,22,0
+10,108,66,0,0,32.4,0.272,42,1
+4,154,62,31,284,32.8,0.237,23,0
+0,102,75,23,0,0.0,0.572,21,0
+9,57,80,37,0,32.8,0.096,41,0
+2,106,64,35,119,30.5,1.400,34,0
+5,147,78,0,0,33.7,0.218,65,0
+2,90,70,17,0,27.3,0.085,22,0
+1,136,74,50,204,37.4,0.399,24,0
+4,114,65,0,0,21.9,0.432,37,0
+9,156,86,28,155,34.3,1.189,42,1
+1,153,82,42,485,40.6,0.687,23,0
+8,188,78,0,0,47.9,0.137,43,1
+7,152,88,44,0,50.0,0.337,36,1
+2,99,52,15,94,24.6,0.637,21,0
+1,109,56,21,135,25.2,0.833,23,0
+2,88,74,19,53,29.0,0.229,22,0
+17,163,72,41,114,40.9,0.817,47,1
+4,151,90,38,0,29.7,0.294,36,0
+7,102,74,40,105,37.2,0.204,45,0
+0,114,80,34,285,44.2,0.167,27,0
+2,100,64,23,0,29.7,0.368,21,0
+0,131,88,0,0,31.6,0.743,32,1
+6,104,74,18,156,29.9,0.722,41,1
+3,148,66,25,0,32.5,0.256,22,0
+4,120,68,0,0,29.6,0.709,34,0
+4,110,66,0,0,31.9,0.471,29,0
+3,111,90,12,78,28.4,0.495,29,0
+6,102,82,0,0,30.8,0.180,36,1
+6,134,70,23,130,35.4,0.542,29,1
+2,87,0,23,0,28.9,0.773,25,0
+1,79,60,42,48,43.5,0.678,23,0
+2,75,64,24,55,29.7,0.370,33,0
+8,179,72,42,130,32.7,0.719,36,1
+6,85,78,0,0,31.2,0.382,42,0
+0,129,110,46,130,67.1,0.319,26,1
+5,143,78,0,0,45.0,0.190,47,0
+5,130,82,0,0,39.1,0.956,37,1
+6,87,80,0,0,23.2,0.084,32,0
+0,119,64,18,92,34.9,0.725,23,0
+1,0,74,20,23,27.7,0.299,21,0
+5,73,60,0,0,26.8,0.268,27,0
+4,141,74,0,0,27.6,0.244,40,0
+7,194,68,28,0,35.9,0.745,41,1
+8,181,68,36,495,30.1,0.615,60,1
+1,128,98,41,58,32.0,1.321,33,1
+8,109,76,39,114,27.9,0.640,31,1
+5,139,80,35,160,31.6,0.361,25,1
+3,111,62,0,0,22.6,0.142,21,0
+9,123,70,44,94,33.1,0.374,40,0
+7,159,66,0,0,30.4,0.383,36,1
+11,135,0,0,0,52.3,0.578,40,1
+8,85,55,20,0,24.4,0.136,42,0
+5,158,84,41,210,39.4,0.395,29,1
+1,105,58,0,0,24.3,0.187,21,0
+3,107,62,13,48,22.9,0.678,23,1
+4,109,64,44,99,34.8,0.905,26,1
+4,148,60,27,318,30.9,0.150,29,1
+0,113,80,16,0,31.0,0.874,21,0
+1,138,82,0,0,40.1,0.236,28,0
+0,108,68,20,0,27.3,0.787,32,0
+2,99,70,16,44,20.4,0.235,27,0
+6,103,72,32,190,37.7,0.324,55,0
+5,111,72,28,0,23.9,0.407,27,0
+8,196,76,29,280,37.5,0.605,57,1
+5,162,104,0,0,37.7,0.151,52,1
+1,96,64,27,87,33.2,0.289,21,0
+7,184,84,33,0,35.5,0.355,41,1
+2,81,60,22,0,27.7,0.290,25,0
+0,147,85,54,0,42.8,0.375,24,0
+7,179,95,31,0,34.2,0.164,60,0
+0,140,65,26,130,42.6,0.431,24,1
+9,112,82,32,175,34.2,0.260,36,1
+12,151,70,40,271,41.8,0.742,38,1
+5,109,62,41,129,35.8,0.514,25,1
+6,125,68,30,120,30.0,0.464,32,0
+5,85,74,22,0,29.0,1.224,32,1
+5,112,66,0,0,37.8,0.261,41,1
+0,177,60,29,478,34.6,1.072,21,1
+2,158,90,0,0,31.6,0.805,66,1
+7,119,0,0,0,25.2,0.209,37,0
+7,142,60,33,190,28.8,0.687,61,0
+1,100,66,15,56,23.6,0.666,26,0
+1,87,78,27,32,34.6,0.101,22,0
+0,101,76,0,0,35.7,0.198,26,0
+3,162,52,38,0,37.2,0.652,24,1
+4,197,70,39,744,36.7,2.329,31,0
+0,117,80,31,53,45.2,0.089,24,0
+4,142,86,0,0,44.0,0.645,22,1
+6,134,80,37,370,46.2,0.238,46,1
+1,79,80,25,37,25.4,0.583,22,0
+4,122,68,0,0,35.0,0.394,29,0
+3,74,68,28,45,29.7,0.293,23,0
+4,171,72,0,0,43.6,0.479,26,1
+7,181,84,21,192,35.9,0.586,51,1
+0,179,90,27,0,44.1,0.686,23,1
+9,164,84,21,0,30.8,0.831,32,1
+0,104,76,0,0,18.4,0.582,27,0
+1,91,64,24,0,29.2,0.192,21,0
+4,91,70,32,88,33.1,0.446,22,0
+3,139,54,0,0,25.6,0.402,22,1
+6,119,50,22,176,27.1,1.318,33,1
+2,146,76,35,194,38.2,0.329,29,0
+9,184,85,15,0,30.0,1.213,49,1
+10,122,68,0,0,31.2,0.258,41,0
+0,165,90,33,680,52.3,0.427,23,0
+9,124,70,33,402,35.4,0.282,34,0
+1,111,86,19,0,30.1,0.143,23,0
+9,106,52,0,0,31.2,0.380,42,0
+2,129,84,0,0,28.0,0.284,27,0
+2,90,80,14,55,24.4,0.249,24,0
+0,86,68,32,0,35.8,0.238,25,0
+12,92,62,7,258,27.6,0.926,44,1
+1,113,64,35,0,33.6,0.543,21,1
+3,111,56,39,0,30.1,0.557,30,0
+2,114,68,22,0,28.7,0.092,25,0
+1,193,50,16,375,25.9,0.655,24,0
+11,155,76,28,150,33.3,1.353,51,1
+3,191,68,15,130,30.9,0.299,34,0
+3,141,0,0,0,30.0,0.761,27,1
+4,95,70,32,0,32.1,0.612,24,0
+3,142,80,15,0,32.4,0.200,63,0
+4,123,62,0,0,32.0,0.226,35,1
+5,96,74,18,67,33.6,0.997,43,0
+0,138,0,0,0,36.3,0.933,25,1
+2,128,64,42,0,40.0,1.101,24,0
+0,102,52,0,0,25.1,0.078,21,0
+2,146,0,0,0,27.5,0.240,28,1
+10,101,86,37,0,45.6,1.136,38,1
+2,108,62,32,56,25.2,0.128,21,0
+3,122,78,0,0,23.0,0.254,40,0
+1,71,78,50,45,33.2,0.422,21,0
+13,106,70,0,0,34.2,0.251,52,0
+2,100,70,52,57,40.5,0.677,25,0
+7,106,60,24,0,26.5,0.296,29,1
+0,104,64,23,116,27.8,0.454,23,0
+5,114,74,0,0,24.9,0.744,57,0
+2,108,62,10,278,25.3,0.881,22,0
+0,146,70,0,0,37.9,0.334,28,1
+10,129,76,28,122,35.9,0.280,39,0
+7,133,88,15,155,32.4,0.262,37,0
+7,161,86,0,0,30.4,0.165,47,1
+2,108,80,0,0,27.0,0.259,52,1
+7,136,74,26,135,26.0,0.647,51,0
+5,155,84,44,545,38.7,0.619,34,0
+1,119,86,39,220,45.6,0.808,29,1
+4,96,56,17,49,20.8,0.340,26,0
+5,108,72,43,75,36.1,0.263,33,0
+0,78,88,29,40,36.9,0.434,21,0
+0,107,62,30,74,36.6,0.757,25,1
+2,128,78,37,182,43.3,1.224,31,1
+1,128,48,45,194,40.5,0.613,24,1
+0,161,50,0,0,21.9,0.254,65,0
+6,151,62,31,120,35.5,0.692,28,0
+2,146,70,38,360,28.0,0.337,29,1
+0,126,84,29,215,30.7,0.520,24,0
+14,100,78,25,184,36.6,0.412,46,1
+8,112,72,0,0,23.6,0.840,58,0
+0,167,0,0,0,32.3,0.839,30,1
+2,144,58,33,135,31.6,0.422,25,1
+5,77,82,41,42,35.8,0.156,35,0
+5,115,98,0,0,52.9,0.209,28,1
+3,150,76,0,0,21.0,0.207,37,0
+2,120,76,37,105,39.7,0.215,29,0
+10,161,68,23,132,25.5,0.326,47,1
+0,137,68,14,148,24.8,0.143,21,0
+0,128,68,19,180,30.5,1.391,25,1
+2,124,68,28,205,32.9,0.875,30,1
+6,80,66,30,0,26.2,0.313,41,0
+0,106,70,37,148,39.4,0.605,22,0
+2,155,74,17,96,26.6,0.433,27,1
+3,113,50,10,85,29.5,0.626,25,0
+7,109,80,31,0,35.9,1.127,43,1
+2,112,68,22,94,34.1,0.315,26,0
+3,99,80,11,64,19.3,0.284,30,0
+3,182,74,0,0,30.5,0.345,29,1
+3,115,66,39,140,38.1,0.150,28,0
+6,194,78,0,0,23.5,0.129,59,1
+4,129,60,12,231,27.5,0.527,31,0
+3,112,74,30,0,31.6,0.197,25,1
+0,124,70,20,0,27.4,0.254,36,1
+13,152,90,33,29,26.8,0.731,43,1
+2,112,75,32,0,35.7,0.148,21,0
+1,157,72,21,168,25.6,0.123,24,0
+1,122,64,32,156,35.1,0.692,30,1
+10,179,70,0,0,35.1,0.200,37,0
+2,102,86,36,120,45.5,0.127,23,1
+6,105,70,32,68,30.8,0.122,37,0
+8,118,72,19,0,23.1,1.476,46,0
+2,87,58,16,52,32.7,0.166,25,0
+1,180,0,0,0,43.3,0.282,41,1
+12,106,80,0,0,23.6,0.137,44,0
+1,95,60,18,58,23.9,0.260,22,0
+0,165,76,43,255,47.9,0.259,26,0
+0,117,0,0,0,33.8,0.932,44,0
+5,115,76,0,0,31.2,0.343,44,1
+9,152,78,34,171,34.2,0.893,33,1
+7,178,84,0,0,39.9,0.331,41,1
+1,130,70,13,105,25.9,0.472,22,0
+1,95,74,21,73,25.9,0.673,36,0
+1,0,68,35,0,32.0,0.389,22,0
+5,122,86,0,0,34.7,0.290,33,0
+8,95,72,0,0,36.8,0.485,57,0
+8,126,88,36,108,38.5,0.349,49,0
+1,139,46,19,83,28.7,0.654,22,0
+3,116,0,0,0,23.5,0.187,23,0
+3,99,62,19,74,21.8,0.279,26,0
+5,0,80,32,0,41.0,0.346,37,1
+4,92,80,0,0,42.2,0.237,29,0
+4,137,84,0,0,31.2,0.252,30,0
+3,61,82,28,0,34.4,0.243,46,0
+1,90,62,12,43,27.2,0.580,24,0
+3,90,78,0,0,42.7,0.559,21,0
+9,165,88,0,0,30.4,0.302,49,1
+1,125,50,40,167,33.3,0.962,28,1
+13,129,0,30,0,39.9,0.569,44,1
+12,88,74,40,54,35.3,0.378,48,0
+1,196,76,36,249,36.5,0.875,29,1
+5,189,64,33,325,31.2,0.583,29,1
+5,158,70,0,0,29.8,0.207,63,0
+5,103,108,37,0,39.2,0.305,65,0
+4,146,78,0,0,38.5,0.520,67,1
+4,147,74,25,293,34.9,0.385,30,0
+5,99,54,28,83,34.0,0.499,30,0
+6,124,72,0,0,27.6,0.368,29,1
+0,101,64,17,0,21.0,0.252,21,0
+3,81,86,16,66,27.5,0.306,22,0
+1,133,102,28,140,32.8,0.234,45,1
+3,173,82,48,465,38.4,2.137,25,1
+0,118,64,23,89,0.0,1.731,21,0
+0,84,64,22,66,35.8,0.545,21,0
+2,105,58,40,94,34.9,0.225,25,0
+2,122,52,43,158,36.2,0.816,28,0
+12,140,82,43,325,39.2,0.528,58,1
+0,98,82,15,84,25.2,0.299,22,0
+1,87,60,37,75,37.2,0.509,22,0
+4,156,75,0,0,48.3,0.238,32,1
+0,93,100,39,72,43.4,1.021,35,0
+1,107,72,30,82,30.8,0.821,24,0
+0,105,68,22,0,20.0,0.236,22,0
+1,109,60,8,182,25.4,0.947,21,0
+1,90,62,18,59,25.1,1.268,25,0
+1,125,70,24,110,24.3,0.221,25,0
+1,119,54,13,50,22.3,0.205,24,0
+5,116,74,29,0,32.3,0.660,35,1
+8,105,100,36,0,43.3,0.239,45,1
+5,144,82,26,285,32.0,0.452,58,1
+3,100,68,23,81,31.6,0.949,28,0
+1,100,66,29,196,32.0,0.444,42,0
+5,166,76,0,0,45.7,0.340,27,1
+1,131,64,14,415,23.7,0.389,21,0
+4,116,72,12,87,22.1,0.463,37,0
+4,158,78,0,0,32.9,0.803,31,1
+2,127,58,24,275,27.7,1.600,25,0
+3,96,56,34,115,24.7,0.944,39,0
+0,131,66,40,0,34.3,0.196,22,1
+3,82,70,0,0,21.1,0.389,25,0
+3,193,70,31,0,34.9,0.241,25,1
+4,95,64,0,0,32.0,0.161,31,1
+6,137,61,0,0,24.2,0.151,55,0
+5,136,84,41,88,35.0,0.286,35,1
+9,72,78,25,0,31.6,0.280,38,0
+5,168,64,0,0,32.9,0.135,41,1
+2,123,48,32,165,42.1,0.520,26,0
+4,115,72,0,0,28.9,0.376,46,1
+0,101,62,0,0,21.9,0.336,25,0
+8,197,74,0,0,25.9,1.191,39,1
+1,172,68,49,579,42.4,0.702,28,1
+6,102,90,39,0,35.7,0.674,28,0
+1,112,72,30,176,34.4,0.528,25,0
+1,143,84,23,310,42.4,1.076,22,0
+1,143,74,22,61,26.2,0.256,21,0
+0,138,60,35,167,34.6,0.534,21,1
+3,173,84,33,474,35.7,0.258,22,1
+1,97,68,21,0,27.2,1.095,22,0
+4,144,82,32,0,38.5,0.554,37,1
+1,83,68,0,0,18.2,0.624,27,0
+3,129,64,29,115,26.4,0.219,28,1
+1,119,88,41,170,45.3,0.507,26,0
+2,94,68,18,76,26.0,0.561,21,0
+0,102,64,46,78,40.6,0.496,21,0
+2,115,64,22,0,30.8,0.421,21,0
+8,151,78,32,210,42.9,0.516,36,1
+4,184,78,39,277,37.0,0.264,31,1
+0,94,0,0,0,0.0,0.256,25,0
+1,181,64,30,180,34.1,0.328,38,1
+0,135,94,46,145,40.6,0.284,26,0
+1,95,82,25,180,35.0,0.233,43,1
+2,99,0,0,0,22.2,0.108,23,0
+3,89,74,16,85,30.4,0.551,38,0
+1,80,74,11,60,30.0,0.527,22,0
+2,139,75,0,0,25.6,0.167,29,0
+1,90,68,8,0,24.5,1.138,36,0
+0,141,0,0,0,42.4,0.205,29,1
+12,140,85,33,0,37.4,0.244,41,0
+5,147,75,0,0,29.9,0.434,28,0
+1,97,70,15,0,18.2,0.147,21,0
+6,107,88,0,0,36.8,0.727,31,0
+0,189,104,25,0,34.3,0.435,41,1
+2,83,66,23,50,32.2,0.497,22,0
+4,117,64,27,120,33.2,0.230,24,0
+8,108,70,0,0,30.5,0.955,33,1
+4,117,62,12,0,29.7,0.380,30,1
+0,180,78,63,14,59.4,2.420,25,1
+1,100,72,12,70,25.3,0.658,28,0
+0,95,80,45,92,36.5,0.330,26,0
+0,104,64,37,64,33.6,0.510,22,1
+0,120,74,18,63,30.5,0.285,26,0
+1,82,64,13,95,21.2,0.415,23,0
+2,134,70,0,0,28.9,0.542,23,1
+0,91,68,32,210,39.9,0.381,25,0
+2,119,0,0,0,19.6,0.832,72,0
+2,100,54,28,105,37.8,0.498,24,0
+14,175,62,30,0,33.6,0.212,38,1
+1,135,54,0,0,26.7,0.687,62,0
+5,86,68,28,71,30.2,0.364,24,0
+10,148,84,48,237,37.6,1.001,51,1
+9,134,74,33,60,25.9,0.460,81,0
+9,120,72,22,56,20.8,0.733,48,0
+1,71,62,0,0,21.8,0.416,26,0
+8,74,70,40,49,35.3,0.705,39,0
+5,88,78,30,0,27.6,0.258,37,0
+10,115,98,0,0,24.0,1.022,34,0
+0,124,56,13,105,21.8,0.452,21,0
+0,74,52,10,36,27.8,0.269,22,0
+0,97,64,36,100,36.8,0.600,25,0
+8,120,0,0,0,30.0,0.183,38,1
+6,154,78,41,140,46.1,0.571,27,0
+1,144,82,40,0,41.3,0.607,28,0
+0,137,70,38,0,33.2,0.170,22,0
+0,119,66,27,0,38.8,0.259,22,0
+7,136,90,0,0,29.9,0.210,50,0
+4,114,64,0,0,28.9,0.126,24,0
+0,137,84,27,0,27.3,0.231,59,0
+2,105,80,45,191,33.7,0.711,29,1
+7,114,76,17,110,23.8,0.466,31,0
+8,126,74,38,75,25.9,0.162,39,0
+4,132,86,31,0,28.0,0.419,63,0
+3,158,70,30,328,35.5,0.344,35,1
+0,123,88,37,0,35.2,0.197,29,0
+4,85,58,22,49,27.8,0.306,28,0
+0,84,82,31,125,38.2,0.233,23,0
+0,145,0,0,0,44.2,0.630,31,1
+0,135,68,42,250,42.3,0.365,24,1
+1,139,62,41,480,40.7,0.536,21,0
+0,173,78,32,265,46.5,1.159,58,0
+4,99,72,17,0,25.6,0.294,28,0
+8,194,80,0,0,26.1,0.551,67,0
+2,83,65,28,66,36.8,0.629,24,0
+2,89,90,30,0,33.5,0.292,42,0
+4,99,68,38,0,32.8,0.145,33,0
+4,125,70,18,122,28.9,1.144,45,1
+3,80,0,0,0,0.0,0.174,22,0
+6,166,74,0,0,26.6,0.304,66,0
+5,110,68,0,0,26.0,0.292,30,0
+2,81,72,15,76,30.1,0.547,25,0
+7,195,70,33,145,25.1,0.163,55,1
+6,154,74,32,193,29.3,0.839,39,0
+2,117,90,19,71,25.2,0.313,21,0
+3,84,72,32,0,37.2,0.267,28,0
+6,0,68,41,0,39.0,0.727,41,1
+7,94,64,25,79,33.3,0.738,41,0
+3,96,78,39,0,37.3,0.238,40,0
+10,75,82,0,0,33.3,0.263,38,0
+0,180,90,26,90,36.5,0.314,35,1
+1,130,60,23,170,28.6,0.692,21,0
+2,84,50,23,76,30.4,0.968,21,0
+8,120,78,0,0,25.0,0.409,64,0
+12,84,72,31,0,29.7,0.297,46,1
+0,139,62,17,210,22.1,0.207,21,0
+9,91,68,0,0,24.2,0.200,58,0
+2,91,62,0,0,27.3,0.525,22,0
+3,99,54,19,86,25.6,0.154,24,0
+3,163,70,18,105,31.6,0.268,28,1
+9,145,88,34,165,30.3,0.771,53,1
+7,125,86,0,0,37.6,0.304,51,0
+13,76,60,0,0,32.8,0.180,41,0
+6,129,90,7,326,19.6,0.582,60,0
+2,68,70,32,66,25.0,0.187,25,0
+3,124,80,33,130,33.2,0.305,26,0
+6,114,0,0,0,0.0,0.189,26,0
+9,130,70,0,0,34.2,0.652,45,1
+3,125,58,0,0,31.6,0.151,24,0
+3,87,60,18,0,21.8,0.444,21,0
+1,97,64,19,82,18.2,0.299,21,0
+3,116,74,15,105,26.3,0.107,24,0
+0,117,66,31,188,30.8,0.493,22,0
+0,111,65,0,0,24.6,0.660,31,0
+2,122,60,18,106,29.8,0.717,22,0
+0,107,76,0,0,45.3,0.686,24,0
+1,86,66,52,65,41.3,0.917,29,0
+6,91,0,0,0,29.8,0.501,31,0
+1,77,56,30,56,33.3,1.251,24,0
+4,132,0,0,0,32.9,0.302,23,1
+0,105,90,0,0,29.6,0.197,46,0
+0,57,60,0,0,21.7,0.735,67,0
+0,127,80,37,210,36.3,0.804,23,0
+3,129,92,49,155,36.4,0.968,32,1
+8,100,74,40,215,39.4,0.661,43,1
+3,128,72,25,190,32.4,0.549,27,1
+10,90,85,32,0,34.9,0.825,56,1
+4,84,90,23,56,39.5,0.159,25,0
+1,88,78,29,76,32.0,0.365,29,0
+8,186,90,35,225,34.5,0.423,37,1
+5,187,76,27,207,43.6,1.034,53,1
+4,131,68,21,166,33.1,0.160,28,0
+1,164,82,43,67,32.8,0.341,50,0
+4,189,110,31,0,28.5,0.680,37,0
+1,116,70,28,0,27.4,0.204,21,0
+3,84,68,30,106,31.9,0.591,25,0
+6,114,88,0,0,27.8,0.247,66,0
+1,88,62,24,44,29.9,0.422,23,0
+1,84,64,23,115,36.9,0.471,28,0
+7,124,70,33,215,25.5,0.161,37,0
+1,97,70,40,0,38.1,0.218,30,0
+8,110,76,0,0,27.8,0.237,58,0
+11,103,68,40,0,46.2,0.126,42,0
+11,85,74,0,0,30.1,0.300,35,0
+6,125,76,0,0,33.8,0.121,54,1
+0,198,66,32,274,41.3,0.502,28,1
+1,87,68,34,77,37.6,0.401,24,0
+6,99,60,19,54,26.9,0.497,32,0
+0,91,80,0,0,32.4,0.601,27,0
+2,95,54,14,88,26.1,0.748,22,0
+1,99,72,30,18,38.6,0.412,21,0
+6,92,62,32,126,32.0,0.085,46,0
+4,154,72,29,126,31.3,0.338,37,0
+0,121,66,30,165,34.3,0.203,33,1
+3,78,70,0,0,32.5,0.270,39,0
+2,130,96,0,0,22.6,0.268,21,0
+3,111,58,31,44,29.5,0.430,22,0
+2,98,60,17,120,34.7,0.198,22,0
+1,143,86,30,330,30.1,0.892,23,0
+1,119,44,47,63,35.5,0.280,25,0
+6,108,44,20,130,24.0,0.813,35,0
+2,118,80,0,0,42.9,0.693,21,1
+10,133,68,0,0,27.0,0.245,36,0
+2,197,70,99,0,34.7,0.575,62,1
+0,151,90,46,0,42.1,0.371,21,1
+6,109,60,27,0,25.0,0.206,27,0
+12,121,78,17,0,26.5,0.259,62,0
+8,100,76,0,0,38.7,0.190,42,0
+8,124,76,24,600,28.7,0.687,52,1
+1,93,56,11,0,22.5,0.417,22,0
+8,143,66,0,0,34.9,0.129,41,1
+6,103,66,0,0,24.3,0.249,29,0
+3,176,86,27,156,33.3,1.154,52,1
+0,73,0,0,0,21.1,0.342,25,0
+11,111,84,40,0,46.8,0.925,45,1
+2,112,78,50,140,39.4,0.175,24,0
+3,132,80,0,0,34.4,0.402,44,1
+2,82,52,22,115,28.5,1.699,25,0
+6,123,72,45,230,33.6,0.733,34,0
+0,188,82,14,185,32.0,0.682,22,1
+0,67,76,0,0,45.3,0.194,46,0
+1,89,24,19,25,27.8,0.559,21,0
+1,173,74,0,0,36.8,0.088,38,1
+1,109,38,18,120,23.1,0.407,26,0
+1,108,88,19,0,27.1,0.400,24,0
+6,96,0,0,0,23.7,0.190,28,0
+1,124,74,36,0,27.8,0.100,30,0
+7,150,78,29,126,35.2,0.692,54,1
+4,183,0,0,0,28.4,0.212,36,1
+1,124,60,32,0,35.8,0.514,21,0
+1,181,78,42,293,40.0,1.258,22,1
+1,92,62,25,41,19.5,0.482,25,0
+0,152,82,39,272,41.5,0.270,27,0
+1,111,62,13,182,24.0,0.138,23,0
+3,106,54,21,158,30.9,0.292,24,0
+3,174,58,22,194,32.9,0.593,36,1
+7,168,88,42,321,38.2,0.787,40,1
+6,105,80,28,0,32.5,0.878,26,0
+11,138,74,26,144,36.1,0.557,50,1
+3,106,72,0,0,25.8,0.207,27,0
+6,117,96,0,0,28.7,0.157,30,0
+2,68,62,13,15,20.1,0.257,23,0
+9,112,82,24,0,28.2,1.282,50,1
+0,119,0,0,0,32.4,0.141,24,1
+2,112,86,42,160,38.4,0.246,28,0
+2,92,76,20,0,24.2,1.698,28,0
+6,183,94,0,0,40.8,1.461,45,0
+0,94,70,27,115,43.5,0.347,21,0
+2,108,64,0,0,30.8,0.158,21,0
+4,90,88,47,54,37.7,0.362,29,0
+0,125,68,0,0,24.7,0.206,21,0
+0,132,78,0,0,32.4,0.393,21,0
+5,128,80,0,0,34.6,0.144,45,0
+4,94,65,22,0,24.7,0.148,21,0
+7,114,64,0,0,27.4,0.732,34,1
+0,102,78,40,90,34.5,0.238,24,0
+2,111,60,0,0,26.2,0.343,23,0
+1,128,82,17,183,27.5,0.115,22,0
+10,92,62,0,0,25.9,0.167,31,0
+13,104,72,0,0,31.2,0.465,38,1
+5,104,74,0,0,28.8,0.153,48,0
+2,94,76,18,66,31.6,0.649,23,0
+7,97,76,32,91,40.9,0.871,32,1
+1,100,74,12,46,19.5,0.149,28,0
+0,102,86,17,105,29.3,0.695,27,0
+4,128,70,0,0,34.3,0.303,24,0
+6,147,80,0,0,29.5,0.178,50,1
+4,90,0,0,0,28.0,0.610,31,0
+3,103,72,30,152,27.6,0.730,27,0
+2,157,74,35,440,39.4,0.134,30,0
+1,167,74,17,144,23.4,0.447,33,1
+0,179,50,36,159,37.8,0.455,22,1
+11,136,84,35,130,28.3,0.260,42,1
+0,107,60,25,0,26.4,0.133,23,0
+1,91,54,25,100,25.2,0.234,23,0
+1,117,60,23,106,33.8,0.466,27,0
+5,123,74,40,77,34.1,0.269,28,0
+2,120,54,0,0,26.8,0.455,27,0
+1,106,70,28,135,34.2,0.142,22,0
+2,155,52,27,540,38.7,0.240,25,1
+2,101,58,35,90,21.8,0.155,22,0
+1,120,80,48,200,38.9,1.162,41,0
+11,127,106,0,0,39.0,0.190,51,0
+3,80,82,31,70,34.2,1.292,27,1
+10,162,84,0,0,27.7,0.182,54,0
+1,199,76,43,0,42.9,1.394,22,1
+8,167,106,46,231,37.6,0.165,43,1
+9,145,80,46,130,37.9,0.637,40,1
+6,115,60,39,0,33.7,0.245,40,1
+1,112,80,45,132,34.8,0.217,24,0
+4,145,82,18,0,32.5,0.235,70,1
+10,111,70,27,0,27.5,0.141,40,1
+6,98,58,33,190,34.0,0.430,43,0
+9,154,78,30,100,30.9,0.164,45,0
+6,165,68,26,168,33.6,0.631,49,0
+1,99,58,10,0,25.4,0.551,21,0
+10,68,106,23,49,35.5,0.285,47,0
+3,123,100,35,240,57.3,0.880,22,0
+8,91,82,0,0,35.6,0.587,68,0
+6,195,70,0,0,30.9,0.328,31,1
+9,156,86,0,0,24.8,0.230,53,1
+0,93,60,0,0,35.3,0.263,25,0
+3,121,52,0,0,36.0,0.127,25,1
+2,101,58,17,265,24.2,0.614,23,0
+2,56,56,28,45,24.2,0.332,22,0
+0,162,76,36,0,49.6,0.364,26,1
+0,95,64,39,105,44.6,0.366,22,0
+4,125,80,0,0,32.3,0.536,27,1
+5,136,82,0,0,0.0,0.640,69,0
+2,129,74,26,205,33.2,0.591,25,0
+3,130,64,0,0,23.1,0.314,22,0
+1,107,50,19,0,28.3,0.181,29,0
+1,140,74,26,180,24.1,0.828,23,0
+1,144,82,46,180,46.1,0.335,46,1
+8,107,80,0,0,24.6,0.856,34,0
+13,158,114,0,0,42.3,0.257,44,1
+2,121,70,32,95,39.1,0.886,23,0
+7,129,68,49,125,38.5,0.439,43,1
+2,90,60,0,0,23.5,0.191,25,0
+7,142,90,24,480,30.4,0.128,43,1
+3,169,74,19,125,29.9,0.268,31,1
+0,99,0,0,0,25.0,0.253,22,0
+4,127,88,11,155,34.5,0.598,28,0
+4,118,70,0,0,44.5,0.904,26,0
+2,122,76,27,200,35.9,0.483,26,0
+6,125,78,31,0,27.6,0.565,49,1
+1,168,88,29,0,35.0,0.905,52,1
+2,129,0,0,0,38.5,0.304,41,0
+4,110,76,20,100,28.4,0.118,27,0
+6,80,80,36,0,39.8,0.177,28,0
+10,115,0,0,0,0.0,0.261,30,1
+2,127,46,21,335,34.4,0.176,22,0
+9,164,78,0,0,32.8,0.148,45,1
+2,93,64,32,160,38.0,0.674,23,1
+3,158,64,13,387,31.2,0.295,24,0
+5,126,78,27,22,29.6,0.439,40,0
+10,129,62,36,0,41.2,0.441,38,1
+0,134,58,20,291,26.4,0.352,21,0
+3,102,74,0,0,29.5,0.121,32,0
+7,187,50,33,392,33.9,0.826,34,1
+3,173,78,39,185,33.8,0.970,31,1
+10,94,72,18,0,23.1,0.595,56,0
+1,108,60,46,178,35.5,0.415,24,0
+5,97,76,27,0,35.6,0.378,52,1
+4,83,86,19,0,29.3,0.317,34,0
+1,114,66,36,200,38.1,0.289,21,0
+1,149,68,29,127,29.3,0.349,42,1
+5,117,86,30,105,39.1,0.251,42,0
+1,111,94,0,0,32.8,0.265,45,0
+4,112,78,40,0,39.4,0.236,38,0
+1,116,78,29,180,36.1,0.496,25,0
+0,141,84,26,0,32.4,0.433,22,0
+2,175,88,0,0,22.9,0.326,22,0
+2,92,52,0,0,30.1,0.141,22,0
+3,130,78,23,79,28.4,0.323,34,1
+8,120,86,0,0,28.4,0.259,22,1
+2,174,88,37,120,44.5,0.646,24,1
+2,106,56,27,165,29.0,0.426,22,0
+2,105,75,0,0,23.3,0.560,53,0
+4,95,60,32,0,35.4,0.284,28,0
+0,126,86,27,120,27.4,0.515,21,0
+8,65,72,23,0,32.0,0.600,42,0
+2,99,60,17,160,36.6,0.453,21,0
+1,102,74,0,0,39.5,0.293,42,1
+11,120,80,37,150,42.3,0.785,48,1
+3,102,44,20,94,30.8,0.400,26,0
+1,109,58,18,116,28.5,0.219,22,0
+9,140,94,0,0,32.7,0.734,45,1
+13,153,88,37,140,40.6,1.174,39,0
+12,100,84,33,105,30.0,0.488,46,0
+1,147,94,41,0,49.3,0.358,27,1
+1,81,74,41,57,46.3,1.096,32,0
+3,187,70,22,200,36.4,0.408,36,1
+6,162,62,0,0,24.3,0.178,50,1
+4,136,70,0,0,31.2,1.182,22,1
+1,121,78,39,74,39.0,0.261,28,0
+3,108,62,24,0,26.0,0.223,25,0
+0,181,88,44,510,43.3,0.222,26,1
+8,154,78,32,0,32.4,0.443,45,1
+1,128,88,39,110,36.5,1.057,37,1
+7,137,90,41,0,32.0,0.391,39,0
+0,123,72,0,0,36.3,0.258,52,1
+1,106,76,0,0,37.5,0.197,26,0
+6,190,92,0,0,35.5,0.278,66,1
+2,88,58,26,16,28.4,0.766,22,0
+9,170,74,31,0,44.0,0.403,43,1
+9,89,62,0,0,22.5,0.142,33,0
+10,101,76,48,180,32.9,0.171,63,0
+2,122,70,27,0,36.8,0.340,27,0
+5,121,72,23,112,26.2,0.245,30,0
+1,126,60,0,0,30.1,0.349,47,1
+1,93,70,31,0,30.4,0.315,23,0

data_cache/hepatology_liver.csv ADDED Viewed

	@@ -0,0 +1,583 @@

+65,Female,0.7,0.1,187,16,18,6.8,3.3,0.9,1
+62,Male,10.9,5.5,699,64,100,7.5,3.2,0.74,1
+62,Male,7.3,4.1,490,60,68,7,3.3,0.89,1
+58,Male,1,0.4,182,14,20,6.8,3.4,1,1
+72,Male,3.9,2,195,27,59,7.3,2.4,0.4,1
+46,Male,1.8,0.7,208,19,14,7.6,4.4,1.3,1
+26,Female,0.9,0.2,154,16,12,7,3.5,1,1
+29,Female,0.9,0.3,202,14,11,6.7,3.6,1.1,1
+17,Male,0.9,0.3,202,22,19,7.4,4.1,1.2,2
+55,Male,0.7,0.2,290,53,58,6.8,3.4,1,1
+57,Male,0.6,0.1,210,51,59,5.9,2.7,0.8,1
+72,Male,2.7,1.3,260,31,56,7.4,3,0.6,1
+64,Male,0.9,0.3,310,61,58,7,3.4,0.9,2
+74,Female,1.1,0.4,214,22,30,8.1,4.1,1,1
+61,Male,0.7,0.2,145,53,41,5.8,2.7,0.87,1
+25,Male,0.6,0.1,183,91,53,5.5,2.3,0.7,2
+38,Male,1.8,0.8,342,168,441,7.6,4.4,1.3,1
+33,Male,1.6,0.5,165,15,23,7.3,3.5,0.92,2
+40,Female,0.9,0.3,293,232,245,6.8,3.1,0.8,1
+40,Female,0.9,0.3,293,232,245,6.8,3.1,0.8,1
+51,Male,2.2,1,610,17,28,7.3,2.6,0.55,1
+51,Male,2.9,1.3,482,22,34,7,2.4,0.5,1
+62,Male,6.8,3,542,116,66,6.4,3.1,0.9,1
+40,Male,1.9,1,231,16,55,4.3,1.6,0.6,1
+63,Male,0.9,0.2,194,52,45,6,3.9,1.85,2
+34,Male,4.1,2,289,875,731,5,2.7,1.1,1
+34,Male,4.1,2,289,875,731,5,2.7,1.1,1
+34,Male,6.2,3,240,1680,850,7.2,4,1.2,1
+20,Male,1.1,0.5,128,20,30,3.9,1.9,0.95,2
+84,Female,0.7,0.2,188,13,21,6,3.2,1.1,2
+57,Male,4,1.9,190,45,111,5.2,1.5,0.4,1
+52,Male,0.9,0.2,156,35,44,4.9,2.9,1.4,1
+57,Male,1,0.3,187,19,23,5.2,2.9,1.2,2
+38,Female,2.6,1.2,410,59,57,5.6,3,0.8,2
+38,Female,2.6,1.2,410,59,57,5.6,3,0.8,2
+30,Male,1.3,0.4,482,102,80,6.9,3.3,0.9,1
+17,Female,0.7,0.2,145,18,36,7.2,3.9,1.18,2
+46,Female,14.2,7.8,374,38,77,4.3,2,0.8,1
+48,Male,1.4,0.6,263,38,66,5.8,2.2,0.61,1
+47,Male,2.7,1.3,275,123,73,6.2,3.3,1.1,1
+45,Male,2.4,1.1,168,33,50,5.1,2.6,1,1
+62,Male,0.6,0.1,160,42,110,4.9,2.6,1.1,2
+42,Male,6.8,3.2,630,25,47,6.1,2.3,0.6,2
+50,Male,2.6,1.2,415,407,576,6.4,3.2,1,1
+85,Female,1,0.3,208,17,15,7,3.6,1,2
+35,Male,1.8,0.6,275,48,178,6.5,3.2,0.9,2
+21,Male,3.9,1.8,150,36,27,6.8,3.9,1.34,1
+40,Male,1.1,0.3,230,1630,960,4.9,2.8,1.3,1
+32,Female,0.6,0.1,176,39,28,6,3,1,1
+55,Male,18.4,8.8,206,64,178,6.2,1.8,0.4,1
+45,Female,0.7,0.2,170,21,14,5.7,2.5,0.7,1
+34,Female,0.6,0.1,161,15,19,6.6,3.4,1,1
+38,Male,3.1,1.6,253,80,406,6.8,3.9,1.3,1
+38,Male,1.1,0.3,198,86,150,6.3,3.5,1.2,1
+42,Male,8.9,4.5,272,31,61,5.8,2,0.5,1
+42,Male,8.9,4.5,272,31,61,5.8,2,0.5,1
+33,Male,0.8,0.2,198,26,23,8,4,1,2
+48,Female,0.9,0.2,175,24,54,5.5,2.7,0.9,2
+51,Male,0.8,0.2,367,42,18,5.2,2,0.6,1
+64,Male,1.1,0.5,145,20,24,5.5,3.2,1.39,2
+31,Female,0.8,0.2,158,21,16,6,3,1,1
+58,Male,1,0.5,158,37,43,7.2,3.6,1,1
+58,Male,1,0.5,158,37,43,7.2,3.6,1,1
+57,Male,0.7,0.2,208,35,97,5.1,2.1,0.7,1
+57,Male,1.3,0.4,259,40,86,6.5,2.5,0.6,1
+57,Male,1.4,0.7,470,62,88,5.6,2.5,0.8,1
+54,Male,2.2,1.2,195,55,95,6,3.7,1.6,1
+37,Male,1.8,0.8,215,53,58,6.4,3.8,1.4,1
+66,Male,0.7,0.2,239,27,26,6.3,3.7,1.4,1
+60,Male,0.8,0.2,215,24,17,6.3,3,0.9,2
+19,Female,0.7,0.2,186,166,397,5.5,3,1.2,1
+75,Female,0.8,0.2,188,20,29,4.4,1.8,0.6,1
+75,Female,0.8,0.2,205,27,24,4.4,2,0.8,1
+52,Male,0.6,0.1,171,22,16,6.6,3.6,1.2,1
+68,Male,0.7,0.1,145,20,22,5.8,2.9,1,1
+29,Female,0.7,0.1,162,52,41,5.2,2.5,0.9,2
+31,Male,0.9,0.2,518,189,17,5.3,2.3,0.7,1
+68,Female,0.6,0.1,1620,95,127,4.6,2.1,0.8,1
+70,Male,1.4,0.6,146,12,24,6.2,3.8,1.58,2
+58,Female,2.8,1.3,670,48,79,4.7,1.6,0.5,1
+58,Female,2.4,1.1,915,60,142,4.7,1.8,0.6,1
+29,Male,1,0.3,75,25,26,5.1,2.9,1.3,1
+49,Male,0.7,0.1,148,14,12,5.4,2.8,1,2
+33,Male,2,1,258,194,152,5.4,3,1.25,1
+32,Male,0.6,0.1,237,45,31,7.5,4.3,1.34,1
+14,Male,1.4,0.5,269,58,45,6.7,3.9,1.4,1
+13,Male,0.6,0.1,320,28,56,7.2,3.6,1,2
+58,Male,0.8,0.2,298,33,59,6.2,3.1,1,1
+18,Male,0.6,0.2,538,33,34,7.5,3.2,0.7,1
+60,Male,4,1.9,238,119,350,7.1,3.3,0.8,1
+60,Male,5.7,2.8,214,412,850,7.3,3.2,0.78,1
+60,Male,6.8,3.2,308,404,794,6.8,3,0.7,1
+60,Male,8.6,4,298,412,850,7.4,3,0.6,1
+60,Male,5.8,2.7,204,220,400,7,3,0.7,1
+60,Male,5.2,2.4,168,126,202,6.8,2.9,0.7,1
+75,Male,0.9,0.2,282,25,23,4.4,2.2,1,1
+39,Male,3.8,1.5,298,102,630,7.1,3.3,0.8,1
+39,Male,6.6,3,215,190,950,4,1.7,0.7,1
+18,Male,0.6,0.1,265,97,161,5.9,3.1,1.1,1
+18,Male,0.7,0.1,312,308,405,6.9,3.7,1.1,1
+27,Male,0.6,0.2,161,27,28,3.7,1.6,0.76,2
+27,Male,0.7,0.2,243,21,23,5.3,2.3,0.7,2
+17,Male,0.9,0.2,224,36,45,6.9,4.2,1.55,1
+55,Female,0.8,0.2,225,14,23,6.1,3.3,1.2,2
+63,Male,0.5,0.1,170,21,28,5.5,2.5,0.8,1
+36,Male,5.3,2.3,145,32,92,5.1,2.6,1,2
+36,Male,5.3,2.3,145,32,92,5.1,2.6,1,2
+36,Male,0.8,0.2,158,29,39,6,2.2,0.5,2
+36,Male,0.8,0.2,158,29,39,6,2.2,0.5,2
+36,Male,0.9,0.1,486,25,34,5.9,2.8,0.9,2
+24,Female,0.7,0.2,188,11,10,5.5,2.3,0.71,2
+48,Male,3.2,1.6,257,33,116,5.7,2.2,0.62,1
+27,Male,1.2,0.4,179,63,39,6.1,3.3,1.1,2
+74,Male,0.6,0.1,272,24,98,5,2,0.6,1
+50,Male,5.8,3,661,181,285,5.7,2.3,0.67,2
+50,Male,7.3,3.6,1580,88,64,5.6,2.3,0.6,2
+48,Male,0.7,0.1,1630,74,149,5.3,2,0.6,1
+32,Male,12.7,6.2,194,2000,2946,5.7,3.3,1.3,1
+32,Male,15.9,7,280,1350,1600,5.6,2.8,1,1
+32,Male,18,8.2,298,1250,1050,5.4,2.6,0.9,1
+32,Male,23,11.3,300,482,275,7.1,3.5,0.9,1
+32,Male,22.7,10.2,290,322,113,6.6,2.8,0.7,1
+58,Male,1.7,0.8,188,60,84,5.9,3.5,1.4,2
+64,Female,0.8,0.2,178,17,18,6.3,3.1,0.9,1
+28,Male,0.6,0.1,177,36,29,6.9,4.1,1.4,2
+60,Male,1.8,0.5,201,45,25,3.9,1.7,0.7,2
+48,Male,5.8,2.5,802,133,88,6,2.8,0.8,1
+64,Male,3,1.4,248,46,40,6.5,3.2,0.9,1
+58,Female,1.7,0.8,1896,61,83,8,3.9,0.95,1
+45,Male,2.8,1.7,263,57,65,5.1,2.3,0.8,1
+45,Male,3.2,1.4,512,50,58,6,2.7,0.8,1
+70,Female,0.7,0.2,237,18,28,5.8,2.5,0.75,2
+18,Female,0.8,0.2,199,34,31,6.5,3.5,1.16,2
+53,Male,0.9,0.4,238,17,14,6.6,2.9,0.8,1
+18,Male,1.8,0.7,178,35,36,6.8,3.6,1.1,1
+66,Male,11.3,5.6,1110,1250,4929,7,2.4,0.5,1
+46,Female,4.7,2.2,310,62,90,6.4,2.5,0.6,1
+18,Male,0.8,0.2,282,72,140,5.5,2.5,0.8,1
+18,Male,0.8,0.2,282,72,140,5.5,2.5,0.8,1
+15,Male,0.8,0.2,380,25,66,6.1,3.7,1.5,1
+60,Male,0.6,0.1,186,20,21,6.2,3.3,1.1,2
+66,Female,4.2,2.1,159,15,30,7.1,2.2,0.4,1
+30,Male,1.6,0.4,332,84,139,5.6,2.7,0.9,1
+30,Male,1.6,0.4,332,84,139,5.6,2.7,0.9,1
+45,Female,3.5,1.5,189,63,87,5.6,2.9,1,1
+65,Male,0.8,0.2,201,18,22,5.4,2.9,1.1,2
+66,Female,2.9,1.3,168,21,38,5.5,1.8,0.4,1
+65,Male,0.7,0.1,392,20,30,5.3,2.8,1.1,1
+50,Male,0.9,0.2,202,20,26,7.2,4.5,1.66,1
+60,Male,0.8,0.2,286,21,27,7.1,4,1.2,1
+56,Male,1.1,0.5,180,30,42,6.9,3.8,1.2,2
+50,Male,1.6,0.8,218,18,20,5.9,2.9,0.96,1
+46,Female,0.8,0.2,182,20,40,6,2.9,0.9,1
+52,Male,0.6,0.1,178,26,27,6.5,3.6,1.2,2
+34,Male,5.9,2.5,290,45,233,5.6,2.7,0.9,1
+34,Male,8.7,4,298,58,138,5.8,2.4,0.7,1
+32,Male,0.9,0.3,462,70,82,6.2,3.1,1,1
+72,Male,0.7,0.1,196,20,35,5.8,2,0.5,1
+72,Male,0.7,0.1,196,20,35,5.8,2,0.5,1
+50,Male,1.2,0.4,282,36,32,7.2,3.9,1.1,1
+60,Male,11,4.9,750,140,350,5.5,2.1,0.6,1
+60,Male,11.5,5,1050,99,187,6.2,2.8,0.8,1
+60,Male,5.8,2.7,599,43,66,5.4,1.8,0.5,1
+39,Male,1.9,0.9,180,42,62,7.4,4.3,1.38,1
+39,Male,1.9,0.9,180,42,62,7.4,4.3,1.38,1
+48,Male,4.5,2.3,282,13,74,7,2.4,0.52,1
+55,Male,75,3.6,332,40,66,6.2,2.5,0.6,1
+47,Female,3,1.5,292,64,67,5.6,1.8,0.47,1
+60,Male,22.8,12.6,962,53,41,6.9,3.3,0.9,1
+60,Male,8.9,4,950,33,32,6.8,3.1,0.8,1
+72,Male,1.7,0.8,200,28,37,6.2,3,0.93,1
+44,Female,1.9,0.6,298,378,602,6.6,3.3,1,1
+55,Male,14.1,7.6,750,35,63,5,1.6,0.47,1
+31,Male,0.6,0.1,175,48,34,6,3.7,1.6,1
+31,Male,0.6,0.1,175,48,34,6,3.7,1.6,1
+31,Male,0.8,0.2,198,43,31,7.3,4,1.2,1
+55,Male,0.8,0.2,482,112,99,5.7,2.6,0.8,1
+75,Male,14.8,9,1020,71,42,5.3,2.2,0.7,1
+75,Male,10.6,5,562,37,29,5.1,1.8,0.5,1
+75,Male,8,4.6,386,30,25,5.5,1.8,0.48,1
+75,Male,2.8,1.3,250,23,29,2.7,0.9,0.5,1
+75,Male,2.9,1.3,218,33,37,3,1.5,1,1
+65,Male,1.9,0.8,170,36,43,3.8,1.4,0.58,2
+40,Male,0.6,0.1,171,20,17,5.4,2.5,0.8,1
+64,Male,1.1,0.4,201,18,19,6.9,4.1,1.4,1
+38,Male,1.5,0.4,298,60,103,6,3,1,2
+60,Male,3.2,1.8,750,79,145,7.8,3.2,0.69,1
+60,Male,2.1,1,191,114,247,4,1.6,0.6,1
+60,Male,1.9,0.8,614,42,38,4.5,1.8,0.6,1
+48,Female,0.8,0.2,218,32,28,5.2,2.5,0.9,2
+60,Male,6.3,3.2,314,118,114,6.6,3.7,1.27,1
+60,Male,5.8,3,257,107,104,6.6,3.5,1.12,1
+60,Male,2.3,0.6,272,79,51,6.6,3.5,1.1,1
+49,Male,1.3,0.4,206,30,25,6,3.1,1.06,2
+49,Male,2,0.6,209,48,32,5.7,3,1.1,2
+60,Male,2.4,1,1124,30,54,5.2,1.9,0.5,1
+60,Male,2,1.1,664,52,104,6,2.1,0.53,1
+26,Female,0.6,0.2,142,12,32,5.7,2.4,0.75,1
+41,Male,0.9,0.2,169,22,18,6.1,3,0.9,2
+7,Female,27.2,11.8,1420,790,1050,6.1,2,0.4,1
+49,Male,0.6,0.1,218,50,53,5,2.4,0.9,1
+49,Male,0.6,0.1,218,50,53,5,2.4,0.9,1
+38,Female,0.8,0.2,145,19,23,6.1,3.1,1.03,2
+21,Male,1,0.3,142,27,21,6.4,3.5,1.2,2
+21,Male,0.7,0.2,135,27,26,6.4,3.3,1,2
+45,Male,2.5,1.2,163,28,22,7.6,4,1.1,1
+40,Male,3.6,1.8,285,50,60,7,2.9,0.7,1
+40,Male,3.9,1.7,350,950,1500,6.7,3.8,1.3,1
+70,Female,0.9,0.3,220,53,95,6.1,2.8,0.68,1
+45,Female,0.9,0.3,189,23,33,6.6,3.9,,1
+28,Male,0.8,0.3,190,20,14,4.1,2.4,1.4,1
+42,Male,2.7,1.3,219,60,180,7,3.2,0.8,1
+22,Male,2.7,1,160,82,127,5.5,3.1,1.2,2
+8,Female,0.9,0.2,401,25,58,7.5,3.4,0.8,1
+38,Male,1.7,1,180,18,34,7.2,3.6,1,1
+66,Male,0.6,0.2,100,17,148,5,3.3,1.9,2
+55,Male,0.9,0.2,116,36,16,6.2,3.2,1,2
+49,Male,1.1,0.5,159,30,31,7,4.3,1.5,1
+6,Male,0.6,0.1,289,38,30,4.8,2,0.7,2
+37,Male,0.8,0.2,125,41,39,6.4,3.4,1.1,1
+37,Male,0.8,0.2,147,27,46,5,2.5,1,1
+47,Male,0.9,0.2,192,38,24,7.3,4.3,1.4,1
+47,Male,0.9,0.2,265,40,28,8,4,1,1
+50,Male,1.1,0.3,175,20,19,7.1,4.5,1.7,2
+70,Male,1.7,0.5,400,56,44,5.7,3.1,1.1,1
+26,Male,0.6,0.2,120,45,51,7.9,4,1,1
+26,Male,1.3,0.4,173,38,62,8,4,1,1
+68,Female,0.7,0.2,186,18,15,6.4,3.8,1.4,1
+65,Female,1,0.3,202,26,13,5.3,2.6,0.9,2
+46,Male,0.6,0.2,290,26,21,6,3,1,1
+61,Male,1.5,0.6,196,61,85,6.7,3.8,1.3,2
+61,Male,0.8,0.1,282,85,231,8.5,4.3,1,1
+50,Male,2.7,1.6,157,149,156,7.9,3.1,0.6,1
+33,Male,2,1.4,2110,48,89,6.2,3,0.9,1
+40,Female,0.9,0.2,285,32,27,7.7,3.5,0.8,1
+60,Male,1.5,0.6,360,230,298,4.5,2,0.8,1
+22,Male,0.8,0.2,300,57,40,7.9,3.8,0.9,2
+35,Female,0.9,0.3,158,20,16,8,4,1,1
+35,Female,0.9,0.2,190,40,35,7.3,4.7,1.8,2
+40,Male,0.9,0.3,196,69,48,6.8,3.1,0.8,1
+48,Male,0.7,0.2,165,32,30,8,4,1,2
+51,Male,0.8,0.2,230,24,46,6.5,3.1,,1
+29,Female,0.8,0.2,205,30,23,8.2,4.1,1,1
+28,Female,0.9,0.2,316,25,23,8.5,5.5,1.8,1
+54,Male,0.8,0.2,218,20,19,6.3,2.5,0.6,1
+54,Male,0.9,0.2,290,15,18,6.1,2.8,0.8,1
+55,Male,1.8,9,272,22,79,6.1,2.7,0.7,1
+55,Male,0.9,0.2,190,25,28,5.9,2.7,0.8,1
+40,Male,0.7,0.1,202,37,29,5,2.6,1,1
+33,Male,1.2,0.3,498,28,25,7,3,0.7,1
+33,Male,2.1,1.3,480,38,22,6.5,3,0.8,1
+33,Male,0.9,0.8,680,37,40,5.9,2.6,0.8,1
+65,Male,1.1,0.3,258,48,40,7,3.9,1.2,2
+35,Female,0.6,0.2,180,12,15,5.2,2.7,,2
+38,Female,0.7,0.1,152,90,21,7.1,4.2,1.4,2
+38,Male,1.7,0.7,859,89,48,6,3,1,1
+50,Male,0.9,0.3,901,23,17,6.2,3.5,1.2,1
+44,Male,0.8,0.2,335,148,86,5.6,3,1.1,1
+36,Male,0.8,0.2,182,31,34,6.4,3.8,1.4,2
+42,Male,30.5,14.2,285,65,130,5.2,2.1,0.6,1
+42,Male,16.4,8.9,245,56,87,5.4,2,0.5,1
+33,Male,1.5,7,505,205,140,7.5,3.9,1,1
+18,Male,0.8,0.2,228,55,54,6.9,4,1.3,1
+38,Female,0.8,0.2,185,25,21,7,3,0.7,1
+38,Male,0.8,0.2,247,55,92,7.4,4.3,1.38,2
+4,Male,0.9,0.2,348,30,34,8,4,1,2
+62,Male,1.2,0.4,195,38,54,6.3,3.8,1.5,1
+43,Female,0.9,0.3,140,12,29,7.4,3.5,1.8,1
+40,Male,14.5,6.4,358,50,75,5.7,2.1,0.5,1
+26,Male,0.6,0.1,110,15,20,2.8,1.6,1.3,1
+37,Male,0.7,0.2,235,96,54,9.5,4.9,1,1
+4,Male,0.8,0.2,460,152,231,6.5,3.2,0.9,2
+21,Male,18.5,9.5,380,390,500,8.2,4.1,1,1
+30,Male,0.7,0.2,262,15,18,9.6,4.7,1.2,1
+33,Male,1.8,0.8,196,25,22,8,4,1,1
+26,Male,1.9,0.8,180,22,19,8.2,4.1,1,2
+35,Male,0.9,0.2,190,25,20,6.4,3.6,1.2,2
+60,Male,2,0.8,190,45,40,6,2.8,0.8,1
+45,Male,2.2,0.8,209,25,20,8,4,1,1
+48,Female,1,1.4,144,18,14,8.3,4.2,1,1
+58,Male,0.8,0.2,123,56,48,6,3,1,1
+50,Male,0.7,0.2,192,18,15,7.4,4.2,1.3,2
+50,Male,0.7,0.2,188,12,14,7,3.4,0.9,1
+18,Male,1.3,0.7,316,10,21,6,2.1,0.5,2
+18,Male,0.9,0.3,300,30,48,8,4,1,1
+13,Male,1.5,0.5,575,29,24,7.9,3.9,0.9,1
+34,Female,0.8,0.2,192,15,12,8.6,4.7,1.2,1
+43,Male,1.3,0.6,155,15,20,8,4,1,2
+50,Female,1,0.5,239,16,39,7.5,3.7,0.9,1
+57,Male,4.5,2.3,315,120,105,7,4,1.3,1
+45,Female,1,0.3,250,48,44,8.6,4.3,1,1
+60,Male,0.7,0.2,174,32,14,7.8,4.2,1.1,2
+45,Male,0.6,0.2,245,22,24,7.1,3.4,0.9,1
+23,Male,1.1,0.5,191,37,41,7.7,4.3,1.2,2
+22,Male,2.4,1,340,25,21,8.3,4.5,1.1,1
+22,Male,0.6,0.2,202,78,41,8,3.9,0.9,1
+74,Female,0.9,0.3,234,16,19,7.9,4,1,1
+25,Female,0.9,0.3,159,24,25,6.9,4.4,1.7,2
+31,Female,1.1,0.3,190,26,15,7.9,3.8,0.9,1
+24,Female,0.9,0.2,195,40,35,7.4,4.1,1.2,2
+58,Male,0.8,0.2,180,32,25,8.2,4.4,1.1,2
+51,Female,0.9,0.2,280,21,30,6.7,3.2,0.8,1
+50,Female,1.7,0.6,430,28,32,6.8,3.5,1,1
+50,Male,0.7,0.2,206,18,17,8.4,4.2,1,2
+55,Female,0.8,0.2,155,21,17,6.9,3.8,1.4,1
+54,Female,1.4,0.7,195,36,16,7.9,3.7,0.9,2
+48,Male,1.6,1,588,74,113,7.3,2.4,0.4,1
+30,Male,0.8,0.2,174,21,47,4.6,2.3,1,1
+45,Female,0.8,0.2,165,22,18,8.2,4.1,1,1
+48,Female,1.1,0.7,527,178,250,8,4.2,1.1,1
+51,Male,0.8,0.2,175,48,22,8.1,4.6,1.3,1
+54,Female,23.2,12.6,574,43,47,7.2,3.5,0.9,1
+27,Male,1.3,0.6,106,25,54,8.5,4.8,,2
+30,Female,0.8,0.2,158,25,22,7.9,4.5,1.3,2
+26,Male,2,0.9,195,24,65,7.8,4.3,1.2,1
+22,Male,0.9,0.3,179,18,21,6.7,3.7,1.2,2
+44,Male,0.9,0.2,182,29,82,7.1,3.7,1,2
+35,Male,0.7,0.2,198,42,30,6.8,3.4,1,1
+38,Male,3.7,2.2,216,179,232,7.8,4.5,1.3,1
+14,Male,0.9,0.3,310,21,16,8.1,4.2,1,2
+30,Female,0.7,0.2,63,31,27,5.8,3.4,1.4,1
+30,Female,0.8,0.2,198,30,58,5.2,2.8,1.1,1
+36,Male,1.7,0.5,205,36,34,7.1,3.9,1.2,1
+12,Male,0.8,0.2,302,47,67,6.7,3.5,1.1,2
+60,Male,2.6,1.2,171,42,37,5.4,2.7,1,1
+42,Male,0.8,0.2,158,27,23,6.7,3.1,0.8,2
+36,Female,1.2,0.4,358,160,90,8.3,4.4,1.1,2
+24,Male,3.3,1.6,174,11,33,7.6,3.9,1,2
+43,Male,0.8,0.2,192,29,20,6,2.9,0.9,2
+21,Male,0.7,0.2,211,14,23,7.3,4.1,1.2,2
+26,Male,2,0.9,157,54,68,6.1,2.7,0.8,1
+26,Male,1.7,0.6,210,62,56,5.4,2.2,0.6,1
+26,Male,7.1,3.3,258,80,113,6.2,2.9,0.8,1
+36,Female,0.7,0.2,152,21,25,5.9,3.1,1.1,2
+13,Female,0.7,0.2,350,17,24,7.4,4,1.1,1
+13,Female,0.7,0.1,182,24,19,8.9,4.9,1.2,1
+75,Male,6.7,3.6,458,198,143,6.2,3.2,1,1
+75,Male,2.5,1.2,375,85,68,6.4,2.9,0.8,1
+75,Male,1.8,0.8,405,79,50,6.1,2.9,0.9,1
+75,Male,1.4,0.4,215,50,30,5.9,2.6,0.7,1
+75,Male,0.9,0.2,206,44,33,6.2,2.9,0.8,1
+36,Female,0.8,0.2,650,70,138,6.6,3.1,0.8,1
+35,Male,0.8,0.2,198,36,32,7,4,1.3,2
+70,Male,3.1,1.6,198,40,28,5.6,2,0.5,1
+37,Male,0.8,0.2,195,60,40,8.2,5,1.5,2
+60,Male,2.9,1.3,230,32,44,5.6,2,0.5,1
+46,Male,0.6,0.2,115,14,11,6.9,3.4,0.9,1
+38,Male,0.7,0.2,216,349,105,7,3.5,1,1
+70,Male,1.3,0.4,358,19,14,6.1,2.8,0.8,1
+49,Female,0.8,0.2,158,19,15,6.6,3.6,1.2,2
+37,Male,1.8,0.8,145,62,58,5.7,2.9,1,1
+37,Male,1.3,0.4,195,41,38,5.3,2.1,0.6,1
+26,Female,0.7,0.2,144,36,33,8.2,4.3,1.1,1
+48,Female,1.4,0.8,621,110,176,7.2,3.9,1.1,1
+48,Female,0.8,0.2,150,25,23,7.5,3.9,1,1
+19,Male,1.4,0.8,178,13,26,8,4.6,1.3,2
+33,Male,0.7,0.2,256,21,30,8.5,3.9,0.8,1
+33,Male,2.1,0.7,205,50,38,6.8,3,0.7,1
+37,Male,0.7,0.2,176,28,34,5.6,2.6,0.8,1
+69,Female,0.8,0.2,146,42,70,8.4,4.9,1.4,2
+24,Male,0.7,0.2,218,47,26,6.6,3.3,1,1
+65,Female,0.7,0.2,182,23,28,6.8,2.9,0.7,2
+55,Male,1.1,0.3,215,21,15,6.2,2.9,0.8,2
+42,Female,0.9,0.2,165,26,29,8.5,4.4,1,2
+21,Male,0.8,0.2,183,33,57,6.8,3.5,1,2
+40,Male,0.7,0.2,176,28,43,5.3,2.4,0.8,2
+16,Male,0.7,0.2,418,28,35,7.2,4.1,1.3,2
+60,Male,2.2,1,271,45,52,6.1,2.9,0.9,2
+42,Female,0.8,0.2,182,22,20,7.2,3.9,1.1,1
+58,Female,0.8,0.2,130,24,25,7,4,1.3,1
+54,Female,22.6,11.4,558,30,37,7.8,3.4,0.8,1
+33,Male,0.8,0.2,135,30,29,7.2,4.4,1.5,2
+48,Male,0.7,0.2,326,29,17,8.7,5.5,1.7,1
+25,Female,0.7,0.1,140,32,25,7.6,4.3,1.3,2
+56,Female,0.7,0.1,145,26,23,7,4,1.3,2
+47,Male,3.5,1.6,206,32,31,6.8,3.4,1,1
+33,Male,0.7,0.1,168,35,33,7,3.7,1.1,1
+20,Female,0.6,0.2,202,12,13,6.1,3,0.9,2
+50,Female,0.7,0.1,192,20,41,7.3,3.3,0.8,1
+72,Male,0.7,0.2,185,16,22,7.3,3.7,1,2
+50,Male,1.7,0.8,331,36,53,7.3,3.4,0.9,1
+39,Male,0.6,0.2,188,28,43,8.1,3.3,0.6,1
+58,Female,0.7,0.1,172,27,22,6.7,3.2,0.9,1
+60,Female,1.4,0.7,159,10,12,4.9,2.5,1,2
+34,Male,3.7,2.1,490,115,91,6.5,2.8,0.7,1
+50,Male,0.8,0.2,152,29,30,7.4,4.1,1.3,1
+38,Male,2.7,1.4,105,25,21,7.5,4.2,1.2,2
+51,Male,0.8,0.2,160,34,20,6.9,3.7,1.1,1
+46,Male,0.8,0.2,160,31,40,7.3,3.8,1.1,1
+72,Male,0.6,0.1,102,31,35,6.3,3.2,1,1
+72,Male,0.8,0.2,148,23,35,6,3,1,1
+75,Male,0.9,0.2,162,25,20,6.9,3.7,1.1,1
+41,Male,7.5,4.3,149,94,92,6.3,3.1,0.9,1
+41,Male,2.7,1.3,580,142,68,8,4,1,1
+48,Female,1,0.3,310,37,56,5.9,2.5,0.7,1
+45,Male,0.8,0.2,140,24,20,6.3,3.2,1,2
+74,Male,1,0.3,175,30,32,6.4,3.4,1.1,1
+78,Male,1,0.3,152,28,70,6.3,3.1,0.9,1
+38,Male,0.8,0.2,208,25,50,7.1,3.7,1,1
+27,Male,1,0.2,205,137,145,6,3,1,1
+66,Female,0.7,0.2,162,24,20,6.4,3.2,1,2
+50,Male,7.3,3.7,92,44,236,6.8,1.6,0.3,1
+42,Female,0.5,0.1,162,155,108,8.1,4,0.9,1
+65,Male,0.7,0.2,199,19,22,6.3,3.6,1.3,2
+22,Male,0.8,0.2,198,20,26,6.8,3.9,1.3,1
+31,Female,0.8,0.2,215,15,21,7.6,4,1.1,1
+45,Male,0.7,0.2,180,18,58,6.7,3.7,1.2,2
+12,Male,1,0.2,719,157,108,7.2,3.7,1,1
+48,Male,2.4,1.1,554,141,73,7.5,3.6,0.9,1
+48,Male,5,2.6,555,284,190,6.5,3.3,1,1
+18,Male,1.4,0.6,215,440,850,5,1.9,0.6,1
+23,Female,2.3,0.8,509,28,44,6.9,2.9,0.7,2
+65,Male,4.9,2.7,190,33,71,7.1,2.9,0.7,1
+48,Male,0.7,0.2,208,15,30,4.6,2.1,0.8,2
+65,Male,1.4,0.6,260,28,24,5.2,2.2,0.7,2
+70,Male,1.3,0.3,690,93,40,3.6,2.7,0.7,1
+70,Male,0.6,0.1,862,76,180,6.3,2.7,0.75,1
+11,Male,0.7,0.1,592,26,29,7.1,4.2,1.4,2
+50,Male,4.2,2.3,450,69,50,7,3,0.7,1
+55,Female,8.2,3.9,1350,52,65,6.7,2.9,0.7,1
+55,Female,10.9,5.1,1350,48,57,6.4,2.3,0.5,1
+26,Male,1,0.3,163,48,71,7.1,3.7,1,2
+41,Male,1.2,0.5,246,34,42,6.9,3.4,0.97,1
+53,Male,1.6,0.9,178,44,59,6.5,3.9,1.5,2
+32,Female,0.7,0.1,240,12,15,7,3,0.7,1
+58,Male,0.4,0.1,100,59,126,4.3,2.5,1.4,1
+45,Male,1.3,0.6,166,49,42,5.6,2.5,0.8,2
+65,Male,0.9,0.2,170,33,66,7,3,0.75,1
+52,Female,0.6,0.1,194,10,12,6.9,3.3,0.9,2
+73,Male,1.9,0.7,1750,102,141,5.5,2,0.5,1
+53,Female,0.7,0.1,182,20,33,4.8,1.9,0.6,1
+47,Female,0.8,0.2,236,10,13,6.7,2.9,0.76,2
+29,Male,0.7,0.2,165,55,87,7.5,4.6,1.58,1
+41,Female,0.9,0.2,201,31,24,7.6,3.8,1,2
+30,Female,0.7,0.2,194,32,36,7.5,3.6,0.92,2
+17,Female,0.5,0.1,206,28,21,7.1,4.5,1.7,2
+23,Male,1,0.3,212,41,80,6.2,3.1,1,1
+35,Male,1.6,0.7,157,15,44,5.2,2.5,0.9,1
+65,Male,0.8,0.2,162,30,90,3.8,1.4,0.5,1
+42,Female,0.8,0.2,168,25,18,6.2,3.1,1,1
+49,Female,0.8,0.2,198,23,20,7,4.3,1.5,1
+42,Female,2.3,1.1,292,29,39,4.1,1.8,0.7,1
+42,Female,7.4,3.6,298,52,102,4.6,1.9,0.7,1
+42,Female,0.7,0.2,152,35,81,6.2,3.2,1.06,1
+61,Male,0.8,0.2,163,18,19,6.3,2.8,0.8,2
+17,Male,0.9,0.2,279,40,46,7.3,4,1.2,2
+54,Male,0.8,0.2,181,35,20,5.5,2.7,0.96,1
+45,Female,23.3,12.8,1550,425,511,7.7,3.5,0.8,1
+48,Female,0.8,0.2,142,26,25,6,2.6,0.7,1
+48,Female,0.9,0.2,173,26,27,6.2,3.1,1,1
+65,Male,7.9,4.3,282,50,72,6,3,1,1
+35,Male,0.8,0.2,279,20,25,7.2,3.2,0.8,1
+58,Male,0.9,0.2,1100,25,36,7.1,3.5,0.9,1
+46,Male,0.7,0.2,224,40,23,7.1,3,0.7,1
+28,Male,0.6,0.2,159,15,16,7,3.5,1,2
+21,Female,0.6,0.1,186,25,22,6.8,3.4,1,1
+32,Male,0.7,0.2,189,22,43,7.4,3.1,0.7,2
+61,Male,0.8,0.2,192,28,35,6.9,3.4,0.9,2
+26,Male,6.8,3.2,140,37,19,3.6,0.9,0.3,1
+65,Male,1.1,0.5,686,16,46,5.7,1.5,0.35,1
+22,Female,2.2,1,215,159,51,5.5,2.5,0.8,1
+28,Female,0.8,0.2,309,55,23,6.8,4.1,1.51,1
+38,Male,0.7,0.2,110,22,18,6.4,2.5,0.64,1
+25,Male,0.8,0.1,130,23,42,8,4,1,1
+45,Female,0.7,0.2,164,21,53,4.5,1.4,0.45,2
+45,Female,0.6,0.1,270,23,42,5.1,2,0.5,2
+28,Female,0.6,0.1,137,22,16,4.9,1.9,0.6,2
+28,Female,1,0.3,90,18,108,6.8,3.1,0.8,2
+66,Male,1,0.3,190,30,54,5.3,2.1,0.6,1
+66,Male,0.8,0.2,165,22,32,4.4,2,0.8,1
+66,Male,1.1,0.5,167,13,56,7.1,4.1,1.36,1
+49,Female,0.6,0.1,185,17,26,6.6,2.9,0.7,2
+42,Male,0.7,0.2,197,64,33,5.8,2.4,0.7,2
+42,Male,1,0.3,154,38,21,6.8,3.9,1.3,2
+35,Male,2,1.1,226,33,135,6,2.7,0.8,2
+38,Male,2.2,1,310,119,42,7.9,4.1,1,2
+38,Male,0.9,0.3,310,15,25,5.5,2.7,1,1
+55,Male,0.6,0.2,220,24,32,5.1,2.4,0.88,1
+33,Male,7.1,3.7,196,622,497,6.9,3.6,1.09,1
+33,Male,3.4,1.6,186,779,844,7.3,3.2,0.7,1
+7,Male,0.5,0.1,352,28,51,7.9,4.2,1.1,2
+45,Male,2.3,1.3,282,132,368,7.3,4,1.2,1
+45,Male,1.1,0.4,92,91,188,7.2,3.8,1.11,1
+30,Male,0.8,0.2,182,46,57,7.8,4.3,1.2,2
+62,Male,5,2.1,103,18,40,5,2.1,1.72,1
+22,Female,6.7,3.2,850,154,248,6.2,2.8,0.8,1
+42,Female,0.8,0.2,195,18,15,6.7,3,0.8,1
+32,Male,0.7,0.2,276,102,190,6,2.9,0.93,1
+60,Male,0.7,0.2,171,31,26,7,3.5,1,2
+65,Male,0.8,0.1,146,17,29,5.9,3.2,1.18,2
+53,Female,0.8,0.2,193,96,57,6.7,3.6,1.16,1
+27,Male,1,0.3,180,56,111,6.8,3.9,1.85,2
+35,Female,1,0.3,805,133,103,7.9,3.3,0.7,1
+65,Male,0.7,0.2,265,30,28,5.2,1.8,0.52,2
+25,Male,0.7,0.2,185,196,401,6.5,3.9,1.5,1
+32,Male,0.7,0.2,165,31,29,6.1,3,0.96,2
+24,Male,1,0.2,189,52,31,8,4.8,1.5,1
+67,Male,2.2,1.1,198,42,39,7.2,3,0.7,1
+68,Male,1.8,0.5,151,18,22,6.5,4,1.6,1
+55,Male,3.6,1.6,349,40,70,7.2,2.9,0.6,1
+70,Male,2.7,1.2,365,62,55,6,2.4,0.6,1
+36,Male,2.8,1.5,305,28,76,5.9,2.5,0.7,1
+42,Male,0.8,0.2,127,29,30,4.9,2.7,1.2,1
+53,Male,19.8,10.4,238,39,221,8.1,2.5,0.4,1
+32,Male,30.5,17.1,218,39,79,5.5,2.7,0.9,1
+32,Male,32.6,14.1,219,95,235,5.8,3.1,1.1,1
+56,Male,17.7,8.8,239,43,185,5.6,2.4,0.7,1
+50,Male,0.9,0.3,194,190,73,7.5,3.9,1,1
+46,Male,18.4,8.5,450,119,230,7.5,3.3,0.7,1
+46,Male,20,10,254,140,540,5.4,3,1.2,1
+37,Female,0.8,0.2,205,31,36,9.2,4.6,1,2
+45,Male,2.2,1.6,320,37,48,6.8,3.4,1,1
+56,Male,1,0.3,195,22,28,5.8,2.6,0.8,2
+69,Male,0.9,0.2,215,32,24,6.9,3,0.7,1
+49,Male,1,0.3,230,48,58,8.4,4.2,1,1
+49,Male,3.9,2.1,189,65,181,6.9,3,0.7,1
+60,Male,0.9,0.3,168,16,24,6.7,3,0.8,1
+28,Male,0.9,0.2,215,50,28,8,4,1,1
+45,Male,2.9,1.4,210,74,68,7.2,3.6,1,1
+35,Male,26.3,12.1,108,168,630,9.2,2,0.3,1
+62,Male,1.8,0.9,224,69,155,8.6,4,0.8,1
+55,Male,4.4,2.9,230,14,25,7.1,2.1,0.4,1
+46,Female,0.8,0.2,185,24,15,7.9,3.7,0.8,1
+50,Male,0.6,0.2,137,15,16,4.8,2.6,1.1,1
+29,Male,0.8,0.2,156,12,15,6.8,3.7,1.1,2
+53,Female,0.9,0.2,210,35,32,8,3.9,0.9,2
+46,Male,9.4,5.2,268,21,63,6.4,2.8,0.8,1
+40,Male,3.5,1.6,298,68,200,7.1,3.4,0.9,1
+45,Male,1.7,0.8,315,12,38,6.3,2.1,0.5,1
+55,Male,3.3,1.5,214,54,152,5.1,1.8,0.5,1
+22,Female,1.1,0.3,138,14,21,7,3.8,1.1,2
+40,Male,30.8,18.3,285,110,186,7.9,2.7,0.5,1
+62,Male,0.7,0.2,162,12,17,8.2,3.2,0.6,2
+46,Female,1.4,0.4,298,509,623,3.6,1,0.3,1
+39,Male,1.6,0.8,230,88,74,8,4,1,2
+60,Male,19.6,9.5,466,46,52,6.1,2,0.4,1
+46,Male,15.8,7.2,227,67,220,6.9,2.6,0.6,1
+10,Female,0.8,0.1,395,25,75,7.6,3.6,0.9,1
+52,Male,1.8,0.8,97,85,78,6.4,2.7,0.7,1
+65,Female,0.7,0.2,406,24,45,7.2,3.5,0.9,2
+42,Male,0.8,0.2,114,21,23,7,3,0.7,2
+42,Male,0.8,0.2,198,29,19,6.6,3,0.8,2
+62,Male,0.7,0.2,173,46,47,7.3,4.1,1.2,2
+40,Male,1.2,0.6,204,23,27,7.6,4,1.1,1
+54,Female,5.5,3.2,350,67,42,7,3.2,0.8,1
+45,Female,0.7,0.2,153,41,42,4.5,2.2,0.9,2
+45,Male,20.2,11.7,188,47,32,5.4,2.3,0.7,1
+50,Female,27.7,10.8,380,39,348,7.1,2.3,0.4,1
+42,Male,11.1,6.1,214,60,186,6.9,2.8,2.8,1
+40,Female,2.1,1,768,74,141,7.8,4.9,1.6,1
+46,Male,3.3,1.5,172,25,41,5.6,2.4,0.7,1
+29,Male,1.2,0.4,160,20,22,6.2,3,0.9,2
+45,Male,0.6,0.1,196,29,30,5.8,2.9,1,1
+46,Male,10.2,4.2,232,58,140,7,2.7,0.6,1
+73,Male,1.8,0.9,220,20,43,6.5,3,0.8,1
+55,Male,0.8,0.2,290,139,87,7,3,0.7,1
+51,Male,0.7,0.1,180,25,27,6.1,3.1,1,1
+51,Male,2.9,1.2,189,80,125,6.2,3.1,1,1
+51,Male,4,2.5,275,382,330,7.5,4,1.1,1
+26,Male,42.8,19.7,390,75,138,7.5,2.6,0.5,1
+66,Male,15.2,7.7,356,321,562,6.5,2.2,0.4,1
+66,Male,16.6,7.6,315,233,384,6.9,2,0.4,1
+66,Male,17.3,8.5,388,173,367,7.8,2.6,0.5,1
+64,Male,1.4,0.5,298,31,83,7.2,2.6,0.5,1
+38,Female,0.6,0.1,165,22,34,5.9,2.9,0.9,2
+43,Male,22.5,11.8,143,22,143,6.6,2.1,0.46,1
+50,Female,1,0.3,191,22,31,7.8,4,1,2
+52,Male,2.7,1.4,251,20,40,6,1.7,0.39,1
+20,Female,16.7,8.4,200,91,101,6.9,3.5,1.02,1
+16,Male,7.7,4.1,268,213,168,7.1,4,1.2,1
+16,Male,2.6,1.2,236,131,90,5.4,2.6,0.9,1
+90,Male,1.1,0.3,215,46,134,6.9,3,0.7,1
+32,Male,15.6,9.5,134,54,125,5.6,4,2.5,1
+32,Male,3.7,1.6,612,50,88,6.2,1.9,0.4,1
+32,Male,12.1,6,515,48,92,6.6,2.4,0.5,1
+32,Male,25,13.7,560,41,88,7.9,2.5,2.5,1
+32,Male,15,8.2,289,58,80,5.3,2.2,0.7,1
+32,Male,12.7,8.4,190,28,47,5.4,2.6,0.9,1
+60,Male,0.5,0.1,500,20,34,5.9,1.6,0.37,2
+40,Male,0.6,0.1,98,35,31,6,3.2,1.1,1
+52,Male,0.8,0.2,245,48,49,6.4,3.2,1,1
+31,Male,1.3,0.5,184,29,32,6.8,3.4,1,1
+38,Male,1,0.3,216,21,24,7.3,4.4,1.5,2

data_cache/icu_sepsis.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data_cache/nephrology_ckd.csv ADDED Viewed

	@@ -0,0 +1,363 @@

+age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,bu,sc,sod,pot,hemo,pcv,wbcc,rbcc,htn,dm,cad,appet,pe,ane,class
+48,80,1.02,1,0,?,normal,notpresent,notpresent,121,36,1.2,?,?,15.4,44,7800,5.2,yes,yes,no,good,no,no,ckd
+7,50,1.02,4,0,?,normal,notpresent,notpresent,?,18,0.8,?,?,11.3,38,6000,?,no,no,no,good,no,no,ckd
+62,80,1.01,2,3,normal,normal,notpresent,notpresent,423,53,1.8,?,?,9.6,31,7500,?,no,yes,no,poor,no,yes,ckd
+48,70,1.005,4,0,normal,abnormal,present,notpresent,117,56,3.8,111,2.5,11.2,32,6700,3.9,yes,no,no,poor,yes,yes,ckd
+51,80,1.01,2,0,normal,normal,notpresent,notpresent,106,26,1.4,?,?,11.6,35,7300,4.6,no,no,no,good,no,no,ckd
+60,90,1.015,3,0,?,?,notpresent,notpresent,74,25,1.1,142,3.2,12.2,39,7800,4.4,yes,yes,no,good,yes,no,ckd
+68,70,1.01,0,0,?,normal,notpresent,notpresent,100,54,24,104,4,12.4,36,?,?,no,no,no,good,no,no,ckd
+52,100,1.015,3,0,normal,abnormal,present,notpresent,138,60,1.9,?,?,10.8,33,9600,4,yes,yes,no,good,no,yes,ckd
+53,90,1.02,2,0,abnormal,abnormal,present,notpresent,70,107,7.2,114,3.7,9.5,29,12100,3.7,yes,yes,no,poor,no,yes,ckd
+50,60,1.01,2,4,?,abnormal,present,notpresent,490,55,4,?,?,9.4,28,?,?,yes,yes,no,good,no,yes,ckd
+63,70,1.01,3,0,abnormal,abnormal,present,notpresent,380,60,2.7,131,4.2,10.8,32,4500,3.8,yes,yes,no,poor,yes,no,ckd
+68,70,1.015,3,1,?,normal,present,notpresent,208,72,2.1,138,5.8,9.7,28,12200,3.4,yes,yes,yes,poor,yes,no,ckd
+68,70,?,?,?,?,?,notpresent,notpresent,98,86,4.6,135,3.4,9.8,?,?,?,yes,yes,yes,poor,yes,no,ckd
+68,80,1.01,3,2,normal,abnormal,present,present,157,90,4.1,130,6.4,5.6,16,11000,2.6,yes,yes,yes,poor,yes,no,ckd
+40,80,1.015,3,0,?,normal,notpresent,notpresent,76,162,9.6,141,4.9,7.6,24,3800,2.8,yes,no,no,good,no,yes,ckd
+47,70,1.015,2,0,?,normal,notpresent,notpresent,99,46,2.2,138,4.1,12.6,?,?,?,no,no,no,good,no,no,ckd
+47,80,?,?,?,?,?,notpresent,notpresent,114,87,5.2,139,3.7,12.1,?,?,?,yes,no,no,poor,no,no,ckd
+60,100,1.025,0,3,?,normal,notpresent,notpresent,263,27,1.3,135,4.3,12.7,37,11400,4.3,yes,yes,yes,good,no,no,ckd
+62,60,1.015,1,0,?,abnormal,present,notpresent,100,31,1.6,?,?,10.3,30,5300,3.7,yes,no,yes,good,no,no,ckd
+61,80,1.015,2,0,abnormal,abnormal,notpresent,notpresent,173,148,3.9,135,5.2,7.7,24,9200,3.2,yes,yes,yes,poor,yes,yes,ckd
+60,90,?,?,?,?,?,notpresent,notpresent,?,180,76,4.5,?,10.9,32,6200,3.6,yes,yes,yes,good,no,no,ckd
+48,80,1.025,4,0,normal,abnormal,notpresent,notpresent,95,163,7.7,136,3.8,9.8,32,6900,3.4,yes,no,no,good,no,yes,ckd
+42,100,1.015,4,0,normal,abnormal,notpresent,present,?,50,1.4,129,4,11.1,39,8300,4.6,yes,no,no,poor,no,no,ckd
+61,60,1.025,0,0,?,normal,notpresent,notpresent,108,75,1.9,141,5.2,9.9,29,8400,3.7,yes,yes,no,good,no,yes,ckd
+75,80,1.015,0,0,?,normal,notpresent,notpresent,156,45,2.4,140,3.4,11.6,35,10300,4,yes,yes,no,poor,no,no,ckd
+69,70,1.01,3,4,normal,abnormal,notpresent,notpresent,264,87,2.7,130,4,12.5,37,9600,4.1,yes,yes,yes,good,yes,no,ckd
+75,70,?,1,3,?,?,notpresent,notpresent,123,31,1.4,?,?,?,?,?,?,no,yes,no,good,no,no,ckd
+68,70,1.005,1,0,abnormal,abnormal,present,notpresent,?,28,1.4,?,?,12.9,38,?,?,no,no,yes,good,no,no,ckd
+73,90,1.015,3,0,?,abnormal,present,notpresent,107,33,1.5,141,4.6,10.1,30,7800,4,no,no,no,poor,no,no,ckd
+61,90,1.01,1,1,?,normal,notpresent,notpresent,159,39,1.5,133,4.9,11.3,34,9600,4,yes,yes,no,poor,no,no,ckd
+60,100,1.02,2,0,abnormal,abnormal,notpresent,notpresent,140,55,2.5,?,?,10.1,29,?,?,yes,no,no,poor,no,no,ckd
+70,70,1.01,1,0,normal,?,present,present,171,153,5.2,?,?,?,?,?,?,no,yes,no,poor,no,no,ckd
+65,90,1.02,2,1,abnormal,normal,notpresent,notpresent,270,39,2,?,?,12,36,9800,4.9,yes,yes,no,poor,no,yes,ckd
+76,70,1.015,1,0,normal,normal,notpresent,notpresent,92,29,1.8,133,3.9,10.3,32,?,?,yes,no,no,good,no,no,ckd
+72,80,?,?,?,?,?,notpresent,notpresent,137,65,3.4,141,4.7,9.7,28,6900,2.5,yes,yes,no,poor,no,yes,ckd
+69,80,1.02,3,0,abnormal,normal,notpresent,notpresent,?,103,4.1,132,5.9,12.5,?,?,?,yes,no,no,good,no,no,ckd
+82,80,1.01,2,2,normal,?,notpresent,notpresent,140,70,3.4,136,4.2,13,40,9800,4.2,yes,yes,no,good,no,no,ckd
+46,90,1.01,2,0,normal,abnormal,notpresent,notpresent,99,80,2.1,?,?,11.1,32,9100,4.1,yes,no,no,good,no,no,ckd
+45,70,1.01,0,0,?,normal,notpresent,notpresent,?,20,0.7,?,?,?,?,?,?,no,no,no,good,yes,no,ckd
+47,100,1.01,0,0,?,normal,notpresent,notpresent,204,29,1,139,4.2,9.7,33,9200,4.5,yes,no,no,good,no,yes,ckd
+35,80,1.01,1,0,abnormal,?,notpresent,notpresent,79,202,10.8,134,3.4,7.9,24,7900,3.1,no,yes,no,good,no,no,ckd
+54,80,1.01,3,0,abnormal,abnormal,notpresent,notpresent,207,77,6.3,134,4.8,9.7,28,?,?,yes,yes,no,poor,yes,no,ckd
+54,80,1.02,3,0,?,abnormal,notpresent,notpresent,208,89,5.9,130,4.9,9.3,?,?,?,yes,yes,no,poor,yes,no,ckd
+48,70,1.015,0,0,?,normal,notpresent,notpresent,124,24,1.2,142,4.2,12.4,37,6400,4.7,no,yes,no,good,no,no,ckd
+11,80,1.01,3,0,?,normal,notpresent,notpresent,?,17,0.8,?,?,15,45,8600,?,no,no,no,good,no,no,ckd
+73,70,1.005,0,0,normal,normal,notpresent,notpresent,70,32,0.9,125,4,10,29,18900,3.5,yes,yes,no,good,yes,no,ckd
+60,70,1.01,2,0,normal,abnormal,present,notpresent,144,72,3,?,?,9.7,29,21600,3.5,yes,yes,no,poor,no,yes,ckd
+53,60,?,?,?,?,?,notpresent,notpresent,91,114,3.25,142,4.3,8.6,28,11000,3.8,yes,yes,no,poor,yes,yes,ckd
+54,100,1.015,3,0,?,normal,present,notpresent,162,66,1.6,136,4.4,10.3,33,?,?,yes,yes,no,poor,yes,no,ckd
+53,90,1.015,0,0,?,normal,notpresent,notpresent,?,38,2.2,?,?,10.9,34,4300,3.7,no,no,no,poor,no,yes,ckd
+62,80,1.015,0,5,?,?,notpresent,notpresent,246,24,1,?,?,13.6,40,8500,4.7,yes,yes,no,good,no,no,ckd
+63,80,1.01,2,2,normal,?,notpresent,notpresent,?,?,3.4,136,4.2,13,40,9800,4.2,yes,no,yes,good,no,no,ckd
+76,70,1.015,3,4,normal,abnormal,present,notpresent,?,164,9.7,131,4.4,10.2,30,11300,3.4,yes,yes,yes,poor,yes,no,ckd
+76,90,?,?,?,?,normal,notpresent,notpresent,93,155,7.3,132,4.9,?,?,?,?,yes,yes,yes,poor,no,no,ckd
+73,80,1.02,2,0,abnormal,abnormal,notpresent,notpresent,253,142,4.6,138,5.8,10.5,33,7200,4.3,yes,yes,yes,good,no,no,ckd
+59,100,?,?,?,?,?,notpresent,notpresent,?,96,6.4,?,?,6.6,?,?,?,yes,yes,no,good,no,yes,ckd
+67,90,1.02,1,0,?,abnormal,present,notpresent,141,66,3.2,138,6.6,?,?,?,?,yes,no,no,good,no,no,ckd
+67,80,1.01,1,3,normal,abnormal,notpresent,notpresent,182,391,32,163,39,?,?,?,?,no,no,no,good,yes,no,ckd
+15,60,1.02,3,0,?,normal,notpresent,notpresent,86,15,0.6,138,4,11,33,7700,3.8,yes,yes,no,good,no,no,ckd
+46,70,1.015,1,0,abnormal,normal,notpresent,notpresent,150,111,6.1,131,3.7,7.5,27,?,?,no,no,no,good,no,yes,ckd
+44,90,1.01,1,0,?,normal,notpresent,notpresent,?,20,1.1,?,?,15,48,?,?,no,no,no,good,no,no,ckd
+67,70,1.02,2,0,abnormal,normal,notpresent,notpresent,150,55,1.6,131,4.8,?,?,?,?,yes,yes,no,good,yes,no,ckd
+65,70,1.01,2,0,?,normal,present,notpresent,112,73,3.3,?,?,10.9,37,?,?,no,no,no,good,no,no,ckd
+26,70,1.015,0,4,?,normal,notpresent,notpresent,250,20,1.1,?,?,15.6,52,6900,6,no,yes,no,good,no,no,ckd
+61,80,1.015,0,4,?,normal,notpresent,notpresent,360,19,0.7,137,4.4,15.2,44,8300,5.2,yes,yes,no,good,no,no,ckd
+46,60,1.01,1,0,normal,normal,notpresent,notpresent,163,92,3.3,141,4,9.8,28,14600,3.2,yes,yes,no,good,no,no,ckd
+64,90,1.01,3,3,?,abnormal,present,notpresent,?,35,1.3,?,?,10.3,?,?,?,yes,yes,no,good,yes,no,ckd
+56,90,1.015,2,0,abnormal,abnormal,notpresent,notpresent,129,107,6.7,131,4.8,9.1,29,6400,3.4,yes,no,no,good,no,no,ckd
+48,80,1.005,4,0,abnormal,abnormal,notpresent,present,133,139,8.5,132,5.5,10.3,36,6200,4,no,yes,no,good,yes,no,ckd
+67,70,1.01,1,0,?,normal,notpresent,notpresent,102,48,3.2,137,5,11.9,34,7100,3.7,yes,yes,no,good,yes,no,ckd
+70,80,?,?,?,?,?,notpresent,notpresent,158,85,3.2,141,3.5,10.1,30,?,?,yes,no,no,good,yes,no,ckd
+56,80,1.01,1,0,?,normal,notpresent,notpresent,165,55,1.8,?,?,13.5,40,11800,5,yes,yes,no,poor,yes,no,ckd
+74,80,1.01,0,0,?,normal,notpresent,notpresent,132,98,2.8,133,5,10.8,31,9400,3.8,yes,yes,no,good,no,no,ckd
+45,90,?,?,?,?,?,notpresent,notpresent,360,45,2.4,128,4.4,8.3,29,5500,3.7,yes,yes,no,good,no,no,ckd
+38,70,?,?,?,?,?,notpresent,notpresent,104,77,1.9,140,3.9,?,?,?,?,yes,no,no,poor,yes,no,ckd
+48,70,1.015,1,0,normal,normal,notpresent,notpresent,127,19,1,134,3.6,?,?,?,?,yes,yes,no,good,no,no,ckd
+59,70,1.01,3,0,normal,abnormal,notpresent,notpresent,76,186,15,135,7.6,7.1,22,3800,2.1,yes,no,no,poor,yes,yes,ckd
+70,70,1.015,2,?,?,?,notpresent,notpresent,?,46,1.5,?,?,9.9,?,?,?,no,yes,no,poor,yes,no,ckd
+56,80,?,?,?,?,?,notpresent,notpresent,415,37,1.9,?,?,?,?,?,?,no,yes,no,good,no,no,ckd
+70,100,1.005,1,0,normal,abnormal,present,notpresent,169,47,2.9,?,?,11.1,32,5800,5,yes,yes,no,poor,no,no,ckd
+58,110,1.01,4,0,?,normal,notpresent,notpresent,251,52,2.2,?,?,?,?,13200,4.7,yes,yes,no,good,no,no,ckd
+50,70,1.02,0,0,?,normal,notpresent,notpresent,109,32,1.4,139,4.7,?,?,?,?,no,no,no,poor,no,no,ckd
+63,100,1.01,2,2,normal,normal,notpresent,present,280,35,3.2,143,3.5,13,40,9800,4.2,yes,no,yes,good,no,no,ckd
+56,70,1.015,4,1,abnormal,normal,notpresent,notpresent,210,26,1.7,136,3.8,16.1,52,12500,5.6,no,no,no,good,no,no,ckd
+71,70,1.01,3,0,normal,abnormal,present,present,219,82,3.6,133,4.4,10.4,33,5600,3.6,yes,yes,yes,good,no,no,ckd
+73,100,1.01,3,2,abnormal,abnormal,present,notpresent,295,90,5.6,140,2.9,9.2,30,7000,3.2,yes,yes,yes,poor,no,no,ckd
+65,70,1.01,0,0,?,normal,notpresent,notpresent,93,66,1.6,137,4.5,11.6,36,11900,3.9,no,yes,no,good,no,no,ckd
+62,90,1.015,1,0,?,normal,notpresent,notpresent,94,25,1.1,131,3.7,?,?,?,?,yes,no,no,good,yes,yes,ckd
+60,80,1.01,1,1,?,normal,notpresent,notpresent,172,32,2.7,?,?,11.2,36,?,?,no,yes,yes,poor,no,no,ckd
+65,60,1.015,1,0,?,normal,notpresent,notpresent,91,51,2.2,132,3.8,10,32,9100,4,yes,yes,no,poor,yes,no,ckd
+50,140,?,?,?,?,?,notpresent,notpresent,101,106,6.5,135,4.3,6.2,18,5800,2.3,yes,yes,no,poor,no,yes,ckd
+56,180,?,0,4,?,abnormal,notpresent,notpresent,298,24,1.2,139,3.9,11.2,32,10400,4.2,yes,yes,no,poor,yes,no,ckd
+34,70,1.015,4,0,abnormal,abnormal,notpresent,notpresent,153,22,0.9,133,3.8,?,?,?,?,no,no,no,good,yes,no,ckd
+71,90,1.015,2,0,?,abnormal,present,present,88,80,4.4,139,5.7,11.3,33,10700,3.9,no,no,no,good,no,no,ckd
+17,60,1.01,0,0,?,normal,notpresent,notpresent,92,32,2.1,141,4.2,13.9,52,7000,?,no,no,no,good,no,no,ckd
+76,70,1.015,2,0,normal,abnormal,present,notpresent,226,217,10.2,?,?,10.2,36,12700,4.2,yes,no,no,poor,yes,yes,ckd
+55,90,?,?,?,?,?,notpresent,notpresent,143,88,2,?,?,?,?,?,?,yes,yes,no,poor,yes,no,ckd
+65,80,1.015,0,0,?,normal,notpresent,notpresent,115,32,11.5,139,4,14.1,42,6800,5.2,no,no,no,good,no,no,ckd
+50,90,?,?,?,?,?,notpresent,notpresent,89,118,6.1,127,4.4,6,17,6500,?,yes,yes,no,good,yes,yes,ckd
+55,100,1.015,1,4,normal,?,notpresent,notpresent,297,53,2.8,139,4.5,11.2,34,13600,4.4,yes,yes,no,good,no,no,ckd
+45,80,1.015,0,0,?,abnormal,notpresent,notpresent,107,15,1,141,4.2,11.8,37,10200,4.2,no,no,no,good,no,no,ckd
+54,70,?,?,?,?,?,notpresent,notpresent,233,50.1,1.9,?,?,11.7,?,?,?,no,yes,no,good,no,no,ckd
+63,90,1.015,0,0,?,normal,notpresent,notpresent,123,19,2,142,3.8,11.7,34,11400,4.7,no,no,no,good,no,no,ckd
+65,80,1.01,3,3,?,normal,notpresent,notpresent,294,71,4.4,128,5.4,10,32,9000,3.9,yes,yes,yes,good,no,no,ckd
+12,60,1.015,3,0,abnormal,abnormal,present,notpresent,?,51,1.8,?,?,12.1,?,10300,?,no,no,no,good,no,no,ckd
+47,80,1.01,0,0,?,abnormal,notpresent,notpresent,?,28,0.9,?,?,12.4,44,5600,4.3,no,no,no,good,no,yes,ckd
+55,70,1.01,3,0,?,normal,notpresent,notpresent,99,25,1.2,?,?,11.4,?,?,?,no,no,no,poor,yes,no,ckd
+60,70,1.01,0,0,?,normal,notpresent,notpresent,140,27,1.2,?,?,?,?,?,?,no,no,no,good,no,no,ckd
+72,90,1.025,1,3,?,normal,notpresent,notpresent,323,40,2.2,137,5.3,12.6,?,?,?,no,yes,yes,poor,no,no,ckd
+54,60,?,3,?,?,?,notpresent,notpresent,125,21,1.3,137,3.4,15,46,?,?,yes,yes,no,good,yes,no,ckd
+34,70,?,?,?,?,?,notpresent,notpresent,?,219,12.2,130,3.8,6,?,?,?,yes,no,no,good,no,yes,ckd
+43,80,1.015,2,3,?,abnormal,present,present,?,30,1.1,?,?,14,42,14900,?,no,no,no,good,no,no,ckd
+65,100,1.015,0,0,?,normal,notpresent,notpresent,90,98,2.5,?,?,9.1,28,5500,3.6,yes,no,no,good,no,no,ckd
+72,90,?,?,?,?,?,notpresent,notpresent,308,36,2.5,131,4.3,?,?,?,?,yes,yes,no,poor,no,no,ckd
+70,90,1.015,0,0,?,normal,notpresent,notpresent,144,125,4,136,4.6,12,37,8200,4.5,yes,yes,no,poor,yes,no,ckd
+71,60,1.015,4,0,normal,normal,notpresent,notpresent,118,125,5.3,136,4.9,11.4,35,15200,4.3,yes,yes,no,poor,yes,no,ckd
+52,90,1.015,4,3,normal,abnormal,notpresent,notpresent,224,166,5.6,133,47,8.1,23,5000,2.9,yes,yes,no,good,no,yes,ckd
+75,70,1.025,1,0,?,normal,notpresent,notpresent,158,49,1.4,135,4.7,11.1,?,?,?,yes,no,no,poor,yes,no,ckd
+50,90,1.01,2,0,normal,abnormal,present,present,128,208,9.2,134,4.8,8.2,22,16300,2.7,no,no,no,poor,yes,yes,ckd
+5,50,1.01,0,0,?,normal,notpresent,notpresent,?,25,0.6,?,?,11.8,36,12400,?,no,no,no,good,no,no,ckd
+70,100,1.015,4,0,normal,normal,notpresent,notpresent,118,125,5.3,136,4.9,12,37,8400,8,yes,no,no,good,no,no,ckd
+47,100,1.01,?,?,normal,?,notpresent,notpresent,122,?,16.9,138,5.2,10.8,33,10200,3.8,no,yes,no,good,no,no,ckd
+48,80,1.015,0,2,?,normal,notpresent,notpresent,214,24,1.3,140,4,13.2,39,?,?,no,yes,no,poor,no,no,ckd
+46,90,1.02,?,?,?,normal,notpresent,notpresent,213,68,2.8,146,6.3,9.3,?,?,?,yes,yes,no,good,no,no,ckd
+45,60,1.01,2,0,normal,abnormal,present,notpresent,268,86,4,134,5.1,10,29,9200,?,yes,yes,no,good,no,no,ckd
+41,70,1.015,2,0,?,abnormal,notpresent,present,?,68,2.8,132,4.1,11.1,33,?,?,yes,no,no,good,yes,yes,ckd
+69,70,1.01,0,4,?,normal,notpresent,notpresent,256,40,1.2,142,5.6,?,?,?,?,no,no,no,good,no,no,ckd
+67,70,1.01,1,0,normal,normal,notpresent,notpresent,?,106,6,137,4.9,6.1,19,6500,?,yes,no,no,good,no,yes,ckd
+72,90,?,?,?,?,?,notpresent,notpresent,84,145,7.1,135,5.3,?,?,?,?,no,yes,no,good,no,no,ckd
+41,80,1.015,1,4,abnormal,normal,notpresent,notpresent,210,165,18,135,4.7,?,?,?,?,no,yes,no,good,no,no,ckd
+60,90,1.01,2,0,abnormal,normal,notpresent,notpresent,105,53,2.3,136,5.2,11.1,33,10500,4.1,no,no,no,good,no,no,ckd
+57,90,1.015,5,0,abnormal,abnormal,notpresent,present,?,322,13,126,4.8,8,24,4200,3.3,yes,yes,yes,poor,yes,yes,ckd
+53,100,1.01,1,3,abnormal,normal,notpresent,notpresent,213,23,1,139,4,?,?,?,?,no,yes,no,good,no,no,ckd
+60,60,1.01,3,1,normal,abnormal,present,notpresent,288,36,1.7,130,3,7.9,25,15200,3,yes,no,no,poor,no,yes,ckd
+69,60,?,?,?,?,?,notpresent,notpresent,171,26,48.1,?,?,?,?,?,?,yes,no,no,poor,no,no,ckd
+65,70,1.02,1,0,abnormal,abnormal,notpresent,notpresent,139,29,1,?,?,10.5,32,?,?,yes,no,no,good,yes,no,ckd
+8,60,1.025,3,0,normal,normal,notpresent,notpresent,78,27,0.9,?,?,12.3,41,6700,?,no,no,no,poor,yes,no,ckd
+76,90,?,?,?,?,?,notpresent,notpresent,172,46,1.7,141,5.5,9.6,30,?,?,yes,yes,no,good,no,yes,ckd
+39,70,1.01,0,0,?,normal,notpresent,notpresent,121,20,0.8,133,3.5,10.9,32,?,?,no,yes,no,good,no,no,ckd
+55,90,1.01,2,1,abnormal,abnormal,notpresent,notpresent,273,235,14.2,132,3.4,8.3,22,14600,2.9,yes,yes,no,poor,yes,yes,ckd
+56,90,1.005,4,3,abnormal,abnormal,notpresent,notpresent,242,132,16.4,140,4.2,8.4,26,?,3,yes,yes,no,poor,yes,yes,ckd
+50,70,1.02,3,0,abnormal,normal,present,present,123,40,1.8,?,?,11.1,36,4700,?,no,no,no,good,no,no,ckd
+66,90,1.015,2,0,?,normal,notpresent,present,153,76,3.3,?,?,?,?,?,?,no,no,no,poor,no,no,ckd
+62,70,1.025,3,0,normal,abnormal,notpresent,notpresent,122,42,1.7,136,4.7,12.6,39,7900,3.9,yes,yes,no,good,no,no,ckd
+71,60,1.02,3,2,normal,normal,present,notpresent,424,48,1.5,132,4,10.9,31,?,?,yes,yes,yes,good,no,no,ckd
+59,80,1.01,1,0,abnormal,normal,notpresent,notpresent,303,35,1.3,122,3.5,10.4,35,10900,4.3,no,yes,no,poor,no,no,ckd
+81,60,?,?,?,?,?,notpresent,notpresent,148,39,2.1,147,4.2,10.9,35,9400,2.4,yes,yes,yes,poor,yes,no,ckd
+59,70,?,?,?,?,?,notpresent,notpresent,204,34,1.5,124,4.1,9.8,37,6000,?,no,yes,no,good,no,no,ckd
+46,80,1.01,0,0,?,normal,notpresent,notpresent,160,40,2,140,4.1,9,27,8100,3.2,yes,no,no,poor,no,yes,ckd
+27,60,?,?,?,?,?,notpresent,notpresent,76,44,3.9,127,4.3,?,?,?,?,no,no,no,poor,yes,yes,ckd
+34,70,1.02,0,0,abnormal,normal,notpresent,notpresent,139,19,0.9,?,?,12.7,42,2200,?,no,no,no,poor,no,no,ckd
+65,70,1.015,4,4,?,normal,present,notpresent,307,28,1.5,?,?,11,39,6700,?,yes,yes,no,good,no,no,ckd
+66,70,1.015,2,5,?,normal,notpresent,notpresent,447,41,1.7,131,3.9,12.5,33,9600,4.4,yes,yes,no,good,no,no,ckd
+83,70,1.02,3,0,normal,normal,notpresent,notpresent,102,60,2.6,115,5.7,8.7,26,12800,3.1,yes,no,no,poor,no,yes,ckd
+62,80,1.01,1,2,?,?,notpresent,notpresent,309,113,2.9,130,2.5,10.6,34,12800,4.9,no,no,no,good,no,no,ckd
+17,70,1.015,1,0,abnormal,normal,notpresent,notpresent,22,1.5,7.3,145,2.8,13.1,41,11200,?,no,no,no,good,no,no,ckd
+54,70,?,?,?,?,?,notpresent,notpresent,111,146,7.5,141,4.7,11,35,8600,4.6,no,no,no,good,no,no,ckd
+60,50,1.01,0,0,?,normal,notpresent,notpresent,261,58,2.2,113,3,?,?,4200,3.4,yes,no,no,good,no,no,ckd
+21,90,1.01,4,0,normal,abnormal,present,present,107,40,1.7,125,3.5,8.3,23,12400,3.9,no,no,no,good,no,yes,ckd
+65,80,1.015,2,1,normal,normal,present,notpresent,215,133,2.5,?,?,13.2,41,?,?,no,yes,no,good,no,no,ckd
+42,90,1.02,2,0,abnormal,abnormal,present,notpresent,93,153,2.7,139,4.3,9.8,34,9800,?,no,no,no,poor,yes,yes,ckd
+72,90,1.01,2,0,?,abnormal,present,notpresent,124,53,2.3,?,?,11.9,39,?,?,no,no,no,good,no,no,ckd
+73,90,1.01,1,4,abnormal,abnormal,present,notpresent,234,56,1.9,?,?,10.3,28,?,?,no,yes,no,good,no,no,ckd
+45,70,1.025,2,0,normal,abnormal,present,notpresent,117,52,2.2,136,3.8,10,30,19100,3.7,no,no,no,good,no,no,ckd
+61,80,1.02,0,0,?,normal,notpresent,notpresent,131,23,0.8,140,4.1,11.3,35,?,?,no,no,no,good,no,no,ckd
+30,70,1.015,0,0,?,normal,notpresent,notpresent,101,106,6.5,135,4.3,?,?,?,?,no,no,no,poor,no,no,ckd
+54,60,1.015,3,2,?,abnormal,notpresent,notpresent,352,137,3.3,133,4.5,11.3,31,5800,3.6,yes,yes,yes,poor,yes,no,ckd
+8,50,1.02,4,0,normal,normal,notpresent,notpresent,?,46,1,135,3.8,?,?,?,?,no,no,no,good,yes,no,ckd
+64,60,1.01,4,1,abnormal,abnormal,notpresent,present,239,58,4.3,137,5.4,9.5,29,7500,3.4,yes,yes,no,poor,yes,no,ckd
+6,60,1.01,4,0,abnormal,abnormal,notpresent,present,94,67,1,135,4.9,9.9,30,16700,4.8,no,no,no,poor,no,no,ckd
+46,110,1.015,0,0,?,normal,notpresent,notpresent,130,16,0.9,?,?,?,?,?,?,no,no,no,good,no,no,ckd
+32,90,1.025,1,0,abnormal,abnormal,notpresent,notpresent,?,223,18.1,113,6.5,5.5,15,2600,2.8,yes,yes,no,poor,yes,yes,ckd
+80,70,1.01,2,?,?,abnormal,notpresent,notpresent,?,49,1.2,?,?,?,?,?,?,yes,yes,no,good,no,no,ckd
+70,90,1.02,2,1,abnormal,abnormal,notpresent,present,184,98.6,3.3,138,3.9,5.8,?,?,?,yes,yes,yes,poor,no,no,ckd
+49,100,1.01,3,0,abnormal,abnormal,notpresent,notpresent,129,158,11.8,122,3.2,8.1,24,9600,3.5,yes,yes,no,poor,yes,yes,ckd
+57,80,?,?,?,?,?,notpresent,notpresent,?,111,9.3,124,5.3,6.8,?,4300,3,yes,yes,no,good,no,yes,ckd
+59,100,1.02,4,2,normal,normal,notpresent,notpresent,252,40,3.2,137,4.7,11.2,30,26400,3.9,yes,yes,no,poor,yes,no,ckd
+65,80,1.015,0,0,?,normal,notpresent,notpresent,92,37,1.5,140,5.2,8.8,25,10700,3.2,yes,no,yes,good,yes,no,ckd
+90,90,1.025,1,0,?,normal,notpresent,notpresent,139,89,3,140,4.1,12,37,7900,3.9,yes,yes,no,good,no,no,ckd
+64,70,?,?,?,?,?,notpresent,notpresent,113,94,7.3,137,4.3,7.9,21,?,?,yes,yes,yes,good,yes,yes,ckd
+78,60,?,?,?,?,?,notpresent,notpresent,114,74,2.9,135,5.9,8,24,?,?,no,yes,no,good,no,yes,ckd
+65,90,1.01,4,2,normal,normal,notpresent,notpresent,172,82,13.5,145,6.3,8.8,31,?,?,yes,yes,no,good,yes,yes,ckd
+61,70,?,?,?,?,?,notpresent,notpresent,100,28,2.1,?,?,12.6,43,?,?,yes,yes,no,good,no,no,ckd
+60,70,1.01,1,0,?,normal,notpresent,notpresent,109,96,3.9,135,4,13.8,41,?,?,yes,no,no,good,no,no,ckd
+50,70,1.01,0,0,?,normal,notpresent,notpresent,230,50,2.2,?,?,12,41,10400,4.6,yes,yes,no,good,no,no,ckd
+67,80,?,?,?,?,?,notpresent,notpresent,341,37,1.5,?,?,12.3,41,6900,4.9,yes,yes,no,good,no,yes,ckd
+59,100,1.015,4,2,normal,normal,notpresent,notpresent,255,132,12.8,135,5.7,7.3,20,9800,3.9,yes,yes,yes,good,no,yes,ckd
+54,120,1.015,0,0,?,normal,notpresent,notpresent,103,18,1.2,?,?,?,?,?,?,no,no,no,good,no,no,ckd
+40,70,1.015,3,4,normal,normal,notpresent,notpresent,253,150,11.9,132,5.6,10.9,31,8800,3.4,yes,yes,no,poor,yes,no,ckd
+55,80,1.01,3,1,normal,abnormal,present,present,214,73,3.9,137,4.9,10.9,34,7400,3.7,yes,yes,no,good,yes,no,ckd
+68,80,1.015,0,0,?,abnormal,notpresent,notpresent,171,30,1,?,?,13.7,43,4900,5.2,no,yes,no,good,no,no,ckd
+63,100,1.01,1,0,?,normal,notpresent,notpresent,78,61,1.8,141,4.4,12.2,36,10500,4.3,no,yes,no,good,no,no,ckd
+33,90,1.015,0,0,?,normal,notpresent,notpresent,92,19,0.8,?,?,11.8,34,7000,?,no,no,no,good,no,no,ckd
+68,90,1.01,0,0,?,normal,notpresent,notpresent,238,57,2.5,?,?,9.8,28,8000,3.3,yes,yes,no,poor,no,no,ckd
+66,70,1.02,1,0,normal,?,notpresent,notpresent,248,30,1.7,138,5.3,?,?,?,?,yes,yes,no,good,no,no,ckd
+74,60,?,?,?,?,?,notpresent,notpresent,108,68,1.8,?,?,?,?,?,?,yes,yes,no,good,no,no,ckd
+71,90,1.01,0,3,?,normal,notpresent,notpresent,303,30,1.3,136,4.1,13,38,9200,4.6,yes,yes,no,good,no,no,ckd
+34,60,1.02,0,0,?,normal,notpresent,notpresent,117,28,2.2,138,3.8,?,?,?,?,no,no,no,good,yes,no,ckd
+60,90,1.01,3,5,abnormal,normal,notpresent,present,490,95,2.7,131,3.8,11.5,35,12000,4.5,yes,yes,no,good,no,no,ckd
+64,100,1.015,4,2,abnormal,abnormal,notpresent,present,163,54,7.2,140,4.6,7.9,26,7500,3.4,yes,yes,no,good,yes,no,ckd
+57,80,1.015,0,0,?,normal,notpresent,notpresent,120,48,1.6,?,?,11.3,36,7200,3.8,yes,yes,no,good,no,no,ckd
+60,70,?,?,?,?,?,notpresent,notpresent,124,52,2.5,?,?,?,?,?,?,yes,no,no,good,no,no,ckd
+59,50,1.01,3,0,normal,abnormal,notpresent,notpresent,241,191,12,114,2.9,9.6,31,15700,3.8,no,yes,no,good,yes,no,ckd
+65,60,1.01,2,0,normal,abnormal,present,notpresent,192,17,1.7,130,4.3,?,?,9500,?,yes,yes,no,poor,no,no,ckd
+60,90,?,?,?,?,?,notpresent,notpresent,269,51,2.8,138,3.7,11.5,35,?,?,yes,yes,yes,good,yes,no,ckd
+51,100,1.015,2,0,normal,normal,notpresent,present,93,20,1.6,146,4.5,?,?,?,?,no,no,no,poor,no,no,ckd
+37,100,1.01,0,0,abnormal,normal,notpresent,notpresent,?,19,1.3,?,?,15,44,4100,5.2,yes,no,no,good,no,no,ckd
+45,70,1.01,2,0,?,normal,notpresent,notpresent,113,93,2.3,?,?,7.9,26,5700,?,no,no,yes,good,no,yes,ckd
+65,80,?,?,?,?,?,notpresent,notpresent,74,66,2,136,5.4,9.1,25,?,?,yes,yes,yes,good,yes,no,ckd
+80,70,1.015,2,2,?,normal,notpresent,notpresent,141,53,2.2,?,?,12.7,40,9600,?,yes,yes,no,poor,yes,no,ckd
+72,100,?,?,?,?,?,notpresent,notpresent,201,241,13.4,127,4.8,9.4,28,?,?,yes,yes,no,good,no,yes,ckd
+34,90,1.015,2,0,normal,normal,notpresent,notpresent,104,50,1.6,137,4.1,11.9,39,?,?,no,no,no,good,no,no,ckd
+65,70,1.015,1,0,?,normal,notpresent,notpresent,203,46,1.4,?,?,11.4,36,5000,4.1,yes,yes,no,poor,yes,no,ckd
+57,70,1.015,1,0,?,abnormal,notpresent,notpresent,165,45,1.5,140,3.3,10.4,31,4200,3.9,no,no,no,good,no,no,ckd
+69,70,1.01,4,3,normal,abnormal,present,present,214,96,6.3,120,3.9,9.4,28,11500,3.3,yes,yes,yes,good,yes,yes,ckd
+62,90,1.02,2,1,?,normal,notpresent,notpresent,169,48,2.4,138,2.9,13.4,47,11000,6.1,yes,no,no,good,no,no,ckd
+64,90,1.015,3,2,?,abnormal,present,notpresent,463,64,2.8,135,4.1,12.2,40,9800,4.6,yes,yes,no,good,no,yes,ckd
+48,100,?,?,?,?,?,notpresent,notpresent,103,79,5.3,135,6.3,6.3,19,7200,2.6,yes,no,yes,poor,no,no,ckd
+48,110,1.015,3,0,abnormal,normal,present,notpresent,106,215,15.2,120,5.7,8.6,26,5000,2.5,yes,no,yes,good,no,yes,ckd
+54,90,1.025,1,0,normal,abnormal,notpresent,notpresent,150,18,1.2,140,4.2,?,?,?,?,no,no,no,poor,yes,yes,ckd
+59,70,1.01,1,3,abnormal,abnormal,notpresent,notpresent,424,55,1.7,138,4.5,12.6,37,10200,4.1,yes,yes,yes,good,no,no,ckd
+56,90,1.01,4,1,normal,abnormal,present,notpresent,176,309,13.3,124,6.5,3.1,9,5400,2.1,yes,yes,no,poor,yes,yes,ckd
+40,80,1.025,0,0,normal,normal,notpresent,notpresent,140,10,1.2,135,5,15,48,10400,4.5,no,no,no,good,no,no,notckd
+23,80,1.025,0,0,normal,normal,notpresent,notpresent,70,36,1,150,4.6,17,52,9800,5,no,no,no,good,no,no,notckd
+45,80,1.025,0,0,normal,normal,notpresent,notpresent,82,49,0.6,147,4.4,15.9,46,9100,4.7,no,no,no,good,no,no,notckd
+57,80,1.025,0,0,normal,normal,notpresent,notpresent,119,17,1.2,135,4.7,15.4,42,6200,6.2,no,no,no,good,no,no,notckd
+51,60,1.025,0,0,normal,normal,notpresent,notpresent,99,38,0.8,135,3.7,13,49,8300,5.2,no,no,no,good,no,no,notckd
+34,80,1.025,0,0,normal,normal,notpresent,notpresent,121,27,1.2,144,3.9,13.6,52,9200,6.3,no,no,no,good,no,no,notckd
+60,80,1.025,0,0,normal,normal,notpresent,notpresent,131,10,0.5,146,5,14.5,41,10700,5.1,no,no,no,good,no,no,notckd
+38,60,1.02,0,0,normal,normal,notpresent,notpresent,91,36,0.7,135,3.7,14,46,9100,5.8,no,no,no,good,no,no,notckd
+42,80,1.02,0,0,normal,normal,notpresent,notpresent,98,20,0.5,140,3.5,13.9,44,8400,5.5,no,no,no,good,no,no,notckd
+35,80,1.02,0,0,normal,normal,notpresent,notpresent,104,31,1.2,135,5,16.1,45,4300,5.2,no,no,no,good,no,no,notckd
+30,80,1.02,0,0,normal,normal,notpresent,notpresent,131,38,1,147,3.8,14.1,45,9400,5.3,no,no,no,good,no,no,notckd
+49,80,1.02,0,0,normal,normal,notpresent,notpresent,122,32,1.2,139,3.9,17,41,5600,4.9,no,no,no,good,no,no,notckd
+55,80,1.02,0,0,normal,normal,notpresent,notpresent,118,18,0.9,135,3.6,15.5,43,7200,5.4,no,no,no,good,no,no,notckd
+45,80,1.02,0,0,normal,normal,notpresent,notpresent,117,46,1.2,137,5,16.2,45,8600,5.2,no,no,no,good,no,no,notckd
+42,80,1.02,0,0,normal,normal,notpresent,notpresent,132,24,0.7,140,4.1,14.4,50,5000,4.5,no,no,no,good,no,no,notckd
+50,80,1.02,0,0,normal,normal,notpresent,notpresent,97,40,0.6,150,4.5,14.2,48,10500,5,no,no,no,good,no,no,notckd
+55,80,1.02,0,0,normal,normal,notpresent,notpresent,133,17,1.2,135,4.8,13.2,41,6800,5.3,no,no,no,good,no,no,notckd
+48,80,1.025,0,0,normal,normal,notpresent,notpresent,122,33,0.9,146,3.9,13.9,48,9500,4.8,no,no,no,good,no,no,notckd
+25,80,1.025,0,0,normal,normal,notpresent,notpresent,121,19,1.2,142,4.9,15,48,6900,5.3,no,no,no,good,no,no,notckd
+23,80,1.025,0,0,normal,normal,notpresent,notpresent,111,34,1.1,145,4,14.3,41,7200,5,no,no,no,good,no,no,notckd
+30,80,1.025,0,0,normal,normal,notpresent,notpresent,96,25,0.5,144,4.8,13.8,42,9000,4.5,no,no,no,good,no,no,notckd
+56,80,1.025,0,0,normal,normal,notpresent,notpresent,139,15,1.2,135,5,14.8,42,5600,5.5,no,no,no,good,no,no,notckd
+47,80,1.02,0,0,normal,normal,notpresent,notpresent,95,35,0.9,140,4.1,?,?,?,?,no,no,no,good,no,no,notckd
+19,80,1.02,0,0,normal,normal,notpresent,notpresent,107,23,0.7,141,4.2,14.4,44,?,?,no,no,no,good,no,no,notckd
+52,80,1.02,0,0,normal,normal,notpresent,notpresent,125,22,1.2,139,4.6,16.5,43,4700,4.6,no,no,no,good,no,no,notckd
+46,60,1.025,0,0,normal,normal,notpresent,notpresent,123,46,1,135,5,15.7,50,6300,4.8,no,no,no,good,no,no,notckd
+48,60,1.02,0,0,normal,normal,notpresent,notpresent,112,44,1.2,142,4.9,14.5,44,9400,6.4,no,no,no,good,no,no,notckd
+24,70,1.025,0,0,normal,normal,notpresent,notpresent,140,23,0.6,140,4.7,16.3,48,5800,5.6,no,no,no,good,no,no,notckd
+47,80,?,?,?,?,?,notpresent,notpresent,93,33,0.9,144,4.5,13.3,52,8100,5.2,no,no,no,good,no,no,notckd
+55,80,1.025,0,0,normal,normal,notpresent,notpresent,130,50,1.2,147,5,15.5,41,9100,6,no,no,no,good,no,no,notckd
+20,70,1.02,0,0,normal,normal,notpresent,notpresent,123,44,1,135,3.8,14.6,44,5500,4.8,no,no,no,good,no,no,notckd
+33,80,1.025,0,0,normal,normal,notpresent,notpresent,100,37,1.2,142,4,16.9,52,6700,6,no,no,no,good,no,no,notckd
+66,70,1.02,0,0,normal,normal,notpresent,notpresent,94,19,0.7,135,3.9,16,41,5300,5.9,no,no,no,good,no,no,notckd
+71,70,1.02,0,0,normal,normal,notpresent,notpresent,81,18,0.8,145,5,14.7,44,9800,6,no,no,no,good,no,no,notckd
+39,70,1.025,0,0,normal,normal,notpresent,notpresent,124,22,0.6,137,3.8,13.4,43,?,?,no,no,no,good,no,no,notckd
+42,70,1.02,0,0,normal,normal,notpresent,notpresent,93,32,0.9,143,4.7,16.6,43,7100,5.3,no,no,no,good,no,no,notckd
+54,70,1.02,0,0,?,?,?,?,76,28,0.6,146,3.5,14.8,52,8400,5.9,no,no,no,good,no,no,notckd
+47,80,1.025,0,0,normal,normal,notpresent,notpresent,124,44,1,140,4.9,14.9,41,7000,5.7,no,no,no,good,no,no,notckd
+30,80,1.02,0,0,normal,normal,notpresent,notpresent,89,42,0.5,139,5,16.7,52,10200,5,no,no,no,good,no,no,notckd
+75,60,1.02,0,0,normal,normal,notpresent,notpresent,110,50,0.7,135,5,14.3,40,8300,5.8,no,no,no,?,?,?,notckd
+44,70,?,?,?,?,?,notpresent,notpresent,106,25,0.9,150,3.6,15,50,9600,6.5,no,no,no,good,no,no,notckd
+41,70,1.02,0,0,normal,normal,notpresent,notpresent,125,38,0.6,140,5,16.8,41,6300,5.9,no,no,no,good,no,no,notckd
+34,60,1.02,0,0,normal,normal,notpresent,notpresent,91,49,1.2,135,4.5,13.5,48,8600,4.9,no,no,no,good,no,no,notckd
+73,60,1.02,0,0,normal,normal,notpresent,notpresent,127,48,0.5,150,3.5,15.1,52,11000,4.7,no,no,no,good,no,no,notckd
+45,60,1.02,0,0,normal,normal,?,?,114,26,0.7,141,4.2,15,43,9200,5.8,no,no,no,good,no,no,notckd
+44,60,1.025,0,0,normal,normal,notpresent,notpresent,96,33,0.9,147,4.5,16.9,41,7200,5,no,no,no,good,no,no,notckd
+29,70,1.02,0,0,normal,normal,notpresent,notpresent,127,44,1.2,145,5,14.8,48,?,?,no,no,no,good,no,no,notckd
+55,70,1.02,0,0,normal,normal,notpresent,notpresent,107,26,1.1,?,?,17,50,6700,6.1,no,no,no,good,no,no,notckd
+33,80,1.025,0,0,normal,normal,notpresent,notpresent,128,38,0.6,135,3.9,13.1,45,6200,4.5,no,no,no,good,no,no,notckd
+41,80,1.02,0,0,normal,normal,notpresent,notpresent,122,25,0.8,138,5,17.1,41,9100,5.2,no,no,no,good,no,no,notckd
+52,80,1.02,0,0,normal,normal,notpresent,notpresent,128,30,1.2,140,4.5,15.2,52,4300,5.7,no,no,no,good,no,no,notckd
+47,60,1.02,0,0,normal,normal,notpresent,notpresent,137,17,0.5,150,3.5,13.6,44,7900,4.5,no,no,no,good,no,no,notckd
+43,80,1.025,0,0,normal,normal,notpresent,notpresent,81,46,0.6,135,4.9,13.9,48,6900,4.9,no,no,no,good,no,no,notckd
+51,60,1.02,0,0,?,?,notpresent,notpresent,129,25,1.2,139,5,17.2,40,8100,5.9,no,no,no,good,no,no,notckd
+46,60,1.02,0,0,normal,normal,notpresent,notpresent,102,27,0.7,142,4.9,13.2,44,11000,5.4,no,no,no,good,no,no,notckd
+56,60,1.025,0,0,normal,normal,notpresent,notpresent,132,18,1.1,147,4.7,13.7,45,7500,5.6,no,no,no,good,no,no,notckd
+55,80,1.02,0,0,normal,normal,notpresent,notpresent,104,28,0.9,142,4.8,17.3,52,8200,4.8,no,no,no,good,no,no,notckd
+39,70,1.025,0,0,normal,normal,notpresent,notpresent,131,46,0.6,145,5,15.6,41,9400,4.7,no,no,no,good,no,no,notckd
+58,70,1.02,0,0,normal,normal,notpresent,notpresent,102,48,1.2,139,4.3,15,40,8100,4.9,no,no,no,good,no,no,notckd
+61,70,1.025,0,0,normal,normal,notpresent,notpresent,120,29,0.7,137,3.5,17.4,52,7000,5.3,no,no,no,good,no,no,notckd
+30,60,1.02,0,0,normal,normal,notpresent,notpresent,138,15,1.1,135,4.4,?,?,?,?,no,no,no,good,no,no,notckd
+57,60,1.02,0,0,normal,normal,notpresent,notpresent,105,49,1.2,150,4.7,15.7,44,10400,6.2,no,no,no,good,no,no,notckd
+65,60,1.02,0,0,normal,normal,notpresent,notpresent,109,39,1,144,3.5,13.9,48,9600,4.8,no,no,no,good,no,no,notckd
+70,60,?,?,?,?,?,notpresent,notpresent,120,40,0.5,140,4.6,16,43,4500,4.9,no,no,no,good,no,no,notckd
+43,80,1.025,0,0,normal,normal,notpresent,notpresent,130,30,1.1,143,5,15.9,45,7800,4.5,no,no,no,good,no,no,notckd
+40,80,1.02,0,0,normal,normal,notpresent,notpresent,119,15,0.7,150,4.9,?,?,?,?,no,no,no,good,no,no,notckd
+58,80,1.02,0,0,normal,normal,notpresent,notpresent,100,50,1.2,140,3.5,14,50,6700,6.5,no,no,no,good,no,no,notckd
+47,60,1.02,0,0,normal,normal,notpresent,notpresent,109,25,1.1,141,4.7,15.8,41,8300,5.2,no,no,no,good,no,no,notckd
+30,60,1.025,0,0,normal,normal,notpresent,notpresent,120,31,0.8,150,4.6,13.4,44,10700,5.8,no,no,no,good,no,no,notckd
+28,70,1.02,0,0,normal,normal,?,?,131,29,0.6,145,4.9,?,45,8600,6.5,no,no,no,good,no,no,notckd
+33,60,1.025,0,0,normal,normal,notpresent,notpresent,80,25,0.9,146,3.5,14.1,48,7800,5.1,no,no,no,good,no,no,notckd
+43,80,1.02,0,0,normal,normal,notpresent,notpresent,114,32,1.1,135,3.9,?,42,?,?,no,no,no,good,no,no,notckd
+59,70,1.025,0,0,normal,normal,notpresent,notpresent,130,39,0.7,147,4.7,13.5,46,6700,4.5,no,no,no,good,no,no,notckd
+34,70,1.025,0,0,normal,normal,notpresent,notpresent,?,33,1,150,5,15.3,44,10500,6.1,no,no,no,good,no,no,notckd
+23,80,1.02,0,0,normal,normal,notpresent,notpresent,99,46,1.2,142,4,17.7,46,4300,5.5,no,no,no,good,no,no,notckd
+60,60,1.02,0,0,normal,normal,notpresent,notpresent,134,45,0.5,139,4.8,14.2,48,10700,5.6,no,no,no,good,no,no,notckd
+25,60,1.02,0,0,normal,normal,notpresent,notpresent,119,27,0.5,?,?,15.2,40,9200,5.2,no,no,no,good,no,no,notckd
+44,70,1.025,0,0,normal,normal,notpresent,notpresent,92,40,0.9,141,4.9,14,52,7500,6.2,no,no,no,good,no,no,notckd
+62,80,1.02,0,0,normal,normal,notpresent,notpresent,132,34,0.8,147,3.5,17.8,44,4700,4.5,no,no,no,good,no,no,notckd
+25,70,1.02,0,0,normal,normal,notpresent,notpresent,88,42,0.5,136,3.5,13.3,48,7000,4.9,no,no,no,good,no,no,notckd
+32,70,1.025,0,0,normal,normal,notpresent,notpresent,100,29,1.1,142,4.5,14.3,43,6700,5.9,no,no,no,good,no,no,notckd
+63,70,1.025,0,0,normal,normal,notpresent,notpresent,130,37,0.9,150,5,13.4,41,7300,4.7,no,no,no,good,no,no,notckd
+44,60,1.02,0,0,normal,normal,notpresent,notpresent,95,46,0.5,138,4.2,15,50,7700,6.3,no,no,no,good,no,no,notckd
+37,60,1.025,0,0,normal,normal,notpresent,notpresent,111,35,0.8,135,4.1,16.2,50,5500,5.7,no,no,no,good,no,no,notckd
+64,60,1.02,0,0,normal,normal,notpresent,notpresent,106,27,0.7,150,3.3,14.4,42,8100,4.7,no,no,no,good,no,no,notckd
+22,60,1.025,0,0,normal,normal,notpresent,notpresent,97,18,1.2,138,4.3,13.5,42,7900,6.4,no,no,no,good,no,no,notckd
+33,60,?,?,?,normal,normal,notpresent,notpresent,130,41,0.9,141,4.4,15.5,52,4300,5.8,no,no,no,good,no,no,notckd
+43,60,1.025,0,0,normal,normal,notpresent,notpresent,108,25,1,144,5,17.8,43,7200,5.5,no,no,no,good,no,no,notckd
+38,80,1.02,0,0,normal,normal,notpresent,notpresent,99,19,0.5,147,3.5,13.6,44,7300,6.4,no,no,no,good,no,no,notckd
+35,70,1.025,0,0,?,?,notpresent,notpresent,82,36,1.1,150,3.5,14.5,52,9400,6.1,no,no,no,good,no,no,notckd
+65,70,1.025,0,0,?,?,notpresent,notpresent,85,20,1,142,4.8,16.1,43,9600,4.5,no,no,no,good,no,no,notckd
+29,80,1.02,0,0,normal,normal,notpresent,notpresent,83,49,0.9,139,3.3,17.5,40,9900,4.7,no,no,no,good,no,no,notckd
+37,60,1.02,0,0,normal,normal,notpresent,notpresent,109,47,1.1,141,4.9,15,48,7000,5.2,no,no,no,good,no,no,notckd
+39,60,1.02,0,0,normal,normal,notpresent,notpresent,86,37,0.6,150,5,13.6,51,5800,4.5,no,no,no,good,no,no,notckd
+32,60,1.025,0,0,normal,normal,notpresent,notpresent,102,17,0.4,147,4.7,14.6,41,6800,5.1,no,no,no,good,no,no,notckd
+23,60,1.02,0,0,normal,normal,notpresent,notpresent,95,24,0.8,145,5,15,52,6300,4.6,no,no,no,good,no,no,notckd
+34,70,1.025,0,0,normal,normal,notpresent,notpresent,87,38,0.5,144,4.8,17.1,47,7400,6.1,no,no,no,good,no,no,notckd
+66,70,1.025,0,0,normal,normal,notpresent,notpresent,107,16,1.1,140,3.6,13.6,42,11000,4.9,no,no,no,good,no,no,notckd
+47,60,1.02,0,0,normal,normal,notpresent,notpresent,117,22,1.2,138,3.5,13,45,5200,5.6,no,no,no,good,no,no,notckd
+74,60,1.02,0,0,normal,normal,notpresent,notpresent,88,50,0.6,147,3.7,17.2,53,6000,4.5,no,no,no,good,no,no,notckd
+35,60,1.025,0,0,normal,normal,notpresent,notpresent,105,39,0.5,135,3.9,14.7,43,5800,6.2,no,no,no,good,no,no,notckd
+29,80,1.02,0,0,normal,normal,notpresent,notpresent,70,16,0.7,138,3.5,13.7,54,5400,5.8,no,no,no,good,no,no,notckd
+33,80,1.025,0,0,normal,normal,notpresent,notpresent,89,19,1.1,144,5,15,40,10300,4.8,no,no,no,good,no,no,notckd
+67,80,1.025,0,0,normal,normal,notpresent,notpresent,99,40,0.5,?,?,17.8,44,5900,5.2,no,no,no,good,no,no,notckd
+73,80,1.025,0,0,normal,normal,notpresent,notpresent,118,44,0.7,137,3.5,14.8,45,9300,4.7,no,no,no,good,no,no,notckd
+24,80,1.02,0,0,normal,normal,notpresent,notpresent,93,46,1,145,3.5,?,?,10700,6.3,no,no,no,good,no,no,notckd
+60,80,1.025,0,0,normal,normal,notpresent,notpresent,81,15,0.5,141,3.6,15,46,10500,5.3,no,no,no,good,no,no,notckd
+68,60,1.025,0,0,normal,normal,notpresent,notpresent,125,41,1.1,139,3.8,17.4,50,6700,6.1,no,no,no,good,no,no,notckd
+30,80,1.025,0,0,normal,normal,notpresent,notpresent,82,42,0.7,146,5,14.9,45,9400,5.9,no,no,no,good,no,no,notckd
+75,70,1.02,0,0,normal,normal,notpresent,notpresent,107,48,0.8,144,3.5,13.6,46,10300,4.8,no,no,no,good,no,no,notckd
+69,70,1.02,0,0,normal,normal,notpresent,notpresent,83,42,1.2,139,3.7,16.2,50,9300,5.4,no,no,no,good,no,no,notckd
+28,60,1.025,0,0,normal,normal,notpresent,notpresent,79,50,0.5,145,5,17.6,51,6500,5,no,no,no,good,no,no,notckd
+72,60,1.02,0,0,normal,normal,notpresent,notpresent,109,26,0.9,150,4.9,15,52,10500,5.5,no,no,no,good,no,no,notckd
+61,70,1.025,0,0,normal,normal,notpresent,notpresent,133,38,1,142,3.6,13.7,47,9200,4.9,no,no,no,good,no,no,notckd
+79,80,1.025,0,0,normal,normal,notpresent,notpresent,111,44,1.2,146,3.6,16.3,40,8000,6.4,no,no,no,good,no,no,notckd
+70,80,1.02,0,0,normal,normal,notpresent,notpresent,74,41,0.5,143,4.5,15.1,48,9700,5.6,no,no,no,good,no,no,notckd
+58,70,1.025,0,0,normal,normal,notpresent,notpresent,88,16,1.1,147,3.5,16.4,53,9100,5.2,no,no,no,good,no,no,notckd
+64,70,1.02,0,0,normal,normal,notpresent,notpresent,97,27,0.7,145,4.8,13.8,49,6400,4.8,no,no,no,good,no,no,notckd
+71,60,1.025,0,0,normal,normal,notpresent,notpresent,?,?,0.9,140,4.8,15.2,42,7700,5.5,no,no,no,good,no,no,notckd
+62,80,1.025,0,0,normal,normal,notpresent,notpresent,78,45,0.6,138,3.5,16.1,50,5400,5.7,no,no,no,good,no,no,notckd
+59,60,1.02,0,0,normal,normal,notpresent,notpresent,113,23,1.1,139,3.5,15.3,54,6500,4.9,no,no,no,good,no,no,notckd
+71,70,1.025,0,0,?,?,notpresent,notpresent,79,47,0.5,142,4.8,16.6,40,5800,5.9,no,no,no,good,no,no,notckd
+48,80,1.025,0,0,normal,normal,notpresent,notpresent,75,22,0.8,137,5,16.8,51,6000,6.5,no,no,no,good,no,no,notckd
+80,80,1.025,0,0,normal,normal,notpresent,notpresent,119,46,0.7,141,4.9,13.9,49,5100,5,no,no,no,good,no,no,notckd
+57,60,1.02,0,0,normal,normal,notpresent,notpresent,132,18,1.1,150,4.7,15.4,42,11000,4.5,no,no,no,good,no,no,notckd
+63,70,1.02,0,0,normal,normal,notpresent,notpresent,113,25,0.6,146,4.9,16.5,52,8000,5.1,no,no,no,good,no,no,notckd
+46,70,1.025,0,0,normal,normal,notpresent,notpresent,100,47,0.5,142,3.5,16.4,43,5700,6.5,no,no,no,good,no,no,notckd
+15,80,1.025,0,0,normal,normal,notpresent,notpresent,93,17,0.9,136,3.9,16.7,50,6200,5.2,no,no,no,good,no,no,notckd
+51,80,1.02,0,0,normal,normal,notpresent,notpresent,94,15,1.2,144,3.7,15.5,46,9500,6.4,no,no,no,good,no,no,notckd
+41,80,1.025,0,0,normal,normal,notpresent,notpresent,112,48,0.7,140,5,17,52,7200,5.8,no,no,no,good,no,no,notckd
+52,80,1.025,0,0,normal,normal,notpresent,notpresent,99,25,0.8,135,3.7,15,52,6300,5.3,no,no,no,good,no,no,notckd
+36,80,1.025,0,0,normal,normal,notpresent,notpresent,85,16,1.1,142,4.1,15.6,44,5800,6.3,no,no,no,good,no,no,notckd
+57,80,1.02,0,0,normal,normal,notpresent,notpresent,133,48,1.2,147,4.3,14.8,46,6600,5.5,no,no,no,good,no,no,notckd
+43,60,1.025,0,0,normal,normal,notpresent,notpresent,117,45,0.7,141,4.4,13,54,7400,5.4,no,no,no,good,no,no,notckd
+50,80,1.02,0,0,normal,normal,notpresent,notpresent,137,46,0.8,139,5,14.1,45,9500,4.6,no,no,no,good,no,no,notckd
+55,80,1.02,0,0,normal,normal,notpresent,notpresent,140,49,0.5,150,4.9,15.7,47,6700,4.9,no,no,no,good,no,no,notckd
+42,70,1.025,0,0,normal,normal,notpresent,notpresent,75,31,1.2,141,3.5,16.5,54,7800,6.2,no,no,no,good,no,no,notckd
+12,80,1.02,0,0,normal,normal,notpresent,notpresent,100,26,0.6,137,4.4,15.8,49,6600,5.4,no,no,no,good,no,no,notckd
+17,60,1.025,0,0,normal,normal,notpresent,notpresent,114,50,1,135,4.9,14.2,51,7200,5.9,no,no,no,good,no,no,notckd
+58,80,1.025,0,0,normal,normal,notpresent,notpresent,131,18,1.1,141,3.5,15.8,53,6800,6.1,no,no,no,good,no,no,notckd

data_cache/neurology_parkinsons.csv ADDED Viewed

	@@ -0,0 +1,196 @@

+name,MDVP:Fo(Hz),MDVP:Fhi(Hz),MDVP:Flo(Hz),MDVP:Jitter(%),MDVP:Jitter(Abs),MDVP:RAP,MDVP:PPQ,Jitter:DDP,MDVP:Shimmer,MDVP:Shimmer(dB),Shimmer:APQ3,Shimmer:APQ5,MDVP:APQ,Shimmer:DDA,NHR,HNR,status,RPDE,DFA,spread1,spread2,D2,PPE
+phon_R01_S01_1,119.99200,157.30200,74.99700,0.00784,0.00007,0.00370,0.00554,0.01109,0.04374,0.42600,0.02182,0.03130,0.02971,0.06545,0.02211,21.03300,1,0.414783,0.815285,-4.813031,0.266482,2.301442,0.284654
+phon_R01_S01_2,122.40000,148.65000,113.81900,0.00968,0.00008,0.00465,0.00696,0.01394,0.06134,0.62600,0.03134,0.04518,0.04368,0.09403,0.01929,19.08500,1,0.458359,0.819521,-4.075192,0.335590,2.486855,0.368674
+phon_R01_S01_3,116.68200,131.11100,111.55500,0.01050,0.00009,0.00544,0.00781,0.01633,0.05233,0.48200,0.02757,0.03858,0.03590,0.08270,0.01309,20.65100,1,0.429895,0.825288,-4.443179,0.311173,2.342259,0.332634
+phon_R01_S01_4,116.67600,137.87100,111.36600,0.00997,0.00009,0.00502,0.00698,0.01505,0.05492,0.51700,0.02924,0.04005,0.03772,0.08771,0.01353,20.64400,1,0.434969,0.819235,-4.117501,0.334147,2.405554,0.368975
+phon_R01_S01_5,116.01400,141.78100,110.65500,0.01284,0.00011,0.00655,0.00908,0.01966,0.06425,0.58400,0.03490,0.04825,0.04465,0.10470,0.01767,19.64900,1,0.417356,0.823484,-3.747787,0.234513,2.332180,0.410335
+phon_R01_S01_6,120.55200,131.16200,113.78700,0.00968,0.00008,0.00463,0.00750,0.01388,0.04701,0.45600,0.02328,0.03526,0.03243,0.06985,0.01222,21.37800,1,0.415564,0.825069,-4.242867,0.299111,2.187560,0.357775
+phon_R01_S02_1,120.26700,137.24400,114.82000,0.00333,0.00003,0.00155,0.00202,0.00466,0.01608,0.14000,0.00779,0.00937,0.01351,0.02337,0.00607,24.88600,1,0.596040,0.764112,-5.634322,0.257682,1.854785,0.211756
+phon_R01_S02_2,107.33200,113.84000,104.31500,0.00290,0.00003,0.00144,0.00182,0.00431,0.01567,0.13400,0.00829,0.00946,0.01256,0.02487,0.00344,26.89200,1,0.637420,0.763262,-6.167603,0.183721,2.064693,0.163755
+phon_R01_S02_3,95.73000,132.06800,91.75400,0.00551,0.00006,0.00293,0.00332,0.00880,0.02093,0.19100,0.01073,0.01277,0.01717,0.03218,0.01070,21.81200,1,0.615551,0.773587,-5.498678,0.327769,2.322511,0.231571
+phon_R01_S02_4,95.05600,120.10300,91.22600,0.00532,0.00006,0.00268,0.00332,0.00803,0.02838,0.25500,0.01441,0.01725,0.02444,0.04324,0.01022,21.86200,1,0.547037,0.798463,-5.011879,0.325996,2.432792,0.271362
+phon_R01_S02_5,88.33300,112.24000,84.07200,0.00505,0.00006,0.00254,0.00330,0.00763,0.02143,0.19700,0.01079,0.01342,0.01892,0.03237,0.01166,21.11800,1,0.611137,0.776156,-5.249770,0.391002,2.407313,0.249740
+phon_R01_S02_6,91.90400,115.87100,86.29200,0.00540,0.00006,0.00281,0.00336,0.00844,0.02752,0.24900,0.01424,0.01641,0.02214,0.04272,0.01141,21.41400,1,0.583390,0.792520,-4.960234,0.363566,2.642476,0.275931
+phon_R01_S04_1,136.92600,159.86600,131.27600,0.00293,0.00002,0.00118,0.00153,0.00355,0.01259,0.11200,0.00656,0.00717,0.01140,0.01968,0.00581,25.70300,1,0.460600,0.646846,-6.547148,0.152813,2.041277,0.138512
+phon_R01_S04_2,139.17300,179.13900,76.55600,0.00390,0.00003,0.00165,0.00208,0.00496,0.01642,0.15400,0.00728,0.00932,0.01797,0.02184,0.01041,24.88900,1,0.430166,0.665833,-5.660217,0.254989,2.519422,0.199889
+phon_R01_S04_3,152.84500,163.30500,75.83600,0.00294,0.00002,0.00121,0.00149,0.00364,0.01828,0.15800,0.01064,0.00972,0.01246,0.03191,0.00609,24.92200,1,0.474791,0.654027,-6.105098,0.203653,2.125618,0.170100
+phon_R01_S04_4,142.16700,217.45500,83.15900,0.00369,0.00003,0.00157,0.00203,0.00471,0.01503,0.12600,0.00772,0.00888,0.01359,0.02316,0.00839,25.17500,1,0.565924,0.658245,-5.340115,0.210185,2.205546,0.234589
+phon_R01_S04_5,144.18800,349.25900,82.76400,0.00544,0.00004,0.00211,0.00292,0.00632,0.02047,0.19200,0.00969,0.01200,0.02074,0.02908,0.01859,22.33300,1,0.567380,0.644692,-5.440040,0.239764,2.264501,0.218164
+phon_R01_S04_6,168.77800,232.18100,75.60300,0.00718,0.00004,0.00284,0.00387,0.00853,0.03327,0.34800,0.01441,0.01893,0.03430,0.04322,0.02919,20.37600,1,0.631099,0.605417,-2.931070,0.434326,3.007463,0.430788
+phon_R01_S05_1,153.04600,175.82900,68.62300,0.00742,0.00005,0.00364,0.00432,0.01092,0.05517,0.54200,0.02471,0.03572,0.05767,0.07413,0.03160,17.28000,1,0.665318,0.719467,-3.949079,0.357870,3.109010,0.377429
+phon_R01_S05_2,156.40500,189.39800,142.82200,0.00768,0.00005,0.00372,0.00399,0.01116,0.03995,0.34800,0.01721,0.02374,0.04310,0.05164,0.03365,17.15300,1,0.649554,0.686080,-4.554466,0.340176,2.856676,0.322111
+phon_R01_S05_3,153.84800,165.73800,65.78200,0.00840,0.00005,0.00428,0.00450,0.01285,0.03810,0.32800,0.01667,0.02383,0.04055,0.05000,0.03871,17.53600,1,0.660125,0.704087,-4.095442,0.262564,2.739710,0.365391
+phon_R01_S05_4,153.88000,172.86000,78.12800,0.00480,0.00003,0.00232,0.00267,0.00696,0.04137,0.37000,0.02021,0.02591,0.04525,0.06062,0.01849,19.49300,1,0.629017,0.698951,-5.186960,0.237622,2.557536,0.259765
+phon_R01_S05_5,167.93000,193.22100,79.06800,0.00442,0.00003,0.00220,0.00247,0.00661,0.04351,0.37700,0.02228,0.02540,0.04246,0.06685,0.01280,22.46800,1,0.619060,0.679834,-4.330956,0.262384,2.916777,0.285695
+phon_R01_S05_6,173.91700,192.73500,86.18000,0.00476,0.00003,0.00221,0.00258,0.00663,0.04192,0.36400,0.02187,0.02470,0.03772,0.06562,0.01840,20.42200,1,0.537264,0.686894,-5.248776,0.210279,2.547508,0.253556
+phon_R01_S06_1,163.65600,200.84100,76.77900,0.00742,0.00005,0.00380,0.00390,0.01140,0.01659,0.16400,0.00738,0.00948,0.01497,0.02214,0.01778,23.83100,1,0.397937,0.732479,-5.557447,0.220890,2.692176,0.215961
+phon_R01_S06_2,104.40000,206.00200,77.96800,0.00633,0.00006,0.00316,0.00375,0.00948,0.03767,0.38100,0.01732,0.02245,0.03780,0.05197,0.02887,22.06600,1,0.522746,0.737948,-5.571843,0.236853,2.846369,0.219514
+phon_R01_S06_3,171.04100,208.31300,75.50100,0.00455,0.00003,0.00250,0.00234,0.00750,0.01966,0.18600,0.00889,0.01169,0.01872,0.02666,0.01095,25.90800,1,0.418622,0.720916,-6.183590,0.226278,2.589702,0.147403
+phon_R01_S06_4,146.84500,208.70100,81.73700,0.00496,0.00003,0.00250,0.00275,0.00749,0.01919,0.19800,0.00883,0.01144,0.01826,0.02650,0.01328,25.11900,1,0.358773,0.726652,-6.271690,0.196102,2.314209,0.162999
+phon_R01_S06_5,155.35800,227.38300,80.05500,0.00310,0.00002,0.00159,0.00176,0.00476,0.01718,0.16100,0.00769,0.01012,0.01661,0.02307,0.00677,25.97000,1,0.470478,0.676258,-7.120925,0.279789,2.241742,0.108514
+phon_R01_S06_6,162.56800,198.34600,77.63000,0.00502,0.00003,0.00280,0.00253,0.00841,0.01791,0.16800,0.00793,0.01057,0.01799,0.02380,0.01170,25.67800,1,0.427785,0.723797,-6.635729,0.209866,1.957961,0.135242
+phon_R01_S07_1,197.07600,206.89600,192.05500,0.00289,0.00001,0.00166,0.00168,0.00498,0.01098,0.09700,0.00563,0.00680,0.00802,0.01689,0.00339,26.77500,0,0.422229,0.741367,-7.348300,0.177551,1.743867,0.085569
+phon_R01_S07_2,199.22800,209.51200,192.09100,0.00241,0.00001,0.00134,0.00138,0.00402,0.01015,0.08900,0.00504,0.00641,0.00762,0.01513,0.00167,30.94000,0,0.432439,0.742055,-7.682587,0.173319,2.103106,0.068501
+phon_R01_S07_3,198.38300,215.20300,193.10400,0.00212,0.00001,0.00113,0.00135,0.00339,0.01263,0.11100,0.00640,0.00825,0.00951,0.01919,0.00119,30.77500,0,0.465946,0.738703,-7.067931,0.175181,1.512275,0.096320
+phon_R01_S07_4,202.26600,211.60400,197.07900,0.00180,0.000009,0.00093,0.00107,0.00278,0.00954,0.08500,0.00469,0.00606,0.00719,0.01407,0.00072,32.68400,0,0.368535,0.742133,-7.695734,0.178540,1.544609,0.056141
+phon_R01_S07_5,203.18400,211.52600,196.16000,0.00178,0.000009,0.00094,0.00106,0.00283,0.00958,0.08500,0.00468,0.00610,0.00726,0.01403,0.00065,33.04700,0,0.340068,0.741899,-7.964984,0.163519,1.423287,0.044539
+phon_R01_S07_6,201.46400,210.56500,195.70800,0.00198,0.000010,0.00105,0.00115,0.00314,0.01194,0.10700,0.00586,0.00760,0.00957,0.01758,0.00135,31.73200,0,0.344252,0.742737,-7.777685,0.170183,2.447064,0.057610
+phon_R01_S08_1,177.87600,192.92100,168.01300,0.00411,0.00002,0.00233,0.00241,0.00700,0.02126,0.18900,0.01154,0.01347,0.01612,0.03463,0.00586,23.21600,1,0.360148,0.778834,-6.149653,0.218037,2.477082,0.165827
+phon_R01_S08_2,176.17000,185.60400,163.56400,0.00369,0.00002,0.00205,0.00218,0.00616,0.01851,0.16800,0.00938,0.01160,0.01491,0.02814,0.00340,24.95100,1,0.341435,0.783626,-6.006414,0.196371,2.536527,0.173218
+phon_R01_S08_3,180.19800,201.24900,175.45600,0.00284,0.00002,0.00153,0.00166,0.00459,0.01444,0.13100,0.00726,0.00885,0.01190,0.02177,0.00231,26.73800,1,0.403884,0.766209,-6.452058,0.212294,2.269398,0.141929
+phon_R01_S08_4,187.73300,202.32400,173.01500,0.00316,0.00002,0.00168,0.00182,0.00504,0.01663,0.15100,0.00829,0.01003,0.01366,0.02488,0.00265,26.31000,1,0.396793,0.758324,-6.006647,0.266892,2.382544,0.160691
+phon_R01_S08_5,186.16300,197.72400,177.58400,0.00298,0.00002,0.00165,0.00175,0.00496,0.01495,0.13500,0.00774,0.00941,0.01233,0.02321,0.00231,26.82200,1,0.326480,0.765623,-6.647379,0.201095,2.374073,0.130554
+phon_R01_S08_6,184.05500,196.53700,166.97700,0.00258,0.00001,0.00134,0.00147,0.00403,0.01463,0.13200,0.00742,0.00901,0.01234,0.02226,0.00257,26.45300,1,0.306443,0.759203,-7.044105,0.063412,2.361532,0.115730
+phon_R01_S10_1,237.22600,247.32600,225.22700,0.00298,0.00001,0.00169,0.00182,0.00507,0.01752,0.16400,0.01035,0.01024,0.01133,0.03104,0.00740,22.73600,0,0.305062,0.654172,-7.310550,0.098648,2.416838,0.095032
+phon_R01_S10_2,241.40400,248.83400,232.48300,0.00281,0.00001,0.00157,0.00173,0.00470,0.01760,0.15400,0.01006,0.01038,0.01251,0.03017,0.00675,23.14500,0,0.457702,0.634267,-6.793547,0.158266,2.256699,0.117399
+phon_R01_S10_3,243.43900,250.91200,232.43500,0.00210,0.000009,0.00109,0.00137,0.00327,0.01419,0.12600,0.00777,0.00898,0.01033,0.02330,0.00454,25.36800,0,0.438296,0.635285,-7.057869,0.091608,2.330716,0.091470
+phon_R01_S10_4,242.85200,255.03400,227.91100,0.00225,0.000009,0.00117,0.00139,0.00350,0.01494,0.13400,0.00847,0.00879,0.01014,0.02542,0.00476,25.03200,0,0.431285,0.638928,-6.995820,0.102083,2.365800,0.102706
+phon_R01_S10_5,245.51000,262.09000,231.84800,0.00235,0.000010,0.00127,0.00148,0.00380,0.01608,0.14100,0.00906,0.00977,0.01149,0.02719,0.00476,24.60200,0,0.467489,0.631653,-7.156076,0.127642,2.392122,0.097336
+phon_R01_S10_6,252.45500,261.48700,182.78600,0.00185,0.000007,0.00092,0.00113,0.00276,0.01152,0.10300,0.00614,0.00730,0.00860,0.01841,0.00432,26.80500,0,0.610367,0.635204,-7.319510,0.200873,2.028612,0.086398
+phon_R01_S13_1,122.18800,128.61100,115.76500,0.00524,0.00004,0.00169,0.00203,0.00507,0.01613,0.14300,0.00855,0.00776,0.01433,0.02566,0.00839,23.16200,0,0.579597,0.733659,-6.439398,0.266392,2.079922,0.133867
+phon_R01_S13_2,122.96400,130.04900,114.67600,0.00428,0.00003,0.00124,0.00155,0.00373,0.01681,0.15400,0.00930,0.00802,0.01400,0.02789,0.00462,24.97100,0,0.538688,0.754073,-6.482096,0.264967,2.054419,0.128872
+phon_R01_S13_3,124.44500,135.06900,117.49500,0.00431,0.00003,0.00141,0.00167,0.00422,0.02184,0.19700,0.01241,0.01024,0.01685,0.03724,0.00479,25.13500,0,0.553134,0.775933,-6.650471,0.254498,1.840198,0.103561
+phon_R01_S13_4,126.34400,134.23100,112.77300,0.00448,0.00004,0.00131,0.00169,0.00393,0.02033,0.18500,0.01143,0.00959,0.01614,0.03429,0.00474,25.03000,0,0.507504,0.760361,-6.689151,0.291954,2.431854,0.105993
+phon_R01_S13_5,128.00100,138.05200,122.08000,0.00436,0.00003,0.00137,0.00166,0.00411,0.02297,0.21000,0.01323,0.01072,0.01677,0.03969,0.00481,24.69200,0,0.459766,0.766204,-7.072419,0.220434,1.972297,0.119308
+phon_R01_S13_6,129.33600,139.86700,118.60400,0.00490,0.00004,0.00165,0.00183,0.00495,0.02498,0.22800,0.01396,0.01219,0.01947,0.04188,0.00484,25.42900,0,0.420383,0.785714,-6.836811,0.269866,2.223719,0.147491
+phon_R01_S16_1,108.80700,134.65600,102.87400,0.00761,0.00007,0.00349,0.00486,0.01046,0.02719,0.25500,0.01483,0.01609,0.02067,0.04450,0.01036,21.02800,1,0.536009,0.819032,-4.649573,0.205558,1.986899,0.316700
+phon_R01_S16_2,109.86000,126.35800,104.43700,0.00874,0.00008,0.00398,0.00539,0.01193,0.03209,0.30700,0.01789,0.01992,0.02454,0.05368,0.01180,20.76700,1,0.558586,0.811843,-4.333543,0.221727,2.014606,0.344834
+phon_R01_S16_3,110.41700,131.06700,103.37000,0.00784,0.00007,0.00352,0.00514,0.01056,0.03715,0.33400,0.02032,0.02302,0.02802,0.06097,0.00969,21.42200,1,0.541781,0.821364,-4.438453,0.238298,1.922940,0.335041
+phon_R01_S16_4,117.27400,129.91600,110.40200,0.00752,0.00006,0.00299,0.00469,0.00898,0.02293,0.22100,0.01189,0.01459,0.01948,0.03568,0.00681,22.81700,1,0.530529,0.817756,-4.608260,0.290024,2.021591,0.314464
+phon_R01_S16_5,116.87900,131.89700,108.15300,0.00788,0.00007,0.00334,0.00493,0.01003,0.02645,0.26500,0.01394,0.01625,0.02137,0.04183,0.00786,22.60300,1,0.540049,0.813432,-4.476755,0.262633,1.827012,0.326197
+phon_R01_S16_6,114.84700,271.31400,104.68000,0.00867,0.00008,0.00373,0.00520,0.01120,0.03225,0.35000,0.01805,0.01974,0.02519,0.05414,0.01143,21.66000,1,0.547975,0.817396,-4.609161,0.221711,1.831691,0.316395
+phon_R01_S17_1,209.14400,237.49400,109.37900,0.00282,0.00001,0.00147,0.00152,0.00442,0.01861,0.17000,0.00975,0.01258,0.01382,0.02925,0.00871,25.55400,0,0.341788,0.678874,-7.040508,0.066994,2.460791,0.101516
+phon_R01_S17_2,223.36500,238.98700,98.66400,0.00264,0.00001,0.00154,0.00151,0.00461,0.01906,0.16500,0.01013,0.01296,0.01340,0.03039,0.00301,26.13800,0,0.447979,0.686264,-7.293801,0.086372,2.321560,0.098555
+phon_R01_S17_3,222.23600,231.34500,205.49500,0.00266,0.00001,0.00152,0.00144,0.00457,0.01643,0.14500,0.00867,0.01108,0.01200,0.02602,0.00340,25.85600,0,0.364867,0.694399,-6.966321,0.095882,2.278687,0.103224
+phon_R01_S17_4,228.83200,234.61900,223.63400,0.00296,0.00001,0.00175,0.00155,0.00526,0.01644,0.14500,0.00882,0.01075,0.01179,0.02647,0.00351,25.96400,0,0.256570,0.683296,-7.245620,0.018689,2.498224,0.093534
+phon_R01_S17_5,229.40100,252.22100,221.15600,0.00205,0.000009,0.00114,0.00113,0.00342,0.01457,0.12900,0.00769,0.00957,0.01016,0.02308,0.00300,26.41500,0,0.276850,0.673636,-7.496264,0.056844,2.003032,0.073581
+phon_R01_S17_6,228.96900,239.54100,113.20100,0.00238,0.00001,0.00136,0.00140,0.00408,0.01745,0.15400,0.00942,0.01160,0.01234,0.02827,0.00420,24.54700,0,0.305429,0.681811,-7.314237,0.006274,2.118596,0.091546
+phon_R01_S18_1,140.34100,159.77400,67.02100,0.00817,0.00006,0.00430,0.00440,0.01289,0.03198,0.31300,0.01830,0.01810,0.02428,0.05490,0.02183,19.56000,1,0.460139,0.720908,-5.409423,0.226850,2.359973,0.226156
+phon_R01_S18_2,136.96900,166.60700,66.00400,0.00923,0.00007,0.00507,0.00463,0.01520,0.03111,0.30800,0.01638,0.01759,0.02603,0.04914,0.02659,19.97900,1,0.498133,0.729067,-5.324574,0.205660,2.291558,0.226247
+phon_R01_S18_3,143.53300,162.21500,65.80900,0.01101,0.00008,0.00647,0.00467,0.01941,0.05384,0.47800,0.03152,0.02422,0.03392,0.09455,0.04882,20.33800,1,0.513237,0.731444,-5.869750,0.151814,2.118496,0.185580
+phon_R01_S18_4,148.09000,162.82400,67.34300,0.00762,0.00005,0.00467,0.00354,0.01400,0.05428,0.49700,0.03357,0.02494,0.03635,0.10070,0.02431,21.71800,1,0.487407,0.727313,-6.261141,0.120956,2.137075,0.141958
+phon_R01_S18_5,142.72900,162.40800,65.47600,0.00831,0.00006,0.00469,0.00419,0.01407,0.03485,0.36500,0.01868,0.01906,0.02949,0.05605,0.02599,20.26400,1,0.489345,0.730387,-5.720868,0.158830,2.277927,0.180828
+phon_R01_S18_6,136.35800,176.59500,65.75000,0.00971,0.00007,0.00534,0.00478,0.01601,0.04978,0.48300,0.02749,0.02466,0.03736,0.08247,0.03361,18.57000,1,0.543299,0.733232,-5.207985,0.224852,2.642276,0.242981
+phon_R01_S19_1,120.08000,139.71000,111.20800,0.00405,0.00003,0.00180,0.00220,0.00540,0.01706,0.15200,0.00974,0.00925,0.01345,0.02921,0.00442,25.74200,1,0.495954,0.762959,-5.791820,0.329066,2.205024,0.188180
+phon_R01_S19_2,112.01400,588.51800,107.02400,0.00533,0.00005,0.00268,0.00329,0.00805,0.02448,0.22600,0.01373,0.01375,0.01956,0.04120,0.00623,24.17800,1,0.509127,0.789532,-5.389129,0.306636,1.928708,0.225461
+phon_R01_S19_3,110.79300,128.10100,107.31600,0.00494,0.00004,0.00260,0.00283,0.00780,0.02442,0.21600,0.01432,0.01325,0.01831,0.04295,0.00479,25.43800,1,0.437031,0.815908,-5.313360,0.201861,2.225815,0.244512
+phon_R01_S19_4,110.70700,122.61100,105.00700,0.00516,0.00005,0.00277,0.00289,0.00831,0.02215,0.20600,0.01284,0.01219,0.01715,0.03851,0.00472,25.19700,1,0.463514,0.807217,-5.477592,0.315074,1.862092,0.228624
+phon_R01_S19_5,112.87600,148.82600,106.98100,0.00500,0.00004,0.00270,0.00289,0.00810,0.03999,0.35000,0.02413,0.02231,0.02704,0.07238,0.00905,23.37000,1,0.489538,0.789977,-5.775966,0.341169,2.007923,0.193918
+phon_R01_S19_6,110.56800,125.39400,106.82100,0.00462,0.00004,0.00226,0.00280,0.00677,0.02199,0.19700,0.01284,0.01199,0.01636,0.03852,0.00420,25.82000,1,0.429484,0.816340,-5.391029,0.250572,1.777901,0.232744
+phon_R01_S20_1,95.38500,102.14500,90.26400,0.00608,0.00006,0.00331,0.00332,0.00994,0.03202,0.26300,0.01803,0.01886,0.02455,0.05408,0.01062,21.87500,1,0.644954,0.779612,-5.115212,0.249494,2.017753,0.260015
+phon_R01_S20_2,100.77000,115.69700,85.54500,0.01038,0.00010,0.00622,0.00576,0.01865,0.03121,0.36100,0.01773,0.01783,0.02139,0.05320,0.02220,19.20000,1,0.594387,0.790117,-4.913885,0.265699,2.398422,0.277948
+phon_R01_S20_3,96.10600,108.66400,84.51000,0.00694,0.00007,0.00389,0.00415,0.01168,0.04024,0.36400,0.02266,0.02451,0.02876,0.06799,0.01823,19.05500,1,0.544805,0.770466,-4.441519,0.155097,2.645959,0.327978
+phon_R01_S20_4,95.60500,107.71500,87.54900,0.00702,0.00007,0.00428,0.00371,0.01283,0.03156,0.29600,0.01792,0.01841,0.02190,0.05377,0.01825,19.65900,1,0.576084,0.778747,-5.132032,0.210458,2.232576,0.260633
+phon_R01_S20_5,100.96000,110.01900,95.62800,0.00606,0.00006,0.00351,0.00348,0.01053,0.02427,0.21600,0.01371,0.01421,0.01751,0.04114,0.01237,20.53600,1,0.554610,0.787896,-5.022288,0.146948,2.428306,0.264666
+phon_R01_S20_6,98.80400,102.30500,87.80400,0.00432,0.00004,0.00247,0.00258,0.00742,0.02223,0.20200,0.01277,0.01343,0.01552,0.03831,0.00882,22.24400,1,0.576644,0.772416,-6.025367,0.078202,2.053601,0.177275
+phon_R01_S21_1,176.85800,205.56000,75.34400,0.00747,0.00004,0.00418,0.00420,0.01254,0.04795,0.43500,0.02679,0.03022,0.03510,0.08037,0.05470,13.89300,1,0.556494,0.729586,-5.288912,0.343073,3.099301,0.242119
+phon_R01_S21_2,180.97800,200.12500,155.49500,0.00406,0.00002,0.00220,0.00244,0.00659,0.03852,0.33100,0.02107,0.02493,0.02877,0.06321,0.02782,16.17600,1,0.583574,0.727747,-5.657899,0.315903,3.098256,0.200423
+phon_R01_S21_3,178.22200,202.45000,141.04700,0.00321,0.00002,0.00163,0.00194,0.00488,0.03759,0.32700,0.02073,0.02415,0.02784,0.06219,0.03151,15.92400,1,0.598714,0.712199,-6.366916,0.335753,2.654271,0.144614
+phon_R01_S21_4,176.28100,227.38100,125.61000,0.00520,0.00003,0.00287,0.00312,0.00862,0.06511,0.58000,0.03671,0.04159,0.04683,0.11012,0.04824,13.92200,1,0.602874,0.740837,-5.515071,0.299549,3.136550,0.220968
+phon_R01_S21_5,173.89800,211.35000,74.67700,0.00448,0.00003,0.00237,0.00254,0.00710,0.06727,0.65000,0.03788,0.04254,0.04802,0.11363,0.04214,14.73900,1,0.599371,0.743937,-5.783272,0.299793,3.007096,0.194052
+phon_R01_S21_6,179.71100,225.93000,144.87800,0.00709,0.00004,0.00391,0.00419,0.01172,0.04313,0.44200,0.02297,0.02768,0.03455,0.06892,0.07223,11.86600,1,0.590951,0.745526,-4.379411,0.375531,3.671155,0.332086
+phon_R01_S21_7,166.60500,206.00800,78.03200,0.00742,0.00004,0.00387,0.00453,0.01161,0.06640,0.63400,0.03650,0.04282,0.05114,0.10949,0.08725,11.74400,1,0.653410,0.733165,-4.508984,0.389232,3.317586,0.301952
+phon_R01_S22_1,151.95500,163.33500,147.22600,0.00419,0.00003,0.00224,0.00227,0.00672,0.07959,0.77200,0.04421,0.04962,0.05690,0.13262,0.01658,19.66400,1,0.501037,0.714360,-6.411497,0.207156,2.344876,0.134120
+phon_R01_S22_2,148.27200,164.98900,142.29900,0.00459,0.00003,0.00250,0.00256,0.00750,0.04190,0.38300,0.02383,0.02521,0.03051,0.07150,0.01914,18.78000,1,0.454444,0.734504,-5.952058,0.087840,2.344336,0.186489
+phon_R01_S22_3,152.12500,161.46900,76.59600,0.00382,0.00003,0.00191,0.00226,0.00574,0.05925,0.63700,0.03341,0.03794,0.04398,0.10024,0.01211,20.96900,1,0.447456,0.697790,-6.152551,0.173520,2.080121,0.160809
+phon_R01_S22_4,157.82100,172.97500,68.40100,0.00358,0.00002,0.00196,0.00196,0.00587,0.03716,0.30700,0.02062,0.02321,0.02764,0.06185,0.00850,22.21900,1,0.502380,0.712170,-6.251425,0.188056,2.143851,0.160812
+phon_R01_S22_5,157.44700,163.26700,149.60500,0.00369,0.00002,0.00201,0.00197,0.00602,0.03272,0.28300,0.01813,0.01909,0.02571,0.05439,0.01018,21.69300,1,0.447285,0.705658,-6.247076,0.180528,2.344348,0.164916
+phon_R01_S22_6,159.11600,168.91300,144.81100,0.00342,0.00002,0.00178,0.00184,0.00535,0.03381,0.30700,0.01806,0.02024,0.02809,0.05417,0.00852,22.66300,1,0.366329,0.693429,-6.417440,0.194627,2.473239,0.151709
+phon_R01_S24_1,125.03600,143.94600,116.18700,0.01280,0.00010,0.00743,0.00623,0.02228,0.03886,0.34200,0.02135,0.02174,0.03088,0.06406,0.08151,15.33800,1,0.629574,0.714485,-4.020042,0.265315,2.671825,0.340623
+phon_R01_S24_2,125.79100,140.55700,96.20600,0.01378,0.00011,0.00826,0.00655,0.02478,0.04689,0.42200,0.02542,0.02630,0.03908,0.07625,0.10323,15.43300,1,0.571010,0.690892,-5.159169,0.202146,2.441612,0.260375
+phon_R01_S24_3,126.51200,141.75600,99.77000,0.01936,0.00015,0.01159,0.00990,0.03476,0.06734,0.65900,0.03611,0.03963,0.05783,0.10833,0.16744,12.43500,1,0.638545,0.674953,-3.760348,0.242861,2.634633,0.378483
+phon_R01_S24_4,125.64100,141.06800,116.34600,0.03316,0.00026,0.02144,0.01522,0.06433,0.09178,0.89100,0.05358,0.04791,0.06196,0.16074,0.31482,8.86700,1,0.671299,0.656846,-3.700544,0.260481,2.991063,0.370961
+phon_R01_S24_5,128.45100,150.44900,75.63200,0.01551,0.00012,0.00905,0.00909,0.02716,0.06170,0.58400,0.03223,0.03672,0.05174,0.09669,0.11843,15.06000,1,0.639808,0.643327,-4.202730,0.310163,2.638279,0.356881
+phon_R01_S24_6,139.22400,586.56700,66.15700,0.03011,0.00022,0.01854,0.01628,0.05563,0.09419,0.93000,0.05551,0.05005,0.06023,0.16654,0.25930,10.48900,1,0.596362,0.641418,-3.269487,0.270641,2.690917,0.444774
+phon_R01_S25_1,150.25800,154.60900,75.34900,0.00248,0.00002,0.00105,0.00136,0.00315,0.01131,0.10700,0.00522,0.00659,0.01009,0.01567,0.00495,26.75900,1,0.296888,0.722356,-6.878393,0.089267,2.004055,0.113942
+phon_R01_S25_2,154.00300,160.26700,128.62100,0.00183,0.00001,0.00076,0.00100,0.00229,0.01030,0.09400,0.00469,0.00582,0.00871,0.01406,0.00243,28.40900,1,0.263654,0.691483,-7.111576,0.144780,2.065477,0.093193
+phon_R01_S25_3,149.68900,160.36800,133.60800,0.00257,0.00002,0.00116,0.00134,0.00349,0.01346,0.12600,0.00660,0.00818,0.01059,0.01979,0.00578,27.42100,1,0.365488,0.719974,-6.997403,0.210279,1.994387,0.112878
+phon_R01_S25_4,155.07800,163.73600,144.14800,0.00168,0.00001,0.00068,0.00092,0.00204,0.01064,0.09700,0.00522,0.00632,0.00928,0.01567,0.00233,29.74600,1,0.334171,0.677930,-6.981201,0.184550,2.129924,0.106802
+phon_R01_S25_5,151.88400,157.76500,133.75100,0.00258,0.00002,0.00115,0.00122,0.00346,0.01450,0.13700,0.00633,0.00788,0.01267,0.01898,0.00659,26.83300,1,0.393563,0.700246,-6.600023,0.249172,2.499148,0.105306
+phon_R01_S25_6,151.98900,157.33900,132.85700,0.00174,0.00001,0.00075,0.00096,0.00225,0.01024,0.09300,0.00455,0.00576,0.00993,0.01364,0.00238,29.92800,1,0.311369,0.676066,-6.739151,0.160686,2.296873,0.115130
+phon_R01_S26_1,193.03000,208.90000,80.29700,0.00766,0.00004,0.00450,0.00389,0.01351,0.03044,0.27500,0.01771,0.01815,0.02084,0.05312,0.00947,21.93400,1,0.497554,0.740539,-5.845099,0.278679,2.608749,0.185668
+phon_R01_S26_2,200.71400,223.98200,89.68600,0.00621,0.00003,0.00371,0.00337,0.01112,0.02286,0.20700,0.01192,0.01439,0.01852,0.03576,0.00704,23.23900,1,0.436084,0.727863,-5.258320,0.256454,2.550961,0.232520
+phon_R01_S26_3,208.51900,220.31500,199.02000,0.00609,0.00003,0.00368,0.00339,0.01105,0.01761,0.15500,0.00952,0.01058,0.01307,0.02855,0.00830,22.40700,1,0.338097,0.712466,-6.471427,0.184378,2.502336,0.136390
+phon_R01_S26_4,204.66400,221.30000,189.62100,0.00841,0.00004,0.00502,0.00485,0.01506,0.02378,0.21000,0.01277,0.01483,0.01767,0.03831,0.01316,21.30500,1,0.498877,0.722085,-4.876336,0.212054,2.376749,0.268144
+phon_R01_S26_5,210.14100,232.70600,185.25800,0.00534,0.00003,0.00321,0.00280,0.00964,0.01680,0.14900,0.00861,0.01017,0.01301,0.02583,0.00620,23.67100,1,0.441097,0.722254,-5.963040,0.250283,2.489191,0.177807
+phon_R01_S26_6,206.32700,226.35500,92.02000,0.00495,0.00002,0.00302,0.00246,0.00905,0.02105,0.20900,0.01107,0.01284,0.01604,0.03320,0.01048,21.86400,1,0.331508,0.715121,-6.729713,0.181701,2.938114,0.115515
+phon_R01_S27_1,151.87200,492.89200,69.08500,0.00856,0.00006,0.00404,0.00385,0.01211,0.01843,0.23500,0.00796,0.00832,0.01271,0.02389,0.06051,23.69300,1,0.407701,0.662668,-4.673241,0.261549,2.702355,0.274407
+phon_R01_S27_2,158.21900,442.55700,71.94800,0.00476,0.00003,0.00214,0.00207,0.00642,0.01458,0.14800,0.00606,0.00747,0.01312,0.01818,0.01554,26.35600,1,0.450798,0.653823,-6.051233,0.273280,2.640798,0.170106
+phon_R01_S27_3,170.75600,450.24700,79.03200,0.00555,0.00003,0.00244,0.00261,0.00731,0.01725,0.17500,0.00757,0.00971,0.01652,0.02270,0.01802,25.69000,1,0.486738,0.676023,-4.597834,0.372114,2.975889,0.282780
+phon_R01_S27_4,178.28500,442.82400,82.06300,0.00462,0.00003,0.00157,0.00194,0.00472,0.01279,0.12900,0.00617,0.00744,0.01151,0.01851,0.00856,25.02000,1,0.470422,0.655239,-4.913137,0.393056,2.816781,0.251972
+phon_R01_S27_5,217.11600,233.48100,93.97800,0.00404,0.00002,0.00127,0.00128,0.00381,0.01299,0.12400,0.00679,0.00631,0.01075,0.02038,0.00681,24.58100,1,0.462516,0.582710,-5.517173,0.389295,2.925862,0.220657
+phon_R01_S27_6,128.94000,479.69700,88.25100,0.00581,0.00005,0.00241,0.00314,0.00723,0.02008,0.22100,0.00849,0.01117,0.01734,0.02548,0.02350,24.74300,1,0.487756,0.684130,-6.186128,0.279933,2.686240,0.152428
+phon_R01_S27_7,176.82400,215.29300,83.96100,0.00460,0.00003,0.00209,0.00221,0.00628,0.01169,0.11700,0.00534,0.00630,0.01104,0.01603,0.01161,27.16600,1,0.400088,0.656182,-4.711007,0.281618,2.655744,0.234809
+phon_R01_S31_1,138.19000,203.52200,83.34000,0.00704,0.00005,0.00406,0.00398,0.01218,0.04479,0.44100,0.02587,0.02567,0.03220,0.07761,0.01968,18.30500,1,0.538016,0.741480,-5.418787,0.160267,2.090438,0.229892
+phon_R01_S31_2,182.01800,197.17300,79.18700,0.00842,0.00005,0.00506,0.00449,0.01517,0.02503,0.23100,0.01372,0.01580,0.01931,0.04115,0.01813,18.78400,1,0.589956,0.732903,-5.445140,0.142466,2.174306,0.215558
+phon_R01_S31_3,156.23900,195.10700,79.82000,0.00694,0.00004,0.00403,0.00395,0.01209,0.02343,0.22400,0.01289,0.01420,0.01720,0.03867,0.02020,19.19600,1,0.618663,0.728421,-5.944191,0.143359,1.929715,0.181988
+phon_R01_S31_4,145.17400,198.10900,80.63700,0.00733,0.00005,0.00414,0.00422,0.01242,0.02362,0.23300,0.01235,0.01495,0.01944,0.03706,0.01874,18.85700,1,0.637518,0.735546,-5.594275,0.127950,1.765957,0.222716
+phon_R01_S31_5,138.14500,197.23800,81.11400,0.00544,0.00004,0.00294,0.00327,0.00883,0.02791,0.24600,0.01484,0.01805,0.02259,0.04451,0.01794,18.17800,1,0.623209,0.738245,-5.540351,0.087165,1.821297,0.214075
+phon_R01_S31_6,166.88800,198.96600,79.51200,0.00638,0.00004,0.00368,0.00351,0.01104,0.02857,0.25700,0.01547,0.01859,0.02301,0.04641,0.01796,18.33000,1,0.585169,0.736964,-5.825257,0.115697,1.996146,0.196535
+phon_R01_S32_1,119.03100,127.53300,109.21600,0.00440,0.00004,0.00214,0.00192,0.00641,0.01033,0.09800,0.00538,0.00570,0.00811,0.01614,0.01724,26.84200,1,0.457541,0.699787,-6.890021,0.152941,2.328513,0.112856
+phon_R01_S32_2,120.07800,126.63200,105.66700,0.00270,0.00002,0.00116,0.00135,0.00349,0.01022,0.09000,0.00476,0.00588,0.00903,0.01428,0.00487,26.36900,1,0.491345,0.718839,-5.892061,0.195976,2.108873,0.183572
+phon_R01_S32_3,120.28900,128.14300,100.20900,0.00492,0.00004,0.00269,0.00238,0.00808,0.01412,0.12500,0.00703,0.00820,0.01194,0.02110,0.01610,23.94900,1,0.467160,0.724045,-6.135296,0.203630,2.539724,0.169923
+phon_R01_S32_4,120.25600,125.30600,104.77300,0.00407,0.00003,0.00224,0.00205,0.00671,0.01516,0.13800,0.00721,0.00815,0.01310,0.02164,0.01015,26.01700,1,0.468621,0.735136,-6.112667,0.217013,2.527742,0.170633
+phon_R01_S32_5,119.05600,125.21300,86.79500,0.00346,0.00003,0.00169,0.00170,0.00508,0.01201,0.10600,0.00633,0.00701,0.00915,0.01898,0.00903,23.38900,1,0.470972,0.721308,-5.436135,0.254909,2.516320,0.232209
+phon_R01_S32_6,118.74700,123.72300,109.83600,0.00331,0.00003,0.00168,0.00171,0.00504,0.01043,0.09900,0.00490,0.00621,0.00903,0.01471,0.00504,25.61900,1,0.482296,0.723096,-6.448134,0.178713,2.034827,0.141422
+phon_R01_S33_1,106.51600,112.77700,93.10500,0.00589,0.00006,0.00291,0.00319,0.00873,0.04932,0.44100,0.02683,0.03112,0.03651,0.08050,0.03031,17.06000,1,0.637814,0.744064,-5.301321,0.320385,2.375138,0.243080
+phon_R01_S33_2,110.45300,127.61100,105.55400,0.00494,0.00004,0.00244,0.00315,0.00731,0.04128,0.37900,0.02229,0.02592,0.03316,0.06688,0.02529,17.70700,1,0.653427,0.706687,-5.333619,0.322044,2.631793,0.228319
+phon_R01_S33_3,113.40000,133.34400,107.81600,0.00451,0.00004,0.00219,0.00283,0.00658,0.04879,0.43100,0.02385,0.02973,0.04370,0.07154,0.02278,19.01300,1,0.647900,0.708144,-4.378916,0.300067,2.445502,0.259451
+phon_R01_S33_4,113.16600,130.27000,100.67300,0.00502,0.00004,0.00257,0.00312,0.00772,0.05279,0.47600,0.02896,0.03347,0.04134,0.08689,0.03690,16.74700,1,0.625362,0.708617,-4.654894,0.304107,2.672362,0.274387
+phon_R01_S33_5,112.23900,126.60900,104.09500,0.00472,0.00004,0.00238,0.00290,0.00715,0.05643,0.51700,0.03070,0.03530,0.04451,0.09211,0.02629,17.36600,1,0.640945,0.701404,-5.634576,0.306014,2.419253,0.209191
+phon_R01_S33_6,116.15000,131.73100,109.81500,0.00381,0.00003,0.00181,0.00232,0.00542,0.03026,0.26700,0.01514,0.01812,0.02770,0.04543,0.01827,18.80100,1,0.624811,0.696049,-5.866357,0.233070,2.445646,0.184985
+phon_R01_S34_1,170.36800,268.79600,79.54300,0.00571,0.00003,0.00232,0.00269,0.00696,0.03273,0.28100,0.01713,0.01964,0.02824,0.05139,0.02485,18.54000,1,0.677131,0.685057,-4.796845,0.397749,2.963799,0.277227
+phon_R01_S34_2,208.08300,253.79200,91.80200,0.00757,0.00004,0.00428,0.00428,0.01285,0.06725,0.57100,0.04016,0.04003,0.04464,0.12047,0.04238,15.64800,1,0.606344,0.665945,-5.410336,0.288917,2.665133,0.231723
+phon_R01_S34_3,198.45800,219.29000,148.69100,0.00376,0.00002,0.00182,0.00215,0.00546,0.03527,0.29700,0.02055,0.02076,0.02530,0.06165,0.01728,18.70200,1,0.606273,0.661735,-5.585259,0.310746,2.465528,0.209863
+phon_R01_S34_4,202.80500,231.50800,86.23200,0.00370,0.00002,0.00189,0.00211,0.00568,0.01997,0.18000,0.01117,0.01177,0.01506,0.03350,0.02010,18.68700,1,0.536102,0.632631,-5.898673,0.213353,2.470746,0.189032
+phon_R01_S34_5,202.54400,241.35000,164.16800,0.00254,0.00001,0.00100,0.00133,0.00301,0.02662,0.22800,0.01475,0.01558,0.02006,0.04426,0.01049,20.68000,1,0.497480,0.630409,-6.132663,0.220617,2.576563,0.159777
+phon_R01_S34_6,223.36100,263.87200,87.63800,0.00352,0.00002,0.00169,0.00188,0.00506,0.02536,0.22500,0.01379,0.01478,0.01909,0.04137,0.01493,20.36600,1,0.566849,0.574282,-5.456811,0.345238,2.840556,0.232861
+phon_R01_S35_1,169.77400,191.75900,151.45100,0.01568,0.00009,0.00863,0.00946,0.02589,0.08143,0.82100,0.03804,0.05426,0.08808,0.11411,0.07530,12.35900,1,0.561610,0.793509,-3.297668,0.414758,3.413649,0.457533
+phon_R01_S35_2,183.52000,216.81400,161.34000,0.01466,0.00008,0.00849,0.00819,0.02546,0.06050,0.61800,0.02865,0.04101,0.06359,0.08595,0.06057,14.36700,1,0.478024,0.768974,-4.276605,0.355736,3.142364,0.336085
+phon_R01_S35_3,188.62000,216.30200,165.98200,0.01719,0.00009,0.00996,0.01027,0.02987,0.07118,0.72200,0.03474,0.04580,0.06824,0.10422,0.08069,12.29800,1,0.552870,0.764036,-3.377325,0.335357,3.274865,0.418646
+phon_R01_S35_4,202.63200,565.74000,177.25800,0.01627,0.00008,0.00919,0.00963,0.02756,0.07170,0.83300,0.03515,0.04265,0.06460,0.10546,0.07889,14.98900,1,0.427627,0.775708,-4.892495,0.262281,2.910213,0.270173
+phon_R01_S35_5,186.69500,211.96100,149.44200,0.01872,0.00010,0.01075,0.01154,0.03225,0.05830,0.78400,0.02699,0.03714,0.06259,0.08096,0.10952,12.52900,1,0.507826,0.762726,-4.484303,0.340256,2.958815,0.301487
+phon_R01_S35_6,192.81800,224.42900,168.79300,0.03107,0.00016,0.01800,0.01958,0.05401,0.11908,1.30200,0.05647,0.07940,0.13778,0.16942,0.21713,8.44100,1,0.625866,0.768320,-2.434031,0.450493,3.079221,0.527367
+phon_R01_S35_7,198.11600,233.09900,174.47800,0.02714,0.00014,0.01568,0.01699,0.04705,0.08684,1.01800,0.04284,0.05556,0.08318,0.12851,0.16265,9.44900,1,0.584164,0.754449,-2.839756,0.356224,3.184027,0.454721
+phon_R01_S37_1,121.34500,139.64400,98.25000,0.00684,0.00006,0.00388,0.00332,0.01164,0.02534,0.24100,0.01340,0.01399,0.02056,0.04019,0.04179,21.52000,1,0.566867,0.670475,-4.865194,0.246404,2.013530,0.168581
+phon_R01_S37_2,119.10000,128.44200,88.83300,0.00692,0.00006,0.00393,0.00300,0.01179,0.02682,0.23600,0.01484,0.01405,0.02018,0.04451,0.04611,21.82400,1,0.651680,0.659333,-4.239028,0.175691,2.451130,0.247455
+phon_R01_S37_3,117.87000,127.34900,95.65400,0.00647,0.00005,0.00356,0.00300,0.01067,0.03087,0.27600,0.01659,0.01804,0.02402,0.04977,0.02631,22.43100,1,0.628300,0.652025,-3.583722,0.207914,2.439597,0.206256
+phon_R01_S37_4,122.33600,142.36900,94.79400,0.00727,0.00006,0.00415,0.00339,0.01246,0.02293,0.22300,0.01205,0.01289,0.01771,0.03615,0.03191,22.95300,1,0.611679,0.623731,-5.435100,0.230532,2.699645,0.220546
+phon_R01_S37_5,117.96300,134.20900,100.75700,0.01813,0.00015,0.01117,0.00718,0.03351,0.04912,0.43800,0.02610,0.02161,0.02916,0.07830,0.10748,19.07500,1,0.630547,0.646786,-3.444478,0.303214,2.964568,0.261305
+phon_R01_S37_6,126.14400,154.28400,97.54300,0.00975,0.00008,0.00593,0.00454,0.01778,0.02852,0.26600,0.01500,0.01581,0.02157,0.04499,0.03828,21.53400,1,0.635015,0.627337,-5.070096,0.280091,2.892300,0.249703
+phon_R01_S39_1,127.93000,138.75200,112.17300,0.00605,0.00005,0.00321,0.00318,0.00962,0.03235,0.33900,0.01360,0.01650,0.03105,0.04079,0.02663,19.65100,1,0.654945,0.675865,-5.498456,0.234196,2.103014,0.216638
+phon_R01_S39_2,114.23800,124.39300,77.02200,0.00581,0.00005,0.00299,0.00316,0.00896,0.04009,0.40600,0.01579,0.01994,0.04114,0.04736,0.02073,20.43700,1,0.653139,0.694571,-5.185987,0.259229,2.151121,0.244948
+phon_R01_S39_3,115.32200,135.73800,107.80200,0.00619,0.00005,0.00352,0.00329,0.01057,0.03273,0.32500,0.01644,0.01722,0.02931,0.04933,0.02810,19.38800,1,0.577802,0.684373,-5.283009,0.226528,2.442906,0.238281
+phon_R01_S39_4,114.55400,126.77800,91.12100,0.00651,0.00006,0.00366,0.00340,0.01097,0.03658,0.36900,0.01864,0.01940,0.03091,0.05592,0.02707,18.95400,1,0.685151,0.719576,-5.529833,0.242750,2.408689,0.220520
+phon_R01_S39_5,112.15000,131.66900,97.52700,0.00519,0.00005,0.00291,0.00284,0.00873,0.01756,0.15500,0.00967,0.01033,0.01363,0.02902,0.01435,21.21900,1,0.557045,0.673086,-5.617124,0.184896,1.871871,0.212386
+phon_R01_S39_6,102.27300,142.83000,85.90200,0.00907,0.00009,0.00493,0.00461,0.01480,0.02814,0.27200,0.01579,0.01553,0.02073,0.04736,0.03882,18.44700,1,0.671378,0.674562,-2.929379,0.396746,2.560422,0.367233
+phon_R01_S42_1,236.20000,244.66300,102.13700,0.00277,0.00001,0.00154,0.00153,0.00462,0.02448,0.21700,0.01410,0.01426,0.01621,0.04231,0.00620,24.07800,0,0.469928,0.628232,-6.816086,0.172270,2.235197,0.119652
+phon_R01_S42_2,237.32300,243.70900,229.25600,0.00303,0.00001,0.00173,0.00159,0.00519,0.01242,0.11600,0.00696,0.00747,0.00882,0.02089,0.00533,24.67900,0,0.384868,0.626710,-7.018057,0.176316,1.852402,0.091604
+phon_R01_S42_3,260.10500,264.91900,237.30300,0.00339,0.00001,0.00205,0.00186,0.00616,0.02030,0.19700,0.01186,0.01230,0.01367,0.03557,0.00910,21.08300,0,0.440988,0.628058,-7.517934,0.160414,1.881767,0.075587
+phon_R01_S42_4,197.56900,217.62700,90.79400,0.00803,0.00004,0.00490,0.00448,0.01470,0.02177,0.18900,0.01279,0.01272,0.01439,0.03836,0.01337,19.26900,0,0.372222,0.725216,-5.736781,0.164529,2.882450,0.202879
+phon_R01_S42_5,240.30100,245.13500,219.78300,0.00517,0.00002,0.00316,0.00283,0.00949,0.02018,0.21200,0.01176,0.01191,0.01344,0.03529,0.00965,21.02000,0,0.371837,0.646167,-7.169701,0.073298,2.266432,0.100881
+phon_R01_S42_6,244.99000,272.21000,239.17000,0.00451,0.00002,0.00279,0.00237,0.00837,0.01897,0.18100,0.01084,0.01121,0.01255,0.03253,0.01049,21.52800,0,0.522812,0.646818,-7.304500,0.171088,2.095237,0.096220
+phon_R01_S43_1,112.54700,133.37400,105.71500,0.00355,0.00003,0.00166,0.00190,0.00499,0.01358,0.12900,0.00664,0.00786,0.01140,0.01992,0.00435,26.43600,0,0.413295,0.756700,-6.323531,0.218885,2.193412,0.160376
+phon_R01_S43_2,110.73900,113.59700,100.13900,0.00356,0.00003,0.00170,0.00200,0.00510,0.01484,0.13300,0.00754,0.00950,0.01285,0.02261,0.00430,26.55000,0,0.369090,0.776158,-6.085567,0.192375,1.889002,0.174152
+phon_R01_S43_3,113.71500,116.44300,96.91300,0.00349,0.00003,0.00171,0.00203,0.00514,0.01472,0.13300,0.00748,0.00905,0.01148,0.02245,0.00478,26.54700,0,0.380253,0.766700,-5.943501,0.192150,1.852542,0.179677
+phon_R01_S43_4,117.00400,144.46600,99.92300,0.00353,0.00003,0.00176,0.00218,0.00528,0.01657,0.14500,0.00881,0.01062,0.01318,0.02643,0.00590,25.44500,0,0.387482,0.756482,-6.012559,0.229298,1.872946,0.163118
+phon_R01_S43_5,115.38000,123.10900,108.63400,0.00332,0.00003,0.00160,0.00199,0.00480,0.01503,0.13700,0.00812,0.00933,0.01133,0.02436,0.00401,26.00500,0,0.405991,0.761255,-5.966779,0.197938,1.974857,0.184067
+phon_R01_S43_6,116.38800,129.03800,108.97000,0.00346,0.00003,0.00169,0.00213,0.00507,0.01725,0.15500,0.00874,0.01021,0.01331,0.02623,0.00415,26.14300,0,0.361232,0.763242,-6.016891,0.109256,2.004719,0.174429
+phon_R01_S44_1,151.73700,190.20400,129.85900,0.00314,0.00002,0.00135,0.00162,0.00406,0.01469,0.13200,0.00728,0.00886,0.01230,0.02184,0.00570,24.15100,1,0.396610,0.745957,-6.486822,0.197919,2.449763,0.132703
+phon_R01_S44_2,148.79000,158.35900,138.99000,0.00309,0.00002,0.00152,0.00186,0.00456,0.01574,0.14200,0.00839,0.00956,0.01309,0.02518,0.00488,24.41200,1,0.402591,0.762508,-6.311987,0.182459,2.251553,0.160306
+phon_R01_S44_3,148.14300,155.98200,135.04100,0.00392,0.00003,0.00204,0.00231,0.00612,0.01450,0.13100,0.00725,0.00876,0.01263,0.02175,0.00540,23.68300,1,0.398499,0.778349,-5.711205,0.240875,2.845109,0.192730
+phon_R01_S44_4,150.44000,163.44100,144.73600,0.00396,0.00003,0.00206,0.00233,0.00619,0.02551,0.23700,0.01321,0.01574,0.02148,0.03964,0.00611,23.13300,1,0.352396,0.759320,-6.261446,0.183218,2.264226,0.144105
+phon_R01_S44_5,148.46200,161.07800,141.99800,0.00397,0.00003,0.00202,0.00235,0.00605,0.01831,0.16300,0.00950,0.01103,0.01559,0.02849,0.00639,22.86600,1,0.408598,0.768845,-5.704053,0.216204,2.679185,0.197710
+phon_R01_S44_6,149.81800,163.41700,144.78600,0.00336,0.00002,0.00174,0.00198,0.00521,0.02145,0.19800,0.01155,0.01341,0.01666,0.03464,0.00595,23.00800,1,0.329577,0.757180,-6.277170,0.109397,2.209021,0.156368
+phon_R01_S49_1,117.22600,123.92500,106.65600,0.00417,0.00004,0.00186,0.00270,0.00558,0.01909,0.17100,0.00864,0.01223,0.01949,0.02592,0.00955,23.07900,0,0.603515,0.669565,-5.619070,0.191576,2.027228,0.215724
+phon_R01_S49_2,116.84800,217.55200,99.50300,0.00531,0.00005,0.00260,0.00346,0.00780,0.01795,0.16300,0.00810,0.01144,0.01756,0.02429,0.01179,22.08500,0,0.663842,0.656516,-5.198864,0.206768,2.120412,0.252404
+phon_R01_S49_3,116.28600,177.29100,96.98300,0.00314,0.00003,0.00134,0.00192,0.00403,0.01564,0.13600,0.00667,0.00990,0.01691,0.02001,0.00737,24.19900,0,0.598515,0.654331,-5.592584,0.133917,2.058658,0.214346
+phon_R01_S49_4,116.55600,592.03000,86.22800,0.00496,0.00004,0.00254,0.00263,0.00762,0.01660,0.15400,0.00820,0.00972,0.01491,0.02460,0.01397,23.95800,0,0.566424,0.667654,-6.431119,0.153310,2.161936,0.120605
+phon_R01_S49_5,116.34200,581.28900,94.24600,0.00267,0.00002,0.00115,0.00148,0.00345,0.01300,0.11700,0.00631,0.00789,0.01144,0.01892,0.00680,25.02300,0,0.528485,0.663884,-6.359018,0.116636,2.152083,0.138868
+phon_R01_S49_6,114.56300,119.16700,86.64700,0.00327,0.00003,0.00146,0.00184,0.00439,0.01185,0.10600,0.00557,0.00721,0.01095,0.01672,0.00703,24.77500,0,0.555303,0.659132,-6.710219,0.149694,1.913990,0.121777
+phon_R01_S50_1,201.77400,262.70700,78.22800,0.00694,0.00003,0.00412,0.00396,0.01235,0.02574,0.25500,0.01454,0.01582,0.01758,0.04363,0.04441,19.36800,0,0.508479,0.683761,-6.934474,0.159890,2.316346,0.112838
+phon_R01_S50_2,174.18800,230.97800,94.26100,0.00459,0.00003,0.00263,0.00259,0.00790,0.04087,0.40500,0.02336,0.02498,0.02745,0.07008,0.02764,19.51700,0,0.448439,0.657899,-6.538586,0.121952,2.657476,0.133050
+phon_R01_S50_3,209.51600,253.01700,89.48800,0.00564,0.00003,0.00331,0.00292,0.00994,0.02751,0.26300,0.01604,0.01657,0.01879,0.04812,0.01810,19.14700,0,0.431674,0.683244,-6.195325,0.129303,2.784312,0.168895
+phon_R01_S50_4,174.68800,240.00500,74.28700,0.01360,0.00008,0.00624,0.00564,0.01873,0.02308,0.25600,0.01268,0.01365,0.01667,0.03804,0.10715,17.88300,0,0.407567,0.655683,-6.787197,0.158453,2.679772,0.131728
+phon_R01_S50_5,198.76400,396.96100,74.90400,0.00740,0.00004,0.00370,0.00390,0.01109,0.02296,0.24100,0.01265,0.01321,0.01588,0.03794,0.07223,19.02000,0,0.451221,0.643956,-6.744577,0.207454,2.138608,0.123306
+phon_R01_S50_6,214.28900,260.27700,77.97300,0.00567,0.00003,0.00295,0.00317,0.00885,0.01884,0.19000,0.01026,0.01161,0.01373,0.03078,0.04398,21.20900,0,0.462803,0.664357,-5.724056,0.190667,2.555477,0.148569

data_cache/obstetrics_fetal.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data_cache/oncology_cervical.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data_cache/ophthalmology.arff ADDED Viewed

The diff for this file is too large to render. See raw diff

data_cache/orthopaedics.arff ADDED Viewed

	@@ -0,0 +1,322 @@

+@relation column_2C_weka
+@attribute pelvic_incidence numeric
+@attribute pelvic_tilt numeric
+@attribute lumbar_lordosis_angle numeric
+@attribute sacral_slope numeric
+@attribute pelvic_radius numeric
+@attribute degree_spondylolisthesis numeric
+@attribute class {Abnormal, Normal}
+@data
+63.0278175,22.55258597,39.60911701,40.47523153,98.67291675,-0.254399986,Abnormal
+39.05695098,10.06099147,25.01537822,28.99595951,114.4054254,4.564258645,Abnormal
+68.83202098,22.21848205,50.09219357,46.61353893,105.9851355,-3.530317314,Abnormal
+69.29700807,24.65287791,44.31123813,44.64413017,101.8684951,11.21152344,Abnormal
+49.71285934,9.652074879,28.317406,40.06078446,108.1687249,7.918500615,Abnormal
+40.25019968,13.92190658,25.1249496,26.32829311,130.3278713,2.230651729,Abnormal
+53.43292815,15.86433612,37.16593387,37.56859203,120.5675233,5.988550702,Abnormal
+45.36675362,10.75561143,29.03834896,34.61114218,117.2700675,-10.67587083,Abnormal
+43.79019026,13.5337531,42.69081398,30.25643716,125.0028927,13.28901817,Abnormal
+36.68635286,5.010884121,41.9487509,31.67546874,84.24141517,0.664437117,Abnormal
+49.70660953,13.04097405,31.33450009,36.66563548,108.6482654,-7.825985755,Abnormal
+31.23238734,17.71581923,15.5,13.51656811,120.0553988,0.499751446,Abnormal
+48.91555137,19.96455616,40.26379358,28.95099521,119.321358,8.028894629,Abnormal
+53.5721702,20.46082824,33.1,33.11134196,110.9666978,7.044802938,Abnormal
+57.30022656,24.1888846,46.99999999,33.11134196,116.8065868,5.766946943,Abnormal
+44.31890674,12.53799164,36.098763,31.78091509,124.1158358,5.415825143,Abnormal
+63.83498162,20.36250706,54.55243367,43.47247456,112.3094915,-0.622526643,Abnormal
+31.27601184,3.14466948,32.56299592,28.13134236,129.0114183,3.623020073,Abnormal
+38.69791243,13.44474904,31,25.25316339,123.1592507,1.429185758,Abnormal
+41.72996308,12.25407408,30.12258646,29.475889,116.5857056,-1.244402488,Abnormal
+43.92283983,14.17795853,37.8325467,29.7448813,134.4610156,6.451647637,Abnormal
+54.91944259,21.06233245,42.19999999,33.85711014,125.2127163,2.432561437,Abnormal
+63.07361096,24.41380271,53.99999999,38.65980825,106.4243295,15.77969683,Abnormal
+45.54078988,13.06959759,30.29832059,32.47119229,117.9808303,-4.987129618,Abnormal
+36.12568347,22.75875277,29,13.3669307,115.5771163,-3.237562489,Abnormal
+54.12492019,26.65048856,35.32974693,27.47443163,121.447011,1.571204816,Abnormal
+26.14792141,10.75945357,14,15.38846783,125.2032956,-10.09310817,Abnormal
+43.58096394,16.5088837,46.99999999,27.07208024,109.271634,8.992815727,Abnormal
+44.5510115,21.93114655,26.78591597,22.61986495,111.0729197,2.652320636,Abnormal
+66.87921138,24.89199889,49.27859673,41.9872125,113.4770183,-2.005891748,Abnormal
+50.81926781,15.40221253,42.52893886,35.41705528,112.192804,10.86956554,Abnormal
+46.39026008,11.07904664,32.13655345,35.31121344,98.77454633,6.386831648,Abnormal
+44.93667457,17.44383762,27.78057555,27.49283695,117.9803245,5.569619587,Abnormal
+38.66325708,12.98644139,39.99999999,25.67681568,124.914118,2.703008052,Abnormal
+59.59554032,31.99824445,46.56025198,27.59729587,119.3303537,1.474285836,Abnormal
+31.48421834,7.82622134,24.28481815,23.657997,113.8331446,4.393080498,Abnormal
+32.09098679,6.989378081,35.99819848,25.10160871,132.264735,6.413427708,Abnormal
+35.70345781,19.44325311,20.7,16.26020471,137.5406125,-0.263489651,Abnormal
+55.84328595,28.84744756,47.69054322,26.99583839,123.3118449,2.812426855,Abnormal
+52.41938511,19.01156052,35.87265953,33.40782459,116.5597709,1.694705102,Abnormal
+35.49244617,11.7016723,15.59036345,23.79077387,106.9388517,-3.460357991,Abnormal
+46.44207842,8.39503589,29.0372302,38.04704253,115.4814047,2.045475795,Abnormal
+53.85479842,19.23064334,32.77905978,34.62415508,121.6709148,5.329843204,Abnormal
+66.28539377,26.32784484,47.49999999,39.95754893,121.2196839,-0.799624469,Abnormal
+56.03021778,16.2979149,62.27527456,39.73230287,114.0231172,-2.325683841,Abnormal
+50.91244034,23.01516931,46.99999999,27.89727103,117.4222591,-2.526701511,Abnormal
+48.332638,22.22778399,36.18199318,26.10485401,117.3846251,6.481709096,Abnormal
+41.35250407,16.57736351,30.70619135,24.77514057,113.2666746,-4.497957556,Abnormal
+40.55735663,17.97778407,34,22.57957256,121.0462458,-1.537383074,Abnormal
+41.76773173,17.89940172,20.0308863,23.86833001,118.3633889,2.062962549,Abnormal
+55.28585178,20.44011836,34,34.84573342,115.8770174,3.558372358,Abnormal
+74.43359316,41.55733141,27.7,32.87626175,107.9493045,5.000088788,Abnormal
+50.20966979,29.76012218,36.10400731,20.44954761,128.2925148,5.740614083,Abnormal
+30.14993632,11.91744524,34,18.23249108,112.6841408,11.46322327,Abnormal
+41.17167989,17.32120599,33.46940277,23.85047391,116.3778894,-9.569249858,Abnormal
+47.65772963,13.27738491,36.67998541,34.38034472,98.24978071,6.273012173,Abnormal
+43.34960621,7.467468964,28.06548279,35.88213725,112.7761866,5.753277458,Abnormal
+46.85578065,15.35151393,38,31.50426672,116.2509174,1.662705589,Abnormal
+43.20318499,19.66314572,35,23.54003927,124.8461088,-2.919075955,Abnormal
+48.10923638,14.93072472,35.56468278,33.17851166,124.0564518,7.947904861,Abnormal
+74.37767772,32.05310438,78.77201304,42.32457334,143.5606905,56.12590603,Abnormal
+89.68056731,32.70443487,83.13073216,56.97613244,129.9554764,92.02727682,Abnormal
+44.529051,9.433234213,51.99999999,35.09581679,134.7117723,29.10657504,Abnormal
+77.69057712,21.38064464,64.42944191,56.30993248,114.818751,26.93184095,Abnormal
+76.1472121,21.93618556,82.96150249,54.21102654,123.9320096,10.43197194,Abnormal
+83.93300857,41.28630543,61.99999999,42.64670314,115.012334,26.58810016,Abnormal
+78.49173027,22.1817978,59.99999999,56.30993248,118.5303266,27.38321314,Abnormal
+75.64973136,19.33979889,64.14868477,56.30993248,95.9036288,69.55130292,Abnormal
+72.07627839,18.94617604,50.99999999,53.13010236,114.2130126,1.01004051,Abnormal
+58.59952852,-0.261499046,51.49999999,58.86102756,102.0428116,28.05969711,Abnormal
+72.56070163,17.38519079,51.99999999,55.17551084,119.1937238,32.10853735,Abnormal
+86.90079431,32.9281677,47.79434664,53.97262661,135.0753635,101.7190919,Abnormal
+84.97413208,33.02117462,60.85987263,51.95295747,125.6595336,74.33340864,Abnormal
+55.512212,20.09515673,43.99999999,35.41705528,122.648753,34.55294641,Abnormal
+72.2223343,23.07771056,90.99999999,49.14462374,137.7366546,56.80409277,Abnormal
+70.22145219,39.82272448,68.11840309,30.39872771,148.5255624,145.3781432,Abnormal
+86.75360946,36.04301632,69.22104479,50.71059314,139.414504,110.8607824,Abnormal
+58.78254775,7.667044186,53.33894082,51.11550357,98.50115697,51.58412476,Abnormal
+67.41253785,17.44279712,60.14464036,49.96974073,111.12397,33.15764573,Abnormal
+47.74467877,12.08935067,38.99999999,35.6553281,117.5120039,21.68240136,Abnormal
+77.10657122,30.46999418,69.48062839,46.63657704,112.1516,70.75908308,Abnormal
+74.00554124,21.12240192,57.37950226,52.88313932,120.2059626,74.55516588,Abnormal
+88.62390839,29.08945331,47.56426247,59.53445508,121.7647796,51.80589921,Abnormal
+81.10410039,24.79416792,77.88702048,56.30993247,151.8398566,65.21461611,Abnormal
+76.32600187,42.39620445,57.19999999,33.92979742,124.267007,50.12745689,Abnormal
+45.44374959,9.906071798,44.99999999,35.53767779,163.0710405,20.31531532,Abnormal
+59.78526526,17.87932332,59.20646143,41.90594194,119.3191109,22.12386874,Abnormal
+44.91414916,10.21899563,44.63091389,34.69515353,130.0756599,37.36453993,Abnormal
+56.60577127,16.80020017,41.99999999,39.80557109,127.2945222,24.0185747,Abnormal
+71.18681115,23.89620111,43.6966651,47.29061004,119.8649383,27.28398451,Abnormal
+81.65603206,28.74886935,58.23282055,52.9071627,114.7698556,30.60914842,Abnormal
+70.95272771,20.15993121,62.85910914,50.7927965,116.1779325,32.522331,Abnormal
+85.35231529,15.84491006,71.66865979,69.50740523,124.4197875,76.0206034,Abnormal
+58.10193455,14.83763914,79.64983825,43.26429541,113.5876551,50.23787808,Abnormal
+94.17482232,15.38076983,67.70572132,78.79405249,114.8901128,53.25522004,Abnormal
+57.52235608,33.64707522,50.90985841,23.87528085,140.9817119,148.7537109,Abnormal
+96.65731511,19.46158117,90.21149828,77.19573393,120.6730408,64.08099841,Abnormal
+74.72074622,19.75694203,82.73535954,54.96380419,109.3565941,33.30606685,Abnormal
+77.65511874,22.4329501,93.89277881,55.22216863,123.0557067,61.2111866,Abnormal
+58.52162283,13.92228609,41.46785522,44.59933674,115.514798,30.3879839,Abnormal
+84.5856071,30.36168482,65.47948563,54.22392228,108.0102185,25.11847846,Abnormal
+79.93857026,18.7740711,63.31183486,61.16449915,114.787107,38.53874133,Abnormal
+70.39930842,13.46998624,61.19999999,56.92932218,102.3375244,25.53842852,Abnormal
+49.78212054,6.46680486,52.99999999,43.31531568,110.8647831,25.33564729,Abnormal
+77.40933294,29.39654543,63.23230243,48.0127875,118.4507311,93.56373734,Abnormal
+65.00796426,27.60260762,50.94751899,37.40535663,116.5811088,7.015977884,Abnormal
+65.01377322,9.838262375,57.73583722,55.17551084,94.73852542,49.69695462,Abnormal
+78.42595126,33.42595126,76.27743927,45,138.5541111,77.15517241,Abnormal
+63.17298709,6.330910974,62.99999999,56.84207612,110.6440206,42.60807567,Abnormal
+68.61300092,15.0822353,63.01469619,53.53076561,123.4311742,39.49798659,Abnormal
+63.90063261,13.7062037,62.12433389,50.19442891,114.1292425,41.42282844,Abnormal
+84.99895554,29.61009772,83.35219438,55.38885782,126.9129899,71.32117542,Abnormal
+42.02138603,-6.554948347,67.89999999,48.57633437,111.5857819,27.33867086,Abnormal
+69.75666532,19.27929659,48.49999999,50.47736873,96.49136982,51.1696403,Abnormal
+80.98807441,36.84317181,86.96060151,44.1449026,141.0881494,85.87215224,Abnormal
+129.8340406,8.404475005,48.38405705,121.4295656,107.690466,418.5430821,Abnormal
+70.48410444,12.48948765,62.41714208,57.99461679,114.1900488,56.90244779,Abnormal
+86.04127982,38.75066978,47.87140494,47.29061004,122.0929536,61.98827709,Abnormal
+65.53600255,24.15748726,45.77516991,41.3785153,136.4403015,16.37808564,Abnormal
+60.7538935,15.7538935,43.19915768,45,113.0533309,31.69354839,Abnormal
+54.74177518,12.09507205,40.99999999,42.64670314,117.6432188,40.3823266,Abnormal
+83.87994081,23.07742686,87.14151223,60.80251395,124.6460723,80.55560527,Abnormal
+80.07491418,48.06953097,52.40343873,32.00538321,110.7099121,67.72731595,Abnormal
+65.66534698,10.54067533,56.48913545,55.12467166,109.1627768,53.93202006,Abnormal
+74.71722805,14.32167879,32.5,60.39554926,107.1822176,37.01708012,Abnormal
+48.06062649,5.687032126,57.05716117,42.37359436,95.44375749,32.83587702,Abnormal
+70.67689818,21.70440224,59.18116082,48.97249594,103.0083545,27.8101478,Abnormal
+80.43342782,16.998479,66.53601753,63.43494882,116.4389807,57.78125,Abnormal
+90.51396072,28.27250132,69.8139423,62.2414594,100.8921596,58.82364821,Abnormal
+77.23689752,16.73762214,49.77553438,60.49927538,110.6903772,39.7871542,Abnormal
+50.06678595,9.120340183,32.16846267,40.94644577,99.71245318,26.76669655,Abnormal
+69.78100617,13.77746531,57.99999999,56.00354085,118.9306656,17.91456046,Abnormal
+69.62628302,21.12275138,52.76659472,48.50353164,116.8030913,54.81686729,Abnormal
+81.75441933,20.12346562,70.56044038,61.63095371,119.4250857,55.50688907,Abnormal
+52.20469309,17.21267289,78.09496877,34.9920202,136.9725168,54.93913416,Abnormal
+77.12134424,30.3498745,77.48108264,46.77146974,110.6111484,82.09360704,Abnormal
+88.0244989,39.84466878,81.77447308,48.17983012,116.6015376,56.76608323,Abnormal
+83.39660609,34.31098931,78.42329287,49.08561678,110.4665164,49.67209559,Abnormal
+72.05403412,24.70073725,79.87401586,47.35329687,107.1723576,56.42615873,Abnormal
+85.09550254,21.06989651,91.73479193,64.02560604,109.062312,38.03283108,Abnormal
+69.56348614,15.4011391,74.43849743,54.16234705,105.0673556,29.70121083,Abnormal
+89.5049473,48.90365265,72.0034229,40.60129465,134.6342912,118.3533701,Abnormal
+85.29017283,18.27888963,100.7442198,67.0112832,110.6607005,58.88494802,Abnormal
+60.62621697,20.5959577,64.53526221,40.03025927,117.2255542,104.8592474,Abnormal
+60.04417717,14.30965614,58.03886519,45.73452103,105.1316639,30.40913315,Abnormal
+85.64378664,42.68919513,78.7506635,42.95459151,105.1440758,42.88742577,Abnormal
+85.58171024,30.45703858,78.23137949,55.12467166,114.8660487,68.37612182,Abnormal
+55.08076562,-3.759929872,55.99999999,58.84069549,109.9153669,31.77358318,Abnormal
+65.75567895,9.832874231,50.82289501,55.92280472,104.3949585,39.30721246,Abnormal
+79.24967118,23.94482471,40.79669829,55.30484647,98.62251165,36.7063954,Abnormal
+81.11260488,20.69044356,60.68700588,60.42216132,94.01878339,40.51098228,Abnormal
+48.0306238,3.969814743,58.34451924,44.06080905,125.3509625,35.00007784,Abnormal
+63.40448058,14.11532726,48.13680562,49.28915333,111.9160075,31.78449499,Abnormal
+57.28694488,15.1493501,63.99999999,42.13759477,116.7353868,30.34120327,Abnormal
+41.18776972,5.792973871,42.86739151,35.39479584,103.3488802,27.66027669,Abnormal
+66.80479632,14.55160171,72.08491177,52.25319461,82.45603817,41.6854736,Abnormal
+79.4769781,26.73226755,70.65098189,52.74471055,118.5886691,61.70059824,Abnormal
+44.21646446,1.507074501,46.11033909,42.70938996,108.6295666,42.81048066,Abnormal
+57.03509717,0.34572799,49.19800263,56.68936918,103.0486975,52.16514503,Abnormal
+64.27481758,12.50864276,68.70237672,51.76617482,95.25245421,39.40982612,Abnormal
+92.02630795,35.39267395,77.41696348,56.633634,115.72353,58.05754155,Abnormal
+67.26314926,7.194661096,51.69688681,60.06848816,97.8010854,42.13694325,Abnormal
+118.1446548,38.44950127,50.83851954,79.69515353,81.0245406,74.04376736,Abnormal
+115.9232606,37.51543601,76.79999999,78.40782459,104.6986033,81.19892712,Abnormal
+53.94165809,9.306594428,43.10049819,44.63506366,124.3978211,25.0821266,Abnormal
+83.7031774,20.26822858,77.1105979,63.43494882,125.4801739,69.279571,Abnormal
+56.99140382,6.87408897,57.00900516,50.11731485,109.978045,36.81011057,Abnormal
+72.34359434,16.42078962,59.86901238,55.92280472,70.08257486,12.07264427,Abnormal
+95.38259648,24.82263131,95.15763273,70.55996517,89.3075466,57.66084135,Abnormal
+44.25347645,1.101086714,38,43.15238973,98.27410705,23.9106354,Abnormal
+64.80954139,15.17407796,58.83999352,49.63546343,111.679961,21.40719845,Abnormal
+78.40125389,14.04225971,79.69426258,64.35899418,104.7312342,12.39285327,Abnormal
+56.66829282,13.45820343,43.76970978,43.21008939,93.69220863,21.10812135,Abnormal
+50.82502875,9.064729049,56.29999999,41.7602997,78.99945411,23.04152435,Abnormal
+61.41173702,25.38436364,39.09686927,36.02737339,103.4045971,21.84340688,Abnormal
+56.56382381,8.961261611,52.57784639,47.6025622,98.77711506,50.70187326,Abnormal
+67.02766447,13.28150221,66.15040334,53.74616226,100.7154129,33.98913551,Abnormal
+80.81777144,19.23898066,61.64245116,61.57879078,89.47183446,44.167602,Abnormal
+80.65431956,26.34437939,60.89811835,54.30994017,120.1034928,52.46755185,Abnormal
+68.72190982,49.4318636,68.0560124,19.29004622,125.0185168,54.69128928,Abnormal
+37.90391014,4.47909896,24.71027447,33.42481118,157.848799,33.60702661,Abnormal
+64.62400798,15.22530262,67.63216653,49.39870535,90.298468,31.32641123,Abnormal
+75.43774787,31.53945399,89.59999999,43.89829388,106.8295898,54.96578902,Abnormal
+71.00194076,37.51577195,84.53709256,33.48616882,125.1642324,67.77118983,Abnormal
+81.05661087,20.80149217,91.78449512,60.2551187,125.430176,38.18178176,Abnormal
+91.46874146,24.50817744,84.62027202,66.96056402,117.3078968,52.62304673,Abnormal
+81.08232025,21.25584028,78.76675639,59.82647997,90.07187999,49.159426,Abnormal
+60.419932,5.265665422,59.8142356,55.15426658,109.0330745,30.26578534,Abnormal
+85.68094951,38.65003527,82.68097744,47.03091424,120.8407069,61.95903428,Abnormal
+82.4065243,29.27642195,77.05456489,53.13010235,117.0422439,62.76534831,Abnormal
+43.7182623,9.811985315,51.99999999,33.90627699,88.43424213,40.88092253,Abnormal
+86.472905,40.30376567,61.14101155,46.16913933,97.4041888,55.75222146,Abnormal
+74.46908181,33.28315665,66.94210105,41.18592517,146.4660009,124.9844057,Abnormal
+70.25043628,10.34012252,76.37007032,59.91031376,119.2370072,32.66650243,Abnormal
+72.64385013,18.92911726,67.99999999,53.71473287,116.9634162,25.38424676,Abnormal
+71.24176388,5.268270454,85.99958417,65.97349342,110.703107,38.2598637,Abnormal
+63.7723908,12.76338484,65.36052425,51.00900596,89.82274067,55.99545386,Abnormal
+58.82837872,37.57787321,125.7423855,21.25050551,135.6294176,117.3146829,Abnormal
+74.85448008,13.90908417,62.69325884,60.9453959,115.2087008,33.17225512,Abnormal
+75.29847847,16.67148361,61.29620362,58.62699486,118.8833881,31.57582292,Abnormal
+63.36433898,20.02462134,67.49870507,43.33971763,130.9992576,37.55670552,Abnormal
+67.51305267,33.2755899,96.28306169,34.23746278,145.6010328,88.30148594,Abnormal
+76.31402766,41.93368293,93.2848628,34.38034472,132.2672855,101.2187828,Abnormal
+73.63596236,9.711317947,62.99999999,63.92464442,98.72792982,26.97578722,Abnormal
+56.53505139,14.37718927,44.99154663,42.15786212,101.7233343,25.77317356,Abnormal
+80.11157156,33.94243223,85.10160773,46.16913933,125.5936237,100.2921068,Abnormal
+95.48022873,46.55005318,58.99999999,48.93017555,96.68390337,77.28307195,Abnormal
+74.09473084,18.82372712,76.03215571,55.27100372,128.4057314,73.38821617,Abnormal
+87.67908663,20.36561331,93.82241589,67.31347333,120.9448288,76.73062904,Abnormal
+48.25991962,16.41746236,36.32913708,31.84245726,94.88233607,28.34379914,Abnormal
+38.50527283,16.96429691,35.11281407,21.54097592,127.6328747,7.986683227,Normal
+54.92085752,18.96842952,51.60145541,35.952428,125.8466462,2.001642472,Normal
+44.36249017,8.945434892,46.90209626,35.41705528,129.220682,4.994195288,Normal
+48.3189305,17.45212105,47.99999999,30.86680945,128.9803079,-0.910940567,Normal
+45.70178875,10.65985935,42.5778464,35.0419294,130.1783144,-3.38890999,Normal
+30.74193812,13.35496594,35.90352597,17.38697218,142.4101072,-2.005372903,Normal
+50.91310144,6.6769999,30.89652243,44.23610154,118.151531,-1.057985526,Normal
+38.12658854,6.557617408,50.44507473,31.56897113,132.114805,6.338199339,Normal
+51.62467183,15.96934373,35,35.6553281,129.385308,1.00922834,Normal
+64.31186727,26.32836901,50.95896417,37.98349826,106.1777511,3.118221289,Normal
+44.48927476,21.78643263,31.47415392,22.70284212,113.7784936,-0.284129366,Normal
+54.9509702,5.865353416,52.99999999,49.08561678,126.9703283,-0.631602951,Normal
+56.10377352,13.10630665,62.63701952,42.99746687,116.2285032,31.17276727,Normal
+69.3988184,18.89840693,75.96636144,50.50041147,103.5825398,-0.44366081,Normal
+89.83467631,22.63921678,90.56346144,67.19545953,100.5011917,3.040973261,Normal
+59.72614016,7.724872599,55.34348527,52.00126756,125.1742214,3.235159224,Normal
+63.95952166,16.06094486,63.12373633,47.8985768,142.3601245,6.298970934,Normal
+61.54059876,19.67695713,52.89222856,41.86364163,118.6862678,4.815031084,Normal
+38.04655072,8.30166942,26.23683004,29.7448813,123.8034132,3.885773488,Normal
+43.43645061,10.09574326,36.03222439,33.34070735,137.4396942,-3.114450861,Normal
+65.61180231,23.13791922,62.58217893,42.47388309,124.1280012,-4.083298414,Normal
+53.91105429,12.93931796,38.99999999,40.97173633,118.1930354,5.074353176,Normal
+43.11795103,13.81574355,40.34738779,29.30220748,128.5177217,0.970926407,Normal
+40.6832291,9.148437195,31.02159252,31.53479191,139.1184721,-2.511618596,Normal
+37.7319919,9.386298276,41.99999999,28.34569362,135.740926,13.68304672,Normal
+63.92947003,19.97109671,40.17704963,43.95837332,113.0659387,-11.05817866,Normal
+61.82162717,13.59710457,63.99999999,48.22452261,121.779803,1.296191194,Normal
+62.14080535,13.96097523,57.99999999,48.17983012,133.2818339,4.955105669,Normal
+69.00491277,13.29178975,55.5701429,55.71312302,126.6116215,10.83201105,Normal
+56.44702568,19.44449915,43.5778464,37.00252653,139.1896903,-1.859688529,Normal
+41.6469159,8.835549101,36.03197484,32.8113668,116.5551679,-6.054537956,Normal
+51.52935759,13.51784732,35,38.01151027,126.7185156,13.92833085,Normal
+39.08726449,5.536602477,26.93203835,33.55066201,131.5844199,-0.75946135,Normal
+34.64992241,7.514782784,42.99999999,27.13513962,123.9877408,-4.082937601,Normal
+63.02630005,27.33624023,51.60501665,35.69005983,114.5066078,7.439869802,Normal
+47.80555887,10.68869819,53.99999999,37.11686068,125.3911378,-0.402523218,Normal
+46.63786363,15.85371711,39.99999999,30.78414653,119.3776026,9.06458168,Normal
+49.82813487,16.73643493,28,33.09169994,121.4355585,1.91330704,Normal
+47.31964755,8.573680295,35.56025198,38.74596726,120.5769719,1.630663508,Normal
+50.75329025,20.23505957,37,30.51823068,122.343516,2.288487746,Normal
+36.15782981,-0.810514093,33.62731353,36.96834391,135.9369096,-2.092506504,Normal
+40.74699612,1.835524271,49.99999999,38.91147185,139.2471502,0.668556793,Normal
+42.91804052,-5.845994341,57.99999999,48.76403486,121.6068586,-3.362044654,Normal
+63.79242525,21.34532339,65.99999999,42.44710185,119.5503909,12.38260373,Normal
+72.95564397,19.57697146,61.00707117,53.37867251,111.2340468,0.813491154,Normal
+67.53818154,14.65504222,58.00142908,52.88313932,123.6322597,25.9702063,Normal
+54.75251965,9.752519649,47.99999999,45,123.0379985,8.235294118,Normal
+50.16007802,-2.970024337,41.99999999,53.13010235,131.8024914,-8.290203373,Normal
+40.34929637,10.19474845,37.96774659,30.15454792,128.0099272,0.458901373,Normal
+63.61919213,16.93450781,49.34926218,46.68468432,117.0897469,-0.357811974,Normal
+54.14240778,11.93511014,42.99999999,42.20729763,122.2090834,0.153549242,Normal
+74.97602148,14.92170492,53.73007172,60.05431656,105.6453997,1.594747729,Normal
+42.51727249,14.37567126,25.32356538,28.14160123,128.9056892,0.75702014,Normal
+33.78884314,3.675109986,25.5,30.11373315,128.3253556,-1.776111234,Normal
+54.5036853,6.819910138,46.99999999,47.68377516,111.7911722,-4.406769011,Normal
+48.17074627,9.594216702,39.71092029,38.57652956,135.6233101,5.360050572,Normal
+46.37408781,10.21590237,42.69999999,36.15818544,121.2476572,-0.54202201,Normal
+52.86221391,9.410371613,46.98805181,43.4518423,123.0912395,1.856659161,Normal
+57.1458515,16.48909145,42.84214764,40.65676005,113.8061775,5.0151857,Normal
+37.14014978,16.48123972,24,20.65891006,125.0143609,7.366425398,Normal
+51.31177106,8.875541276,56.99999999,42.43622979,126.4722584,-2.144043911,Normal
+42.51561014,16.54121618,41.99999999,25.97439396,120.631941,7.876730692,Normal
+39.35870531,7.011261806,37,32.3474435,117.8187599,1.904048199,Normal
+35.8775708,1.112373561,43.45725694,34.76519724,126.9239062,-1.632238263,Normal
+43.1919153,9.976663803,28.93814927,33.21525149,123.4674001,1.741017579,Normal
+67.28971201,16.7175142,50.99999999,50.5721978,137.5917777,4.960343813,Normal
+51.32546366,13.63122319,33.25857782,37.69424047,131.3061224,1.78886965,Normal
+65.7563482,13.20692644,43.99999999,52.54942177,129.3935728,-1.982120038,Normal
+40.41336566,-1.329412398,30.98276809,41.74277806,119.3356546,-6.173674823,Normal
+48.80190855,18.01776202,51.99999999,30.78414653,139.1504066,10.44286169,Normal
+50.08615264,13.43004422,34.45754051,36.65610842,119.1346221,3.089484465,Normal
+64.26150724,14.49786554,43.90250363,49.76364169,115.3882683,5.951454368,Normal
+53.68337998,13.44702168,41.58429713,40.23635831,113.9137026,2.737035292,Normal
+48.99595771,13.11382047,51.87351997,35.88213725,126.3981876,0.535471617,Normal
+59.16761171,14.56274875,43.19915768,44.60486296,121.0356423,2.830504124,Normal
+67.80469442,16.55066167,43.25680184,51.25403274,119.6856451,4.867539941,Normal
+61.73487533,17.11431203,46.89999999,44.6205633,120.9201997,3.087725997,Normal
+33.04168754,-0.324678459,19.0710746,33.366366,120.3886112,9.354364925,Normal
+74.56501543,15.72431994,58.61858244,58.84069549,105.417304,0.599247113,Normal
+44.43070103,14.17426387,32.2434952,30.25643716,131.7176127,-3.604255336,Normal
+36.42248549,13.87942449,20.24256187,22.543061,126.0768612,0.179717077,Normal
+51.07983294,14.20993529,35.95122893,36.86989765,115.8037111,6.905089963,Normal
+34.75673809,2.631739646,29.50438112,32.12499844,127.1398495,-0.460894198,Normal
+48.90290434,5.587588658,55.49999999,43.31531568,137.1082886,19.85475919,Normal
+46.23639915,10.0627701,37,36.17362905,128.0636203,-5.100053328,Normal
+46.42636614,6.620795049,48.09999999,39.80557109,130.3500956,2.449382401,Normal
+39.65690201,16.20883944,36.67485694,23.44806258,131.922009,-4.968979881,Normal
+45.57548229,18.75913544,33.77414297,26.81634684,116.7970069,3.131909921,Normal
+66.50717865,20.89767207,31.72747138,45.60950658,128.9029049,1.517203356,Normal
+82.90535054,29.89411893,58.25054221,53.01123161,110.7089577,6.079337831,Normal
+50.67667667,6.461501271,35,44.2151754,116.5879699,-0.214710615,Normal
+89.01487529,26.07598143,69.02125897,62.93889386,111.4810746,6.061508401,Normal
+54.60031622,21.48897426,29.36021618,33.11134196,118.3433212,-1.471067262,Normal
+34.38229939,2.062682882,32.39081996,32.31961651,128.3001991,-3.365515555,Normal
+45.07545026,12.30695118,44.58317718,32.76849908,147.8946372,-8.941709421,Normal
+47.90356517,13.61668819,36,34.28687698,117.4490622,-4.245395422,Normal
+53.93674778,20.72149628,29.22053381,33.21525149,114.365845,-0.421010392,Normal
+61.44659663,22.6949683,46.17034732,38.75162833,125.6707246,-2.707879517,Normal
+45.25279209,8.693157364,41.5831264,36.55963472,118.5458418,0.214750167,Normal
+33.84164075,5.073991409,36.64123294,28.76764934,123.9452436,-0.199249089,Normal

data_cache/pharmacy_readmission.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data_cache/pulmonology_copd.csv ADDED Viewed

	@@ -0,0 +1,102 @@

+age,sex,smoking_pack_years,fev1_litres,fvc_litres,fev1_fvc_ratio,bmi,mrc_dyspnea_scale,sgrq_score,copd_gold_stage,exacerbation
+77,1,60.0,1.21,2.4,0.504,,,69.55,3,1
+79,0,50.0,1.09,1.64,0.665,,,44.24,2,0
+80,0,11.0,1.52,2.3,0.661,,,44.09,2,0
+56,1,60.0,0.47,1.14,0.412,,,62.04,4,1
+65,1,68.0,1.07,2.91,0.368,,,75.56,3,1
+67,0,26.0,1.09,1.99,0.548,,,73.82,2,0
+67,0,50.0,0.69,1.31,0.527,,,77.44,3,1
+83,1,90.0,0.68,2.23,0.305,,,45.41,3,1
+72,1,50.0,2.13,4.38,0.486,,,69.61,2,0
+75,0,6.0,1.06,2.06,0.515,,,55.56,3,1
+76,0,6.0,1.1,2.06,0.534,,,55.56,3,1
+59,0,28.0,0.68,2.02,0.337,,,55.23,4,1
+64,1,30.0,0.45,1.56,0.288,,,50.53,4,1
+74,0,75.0,1.79,2.62,0.683,,,45.0,1,0
+70,0,103.0,1.2,2.09,0.574,,,39.66,2,0
+71,0,105.0,0.72,2.09,0.344,,,39.66,2,0
+69,1,78.0,1.46,3.33,0.438,,,28.86,3,1
+55,0,109.0,1.54,2.15,0.716,,,76.5,2,0
+72,1,15.0,0.6,1.81,0.331,,,38.74,4,1
+72,1,15.0,0.89,1.81,0.492,,,38.74,4,1
+74,0,24.0,0.51,2.06,0.248,,,71.21,4,1
+75,1,40.0,0.79,1.81,0.436,,,35.79,4,1
+69,0,15.0,0.91,2.9,0.314,,,58.78,3,1
+73,1,75.0,1.46,2.37,0.616,,,34.71,3,1
+75,1,45.0,2.35,4.12,0.57,,,58.25,1,0
+80,1,67.0,1.77,2.77,0.639,,,67.66,2,0
+76,1,38.0,1.06,3.11,0.341,,,56.8,3,1
+73,1,31.0,1.88,2.71,0.694,,,66.51,2,0
+77,1,75.0,1.92,2.66,0.722,,,36.39,2,0
+88,1,1.0,1.3,2.0,0.65,,,47.2,3,1
+44,1,30.0,1.66,3.08,0.539,,,72.24,3,1
+82,1,45.0,1.18,2.57,0.459,,,37.04,3,1
+73,1,38.0,1.86,3.69,0.504,,,35.81,2,0
+64,0,40.0,1.81,3.24,0.559,,,27.27,2,0
+76,1,23.0,2.01,3.63,0.554,,,37.71,2,0
+83,1,11.0,1.11,2.04,0.544,,,25.34,3,1
+65,0,66.0,2.0,3.35,0.597,,,25.02,2,0
+74,1,64.0,2.37,4.7,0.504,,,43.57,2,0
+70,1,50.0,1.07,2.91,0.368,,,64.68,3,1
+71,0,20.0,1.32,2.27,0.581,,,38.43,1,0
+78,1,37.5,1.6,2.68,0.597,,,27.52,2,0
+75,0,10.0,0.92,2.29,0.402,,,54.49,2,0
+67,1,36.0,1.79,3.19,0.561,,,50.03,2,0
+78,1,55.0,1.6,3.87,0.413,,,38.21,3,0
+73,1,59.0,2.43,5.37,0.453,,,19.94,2,0
+53,1,35.0,2.06,3.77,0.546,,,72.56,1,0
+64,1,90.0,1.26,2.1,0.6,,,42.01,3,1
+81,1,54.0,1.48,2.29,0.646,,,16.29,2,0
+82,1,54.0,1.34,2.29,0.585,,,16.29,2,0
+71,0,3.0,1.67,2.58,0.647,,,29.29,1,0
+65,0,34.0,1.45,2.85,0.509,,,41.1,2,0
+71,1,20.0,2.97,3.5,0.849,,,38.57,1,0
+78,1,55.0,1.78,4.0,0.445,,,28.51,2,0
+73,0,34.0,0.72,1.47,0.49,,,32.47,3,1
+72,0,34.0,0.73,1.47,0.497,,,32.47,3,1
+63,1,44.0,1.28,3.56,0.36,,,62.09,3,1
+60,1,14.0,2.12,3.62,0.586,,,51.77,2,0
+75,1,45.0,2.62,4.9,0.535,,,18.72,1,0
+73,0,49.0,1.42,2.14,0.664,,,46.77,2,0
+66,1,20.0,3.02,5.23,0.577,,,17.97,1,0
+80,1,3.0,1.97,2.33,0.845,,,36.74,1,0
+81,1,3.0,1.83,2.33,0.785,,,36.74,1,0
+73,1,100.0,1.26,2.28,0.553,,,15.05,3,1
+71,0,47.0,1.28,2.29,0.559,,,28.41,2,0
+69,0,47.0,0.65,2.29,0.284,,,28.41,2,0
+74,1,55.0,3.06,4.46,0.686,,,24.48,1,0
+62,0,80.0,1.93,3.39,0.569,,,10.01,1,0
+68,1,20.0,1.12,3.22,0.348,,,61.97,3,1
+70,0,36.0,2.11,3.51,0.601,,,10.92,1,0
+67,1,20.0,1.11,3.22,0.345,,,61.97,3,1
+70,0,36.0,1.89,3.51,0.538,,,10.92,1,0
+49,0,39.0,0.74,2.31,0.32,,,28.33,4,1
+75,1,5.0,2.43,4.33,0.561,,,47.88,2,0
+73,1,60.0,1.92,3.76,0.511,,,56.96,2,0
+78,1,30.0,1.14,3.04,0.375,,,34.46,3,1
+67,1,45.0,2.79,4.11,0.679,,,29.98,1,0
+75,1,30.0,1.64,3.1,0.529,,,32.38,2,0
+76,1,30.0,1.74,3.1,0.561,,,32.38,2,0
+63,0,50.0,1.69,2.31,0.732,,,47.36,2,0
+65,0,6.0,3.18,4.54,0.7,,,56.2,1,0
+65,0,20.0,2.13,3.2,0.666,,,2.0,1,0
+62,1,8.0,2.52,3.89,0.648,,,32.69,2,0
+63,0,20.0,1.01,2.48,0.407,,,17.95,2,0
+64,0,35.0,0.82,1.52,0.539,,,67.56,3,1
+72,1,75.0,0.94,2.47,0.381,,,45.3,3,1
+61,0,9.0,1.37,2.42,0.566,,,39.51,2,0
+60,1,10.0,1.96,4.01,0.489,,,24.52,2,0
+78,0,51.0,1.23,1.89,0.651,,,33.69,2,0
+67,1,20.0,2.74,4.54,0.604,,,8.12,2,0
+68,1,30.0,1.75,5.15,0.34,,,33.2,1,0
+72,1,30.0,2.8,4.39,0.638,,,42.88,1,0
+69,1,27.0,1.89,2.61,0.724,,,8.25,2,0
+68,1,27.0,1.89,2.61,0.724,,,8.25,2,0
+52,1,40.0,2.93,3.63,0.807,,,25.62,1,0
+55,1,40.0,2.75,4.54,0.606,,,58.41,1,0
+72,1,30.0,1.61,3.14,0.513,,,34.64,2,0
+68,1,25.0,2.7,3.87,0.698,,,35.84,1,0
+75,1,40.0,2.9,4.72,0.614,,,15.05,1,0
+68,0,30.0,1.65,2.8,0.589,,,19.7,2,0
+54,1,30.0,1.72,4.07,0.423,,,20.55,3,1
+78,1,55.0,1.15,2.01,0.572,,,30.21,2,0

data_cache/radiology_pneumonia.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data_cache/thyroid.csv ADDED Viewed

	@@ -0,0 +1,215 @@

+1,107,10.1,2.2,0.9,2.7
+1,113,9.9,3.1,2.0,5.9
+1,127,12.9,2.4,1.4,0.6
+1,109,5.3,1.6,1.4,1.5
+1,105,7.3,1.5,1.5,-0.1
+1,105,6.1,2.1,1.4,7.0
+1,110,10.4,1.6,1.6,2.7
+1,114,9.9,2.4,1.5,5.7
+1,106,9.4,2.2,1.5,0.0
+1,107,13.0,1.1,0.9,3.1
+1,106,4.2,1.2,1.6,1.4
+1,110,11.3,2.3,0.9,3.3
+1,116,9.2,2.7,1.0,4.2
+1,112,8.1,1.9,3.7,2.0
+1,122,9.7,1.6,0.9,2.2
+1,109,8.4,2.1,1.1,3.6
+1,111,8.4,1.5,0.8,1.2
+1,114,6.7,1.5,1.0,3.5
+1,119,10.6,2.1,1.3,1.1
+1,115,7.1,1.3,1.3,2.0
+1,101,7.8,1.2,1.0,1.7
+1,103,10.1,1.3,0.7,0.1
+1,109,10.4,1.9,0.4,-0.1
+1,102,7.6,1.8,2.0,2.5
+1,121,10.1,1.7,1.3,0.1
+1,100,6.1,2.4,1.8,3.8
+1,106,9.6,2.4,1.0,1.3
+1,116,10.1,2.2,1.6,0.8
+1,105,11.1,2.0,1.0,1.0
+1,110,10.4,1.8,1.0,2.3
+1,120,8.4,1.1,1.4,1.4
+1,116,11.1,2.0,1.2,2.3
+1,110,7.8,1.9,2.1,6.4
+1,90,8.1,1.6,1.4,1.1
+1,117,12.2,1.9,1.2,3.9
+1,117,11.0,1.4,1.5,2.1
+1,113,9.0,2.0,1.8,1.6
+1,106,9.4,1.5,0.8,0.5
+1,130,9.5,1.7,0.4,3.2
+1,100,10.5,2.4,0.9,1.9
+1,121,10.1,2.4,0.8,3.0
+1,110,9.2,1.6,1.5,0.3
+1,129,11.9,2.7,1.2,3.5
+1,121,13.5,1.5,1.6,0.5
+1,123,8.1,2.3,1.0,5.1
+1,107,8.4,1.8,1.5,0.8
+1,109,10.0,1.3,1.8,4.3
+1,120,6.8,1.9,1.3,1.9
+1,100,9.5,2.5,1.3,-0.2
+1,118,8.1,1.9,1.5,13.7
+1,100,11.3,2.5,0.7,-0.3
+1,103,12.2,1.2,1.3,2.7
+1,115,8.1,1.7,0.6,2.2
+1,119,8.0,2.0,0.6,3.2
+1,106,9.4,1.7,0.9,3.1
+1,114,10.9,2.1,0.3,1.4
+1,93,8.9,1.5,0.8,2.7
+1,120,10.4,2.1,1.1,1.8
+1,106,11.3,1.8,0.9,1.0
+1,110,8.7,1.9,1.6,4.4
+1,103,8.1,1.4,0.5,3.8
+1,101,7.1,2.2,0.8,2.2
+1,115,10.4,1.8,1.6,2.0
+1,116,10.0,1.7,1.5,4.3
+1,117,9.2,1.9,1.5,6.8
+1,106,6.7,1.5,1.2,3.9
+1,118,10.5,2.1,0.7,3.5
+1,97,7.8,1.3,1.2,0.9
+1,113,11.1,1.7,0.8,2.3
+1,104,6.3,2.0,1.2,4.0
+1,96,9.4,1.5,1.0,3.1
+1,120,12.4,2.4,0.8,1.9
+1,133,9.7,2.9,0.8,1.9
+1,126,9.4,2.3,1.0,4.0
+1,113,8.5,1.8,0.8,0.5
+1,109,9.7,1.4,1.1,2.1
+1,119,12.9,1.5,1.3,3.6
+1,101,7.1,1.6,1.5,1.6
+1,108,10.4,2.1,1.3,2.4
+1,117,6.7,2.2,1.8,6.7
+1,115,15.3,2.3,2.0,2.0
+1,91,8.0,1.7,2.1,4.6
+1,103,8.5,1.8,1.9,1.1
+1,98,9.1,1.4,1.9,-0.3
+1,111,7.8,2.0,1.8,4.1
+1,107,13.0,1.5,2.8,1.7
+1,119,11.4,2.3,2.2,1.6
+1,122,11.8,2.7,1.7,2.3
+1,105,8.1,2.0,1.9,-0.5
+1,109,7.6,1.3,2.2,1.9
+1,105,9.5,1.8,1.6,3.6
+1,112,5.9,1.7,2.0,1.3
+1,112,9.5,2.0,1.2,0.7
+1,98,8.6,1.6,1.6,6.0
+1,109,12.4,2.3,1.7,0.8
+1,114,9.1,2.6,1.5,1.5
+1,114,11.1,2.4,2.0,-0.3
+1,110,8.4,1.4,1.0,1.9
+1,120,7.1,1.2,1.5,4.3
+1,108,10.9,1.2,1.9,1.0
+1,108,8.7,1.2,2.2,2.5
+1,116,11.9,1.8,1.9,1.5
+1,113,11.5,1.5,1.9,2.9
+1,105,7.0,1.5,2.7,4.3
+1,114,8.4,1.6,1.6,-0.2
+1,114,8.1,1.6,1.6,0.5
+1,105,11.1,1.1,0.8,1.2
+1,107,13.8,1.5,1.0,1.9
+1,116,11.5,1.8,1.4,5.4
+1,102,9.5,1.4,1.1,1.6
+1,116,16.1,0.9,1.3,1.5
+1,118,10.6,1.8,1.4,3.0
+1,109,8.9,1.7,1.0,0.9
+1,110,7.0,1.0,1.6,4.3
+1,104,9.6,1.1,1.3,0.8
+1,105,8.7,1.5,1.1,1.5
+1,102,8.5,1.2,1.3,1.4
+1,112,6.8,1.7,1.4,3.3
+1,111,8.5,1.6,1.1,3.9
+1,111,8.5,1.6,1.2,7.7
+1,103,7.3,1.0,0.7,0.5
+1,98,10.4,1.6,2.3,-0.7
+1,117,7.8,2.0,1.0,3.9
+1,111,9.1,1.7,1.2,4.1
+1,101,6.3,1.5,0.9,2.9
+1,106,8.9,0.7,1.0,2.3
+1,102,8.4,1.5,0.8,2.4
+1,115,10.6,0.8,2.1,4.6
+1,130,10.0,1.6,0.9,4.6
+1,101,6.7,1.3,1.0,5.7
+1,110,6.3,1.0,0.8,1.0
+1,103,9.5,2.9,1.4,-0.1
+1,113,7.8,2.0,1.1,3.0
+1,112,10.6,1.6,0.9,-0.1
+1,118,6.5,1.2,1.2,1.7
+1,109,9.2,1.8,1.1,4.4
+1,116,7.8,1.4,1.1,3.7
+1,127,7.7,1.8,1.9,6.4
+1,108,6.5,1.0,0.9,1.5
+1,108,7.1,1.3,1.6,2.2
+1,105,5.7,1.0,0.9,0.9
+1,98,5.7,0.4,1.3,2.8
+1,112,6.5,1.2,1.2,2.0
+1,118,12.2,1.5,1.0,2.3
+1,94,7.5,1.2,1.3,4.4
+1,126,10.4,1.7,1.2,3.5
+1,114,7.5,1.1,1.6,4.4
+1,111,11.9,2.3,0.9,3.8
+1,104,6.1,1.8,0.5,0.8
+1,102,6.6,1.2,1.4,1.3
+2,139,16.4,3.8,1.1,-0.2
+2,111,16.0,2.1,0.9,-0.1
+2,113,17.2,1.8,1.0,0.0
+2,65,25.3,5.8,1.3,0.2
+2,88,24.1,5.5,0.8,0.1
+2,65,18.2,10.0,1.3,0.1
+2,134,16.4,4.8,0.6,0.1
+2,110,20.3,3.7,0.6,0.2
+2,67,23.3,7.4,1.8,-0.6
+2,95,11.1,2.7,1.6,-0.3
+2,89,14.3,4.1,0.5,0.2
+2,89,23.8,5.4,0.5,0.1
+2,88,12.9,2.7,0.1,0.2
+2,105,17.4,1.6,0.3,0.4
+2,89,20.1,7.3,1.1,-0.2
+2,99,13.0,3.6,0.7,-0.1
+2,80,23.0,10.0,0.9,-0.1
+2,89,21.8,7.1,0.7,-0.1
+2,99,13.0,3.1,0.5,-0.1
+2,68,14.7,7.8,0.6,-0.2
+2,97,14.2,3.6,1.5,0.3
+2,84,21.5,2.7,1.1,-0.6
+2,84,18.5,4.4,1.1,-0.3
+2,98,16.7,4.3,1.7,0.2
+2,94,20.5,1.8,1.4,-0.5
+2,99,17.5,1.9,1.4,0.3
+2,76,25.3,4.5,1.2,-0.1
+2,110,15.2,1.9,0.7,-0.2
+2,144,22.3,3.3,1.3,0.6
+2,105,12.0,3.3,1.1,0.0
+2,88,16.5,4.9,0.8,0.1
+2,97,15.1,1.8,1.2,-0.2
+2,106,13.4,3.0,1.1,0.0
+2,79,19.0,5.5,0.9,0.3
+2,92,11.1,2.0,0.7,-0.2
+3,125,2.3,0.9,16.5,9.5
+3,120,6.8,2.1,10.4,38.6
+3,108,3.5,0.6,1.7,1.4
+3,120,3.0,2.5,1.2,4.5
+3,119,3.8,1.1,23.0,5.7
+3,141,5.6,1.8,9.2,14.4
+3,129,1.5,0.6,12.5,2.9
+3,118,3.6,1.5,11.6,48.8
+3,120,1.9,0.7,18.5,24.0
+3,119,0.8,0.7,56.4,21.6
+3,123,5.6,1.1,13.7,56.3
+3,115,6.3,1.2,4.7,14.4
+3,126,0.5,0.2,12.2,8.8
+3,121,4.7,1.8,11.2,53.0
+3,131,2.7,0.8,9.9,4.7
+3,134,2.0,0.5,12.2,2.2
+3,141,2.5,1.3,8.5,7.5
+3,113,5.1,0.7,5.8,19.6
+3,136,1.4,0.3,32.6,8.4
+3,120,3.4,1.8,7.5,21.5
+3,125,3.7,1.1,8.5,25.9
+3,123,1.9,0.3,22.8,22.2
+3,112,2.6,0.7,41.0,19.0
+3,134,1.9,0.6,18.4,8.2
+3,119,5.1,1.1,7.0,40.8
+3,118,6.5,1.3,1.7,11.5
+3,139,4.2,0.7,4.3,6.3
+3,103,5.1,1.4,1.2,5.0
+3,97,4.7,1.1,2.1,12.6
+3,102,5.3,1.4,1.3,6.7

datasets/.gitkeep ADDED Viewed

File without changes

main_hf.py ADDED Viewed

	@@ -0,0 +1,72 @@

+"""HuggingFace Spaces entry — serves API + static frontend."""
+from __future__ import annotations
+import os
+from pathlib import Path
+from fastapi import FastAPI, Request
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse
+from fastapi.staticfiles import StaticFiles
+from app.services.certificate_service import CertificateService
+from app.services.data_service import DataService
+from app.services.ethics_service import EthicsService
+from app.services.explain_service import ExplainService
+from app.services.insight_service import InsightService
+from app.services.ml_service import MLService
+from arena.service import ArenaService
+app = FastAPI(title="HealthWithSevgi API", version="1.3.1")  # application object that all middleware, services and routers below attach to; NOTE(review): confirm the version string matches the deployed release
+app.add_middleware(  # CORS policy for the whole application
+    CORSMiddleware,
+    allow_origins=["*"],  # wildcard: any origin may call the API
+    allow_credentials=False,  # credentialed cross-origin requests are not enabled
+    allow_methods=["*"],  # wildcard: every HTTP method
+    allow_headers=["*"],  # wildcard: every request header
+)
+app.state.data_service = DataService()  # one instance of each service is created at import time and stored on app.state
+app.state.ml_service = MLService()
+app.state.explain_service = ExplainService()
+app.state.ethics_service = EthicsService()
+app.state.insight_service = InsightService()
+app.state.certificate_service = CertificateService()
+app.state.arena_service = ArenaService(app.state.ml_service)  # constructed with the MLService instance stored just above
+from app.routers.data_router import router as data_router  # NOTE(review): router imports sit after the app.state setup, presumably on purpose; confirm before hoisting them to the top of the file
+from app.routers.explain_router import router as explain_router
+from app.routers.ml_router import router as ml_router
+from arena.router import router as arena_router
+app.include_router(data_router)  # register each router's routes on the application
+app.include_router(ml_router)
+app.include_router(explain_router)
+app.include_router(arena_router)
+STATIC_DIR = Path(__file__).parent / "static"  # "static" directory located next to this file
+# Health check — verify critical native libraries load correctly
+@app.get("/health")
+async def health_check() -> dict:
+    errors: list[str] = []
+    for lib in ("sklearn", "xgboost", "lightgbm", "shap", "scipy"):
+        try:
+            __import__(lib)
+        except Exception as exc:
+            errors.append(f"{lib}: {exc}")
+    if errors:
+        return {"status": "degraded", "errors": errors}
+    return {"status": "healthy"}
+# Serve frontend static files
+if STATIC_DIR.is_dir():
+    app.mount("/assets", StaticFiles(directory=STATIC_DIR / "assets"), name="assets")
+    @app.get("/{full_path:path}")
+    async def serve_spa(request: Request, full_path: str):
+        file = STATIC_DIR / full_path
+        if file.is_file():
+            return FileResponse(file)
+        return FileResponse(STATIC_DIR / "index.html")

requirements.txt ADDED Viewed

	@@ -0,0 +1,19 @@

+fastapi>=0.110.0
+uvicorn[standard]>=0.29.0
+scikit-learn>=1.4.0
+pandas>=2.2.0
+numpy>=1.26.0
+imbalanced-learn>=0.12.0    # SMOTE
+shap>=0.45.0                # Explainability
+reportlab>=4.1.0            # PDF certificate generation
+python-multipart>=0.0.9     # File upload support
+pydantic>=2.6.0
+xgboost>=2.0.0              # Gradient boosting (high performance)
+lightgbm>=4.3.0             # Fast gradient boosting
+requests>=2.31.0            # Real dataset downloads with caching
+httpx>=0.28.0               # FastAPI TestClient dependency (used by backend tests)
+python-dotenv>=1.0.0        # Load .env file for API keys
+scipy>=1.12.0               # ARFF file parsing (vertebral column dataset)
+openpyxl>=3.1.0             # Excel .xlsx reading (fetal health CTG dataset)
+xlrd>=2.0.0                 # Excel .xls reading (legacy UCI datasets)
+ucimlrepo>=0.0.3            # UCI ML Repository API (CKD, CTG, and other datasets)

static/.gitkeep ADDED Viewed

File without changes

static/apple-touch-icon.png ADDED Viewed

static/assets/ArenaPage-C8SsT3v3.js ADDED Viewed

The diff for this file is too large to render. See raw diff

static/assets/ArenaPage-C8SsT3v3.js.map ADDED Viewed

The diff for this file is too large to render. See raw diff