Spaces:

techatcreated
/

CI-Outcome-Predictor

Sleeping

App Files Files Community

techatcreated committed on Dec 17, 2025

Commit

c929715

verified ·

1 Parent(s): 2bf0c7c

Upload app.py

Browse files

Files changed (1) hide show

app.py +638 -0

app.py ADDED Viewed

	@@ -0,0 +1,638 @@

+# -*- coding: utf-8 -*-
+"""Navya_Mrig.ipynb
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/10xqPbYcTUoYEytn7C0HJoSNObUNmuCxZ
+"""
+import re
+import pickle
+import joblib
+import numpy as np
+import pandas as pd
+import gradio as gr
+# =========================
+# PATHS
+# =========================
+# NOTE(review): these are absolute "/content/..." locations (the module header
+# says the file was generated by Colab) - confirm the files exist at these
+# paths wherever the app is deployed.
+VAL_CSV_PATH  = "/content/validation_data.csv"
+MAIN_CSV_PATH = "/content/Cochlear_Implant_Dataset.csv"
+CLF_PKL_PATH  = "/content/ci_success_classifier.pkl"
+REG_PKL_PATH  = "/content/ci_speech_score_regressor.pkl"
+# =========================
+# Load data + models
+# =========================
+# Both CSVs are read eagerly at import time. main_df supplies the Gene dropdown
+# choices; val_df is only a fallback (for the gene choices and for the input
+# column list when the models expose no feature names).
+val_df  = pd.read_csv(VAL_CSV_PATH)
+main_df = pd.read_csv(MAIN_CSV_PATH)
+def load_model(path: str):
+    """Deserialize a fitted model stored at *path*.
+    joblib is tried first; if it raises for any reason, the file is opened in
+    binary mode and read with the standard ``pickle`` module instead.
+    NOTE(review): both loaders unpickle arbitrary objects - only point this at
+    trusted model files.
+    """
+    try:
+        model = joblib.load(path)
+    except Exception:
+        with open(path, "rb") as handle:
+            model = pickle.load(handle)
+    return model
+clf_model = load_model(CLF_PKL_PATH)
+reg_model = load_model(REG_PKL_PATH)
+def get_model_feature_names(m):
+    """Return the feature names recorded on *m* as a list, or ``None``.
+    ``m.feature_names_in_`` is used when present. Otherwise, if *m* exposes
+    ``named_steps``, the first step carrying ``feature_names_in_`` wins.
+    """
+    def _candidates():
+        # The model itself first, then (lazily) each named step in order.
+        yield m
+        if hasattr(m, "named_steps"):
+            yield from m.named_steps.values()
+    for obj in _candidates():
+        if hasattr(obj, "feature_names_in_"):
+            return list(obj.feature_names_in_)
+    return None
+clf_expected = get_model_feature_names(clf_model) or []
+reg_expected = get_model_feature_names(reg_model) or []
+# Ordered union of both models' columns: dict keys keep first-seen order and
+# silently drop repeats.
+input_cols = list(dict.fromkeys([*clf_expected, *reg_expected]))
+# Neither model exposed feature names: fall back to the validation CSV header.
+if not input_cols:
+    input_cols = list(val_df.columns)
+# =========================
+# Build Gene dropdown choices from MAIN dataset
+# =========================
+def find_gene_column(df: pd.DataFrame):
+    """Pick the gene column of *df*.
+    An exact ``"Gene"`` column is preferred; otherwise the first column whose
+    lowercased name contains ``"gene"`` is returned. ``None`` means no match.
+    """
+    columns = df.columns
+    if "Gene" in columns:
+        return "Gene"
+    return next((name for name in columns if "gene" in name.lower()), None)
+def normalize_str_series(s: pd.Series) -> pd.Series:
+    """Stringify and strip *s*, turning textual null markers into real NaN.
+    After ``astype(str)`` and whitespace stripping, the exact values "null",
+    "NULL", "None", "none", "", "nan" and "NaN" are replaced with ``np.nan``.
+    """
+    null_markers = ("null", "NULL", "None", "none", "", "nan", "NaN")
+    stripped = s.astype(str).str.strip()
+    return stripped.replace(dict.fromkeys(null_markers, np.nan))
+def _distinct_genes(frame, column):
+    """Sorted unique non-null gene strings of ``frame[column]``; [] when column is None."""
+    if column is None:
+        return []
+    return sorted(set(normalize_str_series(frame[column]).dropna().tolist()))
+gene_col_main = find_gene_column(main_df)
+gene_choices = _distinct_genes(main_df, gene_col_main)
+# The main dataset yielded nothing usable: try the validation data instead.
+if not gene_choices:
+    gene_col_val = find_gene_column(val_df)
+    gene_choices = _distinct_genes(val_df, gene_col_val)
+# =========================
+# Helpers
+# =========================
+def parse_age_to_years(age_raw: str, mode: str):
+    """Convert a free-text age into a float number of years.
+    mode:
+      - "Years.Months (1.11 = 1y 11m)"  -> 1 + 11/12
+      - "Decimal (1.11 = 1.11 years)"  -> 1.11
+    Accepts "1.6YRS", "2yrs", etc.: every character other than digits and "."
+    is discarded before parsing.
+    In Years.Months mode only the exact shape "<digits>.<two digits>" with
+    months 00-11 is read as years + months/12; any other input falls back to a
+    plain decimal reading.
+    Returns np.nan when the input is None, blank, a textual null marker
+    ("nan"/"none"/"null"), or cannot be read as a number.
+    """
+    if age_raw is None:
+        return np.nan
+    text = str(age_raw).strip()
+    if text.lower() in {"", "nan", "none", "null"}:
+        return np.nan
+    cleaned = re.sub(r"[^0-9\.]", "", text)
+    if not mode.startswith("Decimal"):
+        # Years.Months mode: partition() yields a non-digit "months" part when
+        # there is more than one dot, so such input drops to the fallback.
+        years, dot, months = cleaned.partition(".")
+        if dot and years.isdigit() and months.isdigit() and len(months) == 2 and int(months) <= 11:
+            return int(years) + int(months) / 12.0
+    # Decimal mode, and the fallback for anything that is not "Y.MM".
+    try:
+        return float(cleaned)
+    except ValueError:
+        # Only ValueError is expected from float(str); let everything else surface.
+        return np.nan
+def safe_pct(x):
+    """Return ``x * 100`` rounded to an int, or None when that is impossible.
+    None is returned when *x* cannot be converted to a finite number: wrong
+    type (TypeError), non-numeric text or NaN (ValueError), infinity
+    (OverflowError). Other exceptions are no longer swallowed.
+    """
+    try:
+        return int(round(float(x) * 100))
+    except (TypeError, ValueError, OverflowError):
+        return None
+def get_gene_feature_name(cols):
+    """Return the gene feature among *cols*, or ``None``.
+    A name equal to "gene" (ignoring case) is preferred; failing that, the
+    first name containing "gene" (ignoring case) is used.
+    """
+    exact = next((c for c in cols if c.lower() == "gene"), None)
+    if exact is not None:
+        return exact
+    return next((c for c in cols if "gene" in c.lower()), None)
+def get_age_feature_names(cols):
+    """Return every name in *cols* whose lowercased form contains "age".
+    NOTE(review): this is a plain substring test, so names such as "Stage" or
+    "Language" match too - confirm that suits the model's columns.
+    """
+    matches = []
+    for name in cols:
+        if "age" in name.lower():
+            matches.append(name)
+    return matches
+# Resolved once at import from the model input columns: the single gene
+# feature (or None) and the list of every column whose name contains "age".
+GENE_FEAT = get_gene_feature_name(input_cols)
+AGE_FEATS = get_age_feature_names(input_cols)
+def align_to_expected(df: pd.DataFrame, expected_cols):
+    """Return *df* restricted to, and ordered as, *expected_cols*.
+    Columns named in *expected_cols* but absent from *df* are added filled
+    with NaN. The input frame is never modified; when *expected_cols* is
+    empty, *df* itself is returned unchanged.
+    """
+    if not expected_cols:
+        return df
+    absent = [name for name in expected_cols if name not in df.columns]
+    aligned = df.copy()
+    for name in absent:
+        aligned[name] = np.nan
+    return aligned[expected_cols]
+def render_single_result_html(gene, age_entered, age_used_years, parse_mode, label, prob, speech):
+    """Build the HTML result card for a single prediction.
+    Args:
+        gene: selected gene; shown as "—" when None.
+        age_entered: the raw age text exactly as the user typed it.
+        age_used_years: parsed age in years; shown as "—" when it is not a
+            finite number.
+        parse_mode: label of the age parsing mode that was applied.
+        label: predicted class; 1 renders "Likely Success", 0 renders
+            "Lower Likelihood", anything else "Unavailable".
+        prob: class-1 probability as a 0-1 fraction, or None.
+        speech: predicted speech score; shown as "—" when not numeric.
+    Returns:
+        An HTML string. Every caller-supplied text value is HTML-escaped
+        before interpolation, because the age is free text typed by the user.
+    """
+    import html  # stdlib; imported here so the block does not depend on the file header
+    if label == 1:
+        status, badge, icon = "Likely Success", "ok", "✓"
+    elif label == 0:
+        status, badge, icon = "Lower Likelihood", "warn", "!"
+    else:
+        status, badge, icon = "Unavailable", "neutral", "?"
+    prob_pct = safe_pct(prob) if prob is not None else None
+    prob_text = f"{prob_pct}%" if prob_pct is not None else "—"
+    # Clamp only the bar so an out-of-range value cannot produce a broken CSS width.
+    bar_width = f"{min(max(prob_pct, 0), 100)}%" if prob_pct is not None else "0%"
+    try:
+        speech_disp = f"{float(speech):.3f}"
+    except (TypeError, ValueError):
+        speech_disp = "—"
+    # np.isfinite raises on None / non-numeric input, so coerce first.
+    try:
+        age_value = float(age_used_years)
+    except (TypeError, ValueError):
+        age_value = float("nan")
+    age_used_disp = f"{age_value:.3f} years" if np.isfinite(age_value) else "—"
+    gene_disp = html.escape(str(gene)) if gene is not None else "—"
+    age_entered_disp = html.escape(str(age_entered))
+    parse_mode_disp = html.escape(str(parse_mode))
+    label_disp = html.escape(str(label))
+    return f"""
+    <div class="result-card">
+      <div class="result-head">
+        <div class="result-title">Prediction</div>
+        <div class="pill {badge}">
+          <span class="dot"></span>
+          <span class="pill-ic">{icon}</span>
+          <span>{status}</span>
+        </div>
+      </div>
+      <div class="grid2">
+        <div class="box">
+          <div class="k">Gene</div>
+          <div class="v mono">{gene_disp}</div>
+        </div>
+        <div class="box">
+          <div class="k">Age entered</div>
+          <div class="v mono">{age_entered_disp}</div>
+        </div>
+      </div>
+      <div class="box" style="margin-top:12px;">
+        <div class="k">Age used by model</div>
+        <div class="v mono">{age_used_disp}</div>
+        <div class="sub">Parsing mode: <span class="mono">{parse_mode_disp}</span></div>
+      </div>
+      <div class="box" style="margin-top:12px;">
+        <div class="k">Success probability (Class 1)</div>
+        <div class="prob-row">
+          <div class="prob-bar"><div class="prob-fill" style="width:{bar_width};"></div></div>
+          <div class="prob-txt mono">{prob_text}</div>
+        </div>
+      </div>
+      <div class="grid2" style="margin-top:12px;">
+        <div class="box">
+          <div class="k">Predicted label</div>
+          <div class="v mono">{label_disp}</div>
+        </div>
+        <div class="box">
+          <div class="k">Predicted speech score</div>
+          <div class="v mono">{speech_disp}</div>
+        </div>
+      </div>
+      <div class="fine">
+        Informational tool only. Not medical advice.
+      </div>
+    </div>
+    """
+def predict_single(gene, age_text, parse_mode):
+    """Predict for one gene/age pair and return the rendered HTML card.
+    Raises gr.Error when no gene is selected or the age text does not parse
+    to a finite number under *parse_mode*.
+    """
+    if gene is None or not str(gene).strip():
+        raise gr.Error("Please select a Gene.")
+    age_used = parse_age_to_years(age_text, parse_mode)
+    age_ok = isinstance(age_used, (float, np.floating)) and np.isfinite(age_used)
+    if not age_ok:
+        raise gr.Error("Please enter a valid Age (e.g., 1.6YRS, 1.11, 2.3).")
+    def _value_for(col):
+        # Gene feature gets the gene, age-like features the parsed age, the rest NaN.
+        if GENE_FEAT and col == GENE_FEAT:
+            return gene
+        if col in AGE_FEATS:
+            return age_used
+        return np.nan
+    X = pd.DataFrame([{col: _value_for(col) for col in input_cols}])
+    Xc = align_to_expected(X, clf_expected)
+    Xr = align_to_expected(X, reg_expected)
+    label = int(clf_model.predict(Xc)[0])
+    prob = None
+    if hasattr(clf_model, "predict_proba"):
+        class_probs = clf_model.predict_proba(Xc)[0]
+        if len(class_probs) >= 2:
+            # Second column is taken as the class-1 probability.
+            prob = float(class_probs[1])
+    speech = reg_model.predict(Xr)[0]
+    return render_single_result_html(gene, age_text, age_used, parse_mode, label, prob, speech)
+def _file_to_path(file_obj):
+    """Extract a filesystem path from whatever a Gradio File input delivered.
+    Handles a plain string path, an object with a ``.name`` attribute, and a
+    dict with a "name" key; anything else (including None) yields None.
+    """
+    if isinstance(file_obj, str):
+        return file_obj
+    if hasattr(file_obj, "name"):
+        return file_obj.name
+    if isinstance(file_obj, dict):
+        # .get() gives None for a dict without "name", matching the final fallback.
+        return file_obj.get("name")
+    return None
+def predict_batch(csv_file, parse_mode):
+    """Score every row of an uploaded CSV with both models.
+    The CSV must contain a gene column and at least one column whose name
+    contains "age" (both matched case-insensitively). Other model features are
+    copied from same-named columns when present and left as NaN otherwise.
+    Returns:
+        ``(summary_html, preview_df, output_csv_path)`` - the summary card, the
+        first 20 rows of the scored frame, and the path of the full CSV. The
+        CSV is written into a fresh temporary directory per call so that
+        concurrent requests cannot overwrite each other's results.
+    Raises:
+        gr.Error: no upload, empty CSV, missing Gene/Age column, or ages that
+        cannot be parsed in the selected mode.
+    """
+    import os
+    import tempfile
+    path = _file_to_path(csv_file)
+    if not path:
+        raise gr.Error("Please upload a CSV file.")
+    try:
+        df = pd.read_csv(path)
+    except pd.errors.EmptyDataError:
+        # A zero-byte upload makes read_csv raise instead of returning an empty frame.
+        raise gr.Error("Uploaded CSV is empty.") from None
+    if df.empty:
+        raise gr.Error("Uploaded CSV is empty.")
+    df_cols_lower = {c.lower(): c for c in df.columns}
+    # Gene column: the model's gene feature (case-insensitive) if present,
+    # otherwise the first column containing "gene".
+    if GENE_FEAT and GENE_FEAT.lower() in df_cols_lower:
+        gene_col = df_cols_lower[GENE_FEAT.lower()]
+    else:
+        gene_col = next((c for c in df.columns if "gene" in c.lower()), None)
+    if gene_col is None:
+        raise gr.Error("CSV must include a Gene column (e.g., 'Gene').")
+    # Age column: the first column containing "age".
+    age_source_col = next((c for c in df.columns if "age" in c.lower()), None)
+    if age_source_col is None:
+        raise gr.Error("CSV must include an Age column (e.g., 'Age').")
+    parsed_age = df[age_source_col].apply(lambda v: parse_age_to_years(v, parse_mode))
+    if parsed_age.isna().any():
+        bad_n = int(parsed_age.isna().sum())
+        raise gr.Error(f"{bad_n} rows have invalid Age values for the selected parsing mode.")
+    # Build X in the exact model input_cols order; fill missing optional cols with NaN.
+    X = pd.DataFrame(index=df.index)
+    for col in input_cols:
+        if GENE_FEAT and col == GENE_FEAT:
+            X[col] = df[gene_col]
+        elif col in AGE_FEATS:
+            X[col] = parsed_age
+        else:
+            # Case-insensitive exact match against the upload; else NaN.
+            src = df_cols_lower.get(col.lower())
+            X[col] = df[src] if src is not None else np.nan
+    Xc = align_to_expected(X, clf_expected)
+    Xr = align_to_expected(X, reg_expected)
+    out = df.copy()
+    out["success_label_pred"] = clf_model.predict(Xc)
+    if hasattr(clf_model, "predict_proba"):
+        proba = clf_model.predict_proba(Xc)
+        if proba.shape[1] == 2:
+            out["success_prob_class1"] = proba[:, 1]
+    out["speech_score_pred"] = reg_model.predict(Xr)
+    # Unique directory per request; the file name stays the one shown in the UI label.
+    out_dir = tempfile.mkdtemp(prefix="ci_predictions_")
+    out_path = os.path.join(out_dir, "predictions_output.csv")
+    out.to_csv(out_path, index=False)
+    n = len(out)
+    succ = int((out["success_label_pred"] == 1).sum())
+    succ_pct = int(round((succ / n) * 100)) if n else 0
+    avg_prob_txt = "—"
+    if "success_prob_class1" in out.columns:
+        try:
+            avg_prob_txt = f"{int(round(float(out['success_prob_class1'].mean())*100))}%"
+        except (TypeError, ValueError, OverflowError):
+            # Non-numeric or NaN/inf mean: keep the placeholder.
+            pass
+    avg_speech_txt = "—"
+    try:
+        avg_speech_txt = f"{float(pd.to_numeric(out['speech_score_pred'], errors='coerce').mean()):.3f}"
+    except (TypeError, ValueError):
+        pass
+    summary = f"""
+    <div class="result-card">
+      <div class="result-head">
+        <div class="result-title">Batch Summary</div>
+        <div class="pill neutral"><span class="dot"></span><span class="pill-ic">↯</span><span>{n} rows</span></div>
+      </div>
+      <div class="grid3">
+        <div class="box"><div class="k">Predicted success</div><div class="v mono">{succ}</div></div>
+        <div class="box"><div class="k">Predicted success (%)</div><div class="v mono">{succ_pct}%</div></div>
+        <div class="box"><div class="k">Avg prob (Class 1)</div><div class="v mono">{avg_prob_txt}</div></div>
+      </div>
+      <div class="box" style="margin-top:12px;">
+        <div class="k">Avg speech score</div><div class="v mono">{avg_speech_txt}</div>
+        <div class="sub">Parsing mode: <span class="mono">{parse_mode}</span></div>
+      </div>
+      <div class="fine">Download the output CSV below.</div>
+    </div>
+    """
+    return summary, out.head(20), out_path
+def age_preview(age_text, parse_mode):
+    """Return the hint HTML showing which age (in years) the model would receive.
+    A dash is shown when *age_text* does not parse to a finite number.
+    """
+    years = parse_age_to_years(age_text, parse_mode)
+    usable = isinstance(years, (float, np.floating)) and np.isfinite(years)
+    if not usable:
+        return "<div class='hint'>Model will use: <span class='mono'>—</span></div>"
+    return f"<div class='hint'>Model will use: <span class='mono'><b>{years:.3f}</b> years</span></div>"
+# =========================
+# CSS: minimal, clean, mobile responsive + hide Gradio footer
+# =========================
+CSS = """
+:root{
+  --bg:#f6f7fb;
+  --card:#ffffff;
+  --border:#e5e7eb;
+  --text:#0f172a;
+  --muted:#64748b;
+  --accent:#2563eb;
+  --ok:#16a34a;
+  --warn:#d97706;
+  --shadow: 0 10px 30px rgba(15, 23, 42, .08);
+  --radius: 16px;
+}
+.gradio-container{
+  background: var(--bg);
+  color: var(--text);
+}
+/* Hide Gradio footer / API bar */
+footer, .footer, #footer, .gradio-footer { display:none !important; height:0 !important; }
+/* Page wrapper */
+#wrap{ max-width: 980px; margin: 0 auto; padding: 14px 12px 28px; }
+/* Make Rows wrap on small screens */
+.gr-row{ flex-wrap: wrap !important; gap: 12px !important; }
+.gr-column{ min-width: 280px; }
+/* Hero */
+.hero{
+  padding: 16px 16px;
+  border-radius: var(--radius);
+  border: 1px solid var(--border);
+  background: linear-gradient(180deg, #ffffff, #fbfdff);
+  box-shadow: var(--shadow);
+  margin-bottom: 12px;
+}
+.hero h1{ margin:0; font-size: 18px; font-weight: 800; letter-spacing:.2px; }
+.hero p{ margin:6px 0 0; color: var(--muted); font-size: 13px; line-height:1.35; }
+/* Card wrapper for inputs/outputs */
+.card{
+  background: var(--card);
+  border: 1px solid var(--border);
+  border-radius: var(--radius);
+  box-shadow: var(--shadow);
+  padding: 14px;
+}
+.mono{ font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; }
+/* Results */
+.result-card{
+  background: #ffffff;
+  border: 1px solid var(--border);
+  border-radius: var(--radius);
+  padding: 14px;
+  box-shadow: var(--shadow);
+}
+.result-head{ display:flex; align-items:center; justify-content:space-between; gap:10px; margin-bottom:12px; }
+.result-title{ font-size: 13px; font-weight: 900; letter-spacing:.3px; }
+.grid2{ display:grid; grid-template-columns: 1fr 1fr; gap: 10px; }
+.grid3{ display:grid; grid-template-columns: 1fr 1fr 1fr; gap: 10px; }
+.box{
+  border: 1px solid var(--border);
+  background: #fbfcff;
+  border-radius: 14px;
+  padding: 12px;
+}
+.k{ color: var(--muted); font-size: 12px; }
+.v{ color: var(--text); font-size: 14px; font-weight: 800; margin-top: 3px; }
+.sub{ margin-top:6px; color: var(--muted); font-size: 11px; }
+.pill{
+  display:flex; align-items:center; gap:8px;
+  padding: 8px 10px;
+  border-radius: 999px;
+  border: 1px solid var(--border);
+  background: #ffffff;
+  font-size: 12px;
+  white-space: nowrap;
+}
+.pill .dot{ width:10px; height:10px; border-radius:999px; background: rgba(100,116,139,.25); }
+.pill.ok{ border-color: rgba(22,163,74,.25); }
+.pill.ok .dot{ background: var(--ok); }
+.pill.warn{ border-color: rgba(217,119,6,.25); }
+.pill.warn .dot{ background: var(--warn); }
+.pill.neutral{ border-color: rgba(37,99,235,.20); }
+.pill.neutral .dot{ background: var(--accent); }
+.pill-ic{ font-weight: 900; }
+.prob-row{ display:flex; align-items:center; gap: 10px; margin-top: 6px; }
+.prob-bar{
+  flex: 1;
+  height: 10px;
+  border-radius: 999px;
+  background: #eef2ff;
+  border: 1px solid rgba(37,99,235,.15);
+  overflow: hidden;
+}
+.prob-fill{
+  height: 100%;
+  background: linear-gradient(90deg, rgba(37,99,235,.95), rgba(22,163,74,.85));
+  border-radius: 999px;
+}
+.prob-txt{ width: 56px; text-align:right; color: var(--text); font-weight: 900; }
+.fine{
+  margin-top: 12px;
+  font-size: 11px;
+  color: var(--muted);
+  line-height: 1.35;
+}
+.hint{
+  margin-top: 6px;
+  font-size: 12px;
+  color: var(--muted);
+  padding: 8px 10px;
+  border: 1px dashed rgba(100,116,139,.35);
+  border-radius: 12px;
+  background: #ffffff;
+}
+/* Primary button styling + full width on mobile */
+#primaryBtn button{
+  border-radius: 14px !important;
+  border: 1px solid rgba(37,99,235,.35) !important;
+  background: var(--accent) !important;
+  color: white !important;
+  font-weight: 900 !important;
+}
+@media (max-width: 740px){
+  #primaryBtn button{ width: 100% !important; }
+  .grid2{ grid-template-columns: 1fr; }
+  .grid3{ grid-template-columns: 1fr; }
+  .result-head{ flex-direction: column; align-items: flex-start; }
+  .gr-column{ min-width: 100%; }
+}
+"""
+# Base Gradio theme; it is passed to gr.Blocks together with the CSS string.
+theme = gr.themes.Base(
+    primary_hue="blue",
+    secondary_hue="emerald",
+    neutral_hue="slate",
+    radius_size="lg",
+    # Inter (loaded as a Google font) first, then generic sans-serif fallbacks.
+    font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
+)
+# =========================
+# UI
+# =========================
+# Age-parsing options, shared by both tabs so the two radio groups cannot drift apart.
+_DECIMAL_MODE = "Decimal (1.11 = 1.11 years)"
+_YEARS_MONTHS_MODE = "Years.Months (1.11 = 1y 11m)"
+_PARSE_MODE_CHOICES = (_DECIMAL_MODE, _YEARS_MONTHS_MODE)
+with gr.Blocks(theme=theme, css=CSS, title="CI Outcome Predictor") as demo:
+    with gr.Column(elem_id="wrap"):
+        gr.HTML("""
+          <div class="hero">
+            <h1>CI Outcome Predictor</h1>
+            <p>Minimal UI for single and batch predictions. Gene options are loaded from the main dataset. Age parsing is shown transparently.</p>
+          </div>
+        """)
+        with gr.Tabs():
+            with gr.Tab("Single Prediction"):
+                with gr.Row():
+                    # Left column: inputs. Right column: rendered result card.
+                    with gr.Column(scale=1):
+                        with gr.Group(elem_classes=["card"]):
+                            gene_in = gr.Dropdown(
+                                choices=gene_choices,
+                                value=gene_choices[0] if gene_choices else None,
+                                label="Gene",
+                                filterable=True,
+                            )
+                            age_in = gr.Textbox(
+                                label="Age",
+                                placeholder="Examples: 1.11  |  1.6YRS  |  2.3",
+                            )
+                            parse_mode = gr.Radio(
+                                choices=list(_PARSE_MODE_CHOICES),
+                                value=_DECIMAL_MODE,
+                                label="Age parsing",
+                            )
+                            age_hint = gr.HTML(value=age_preview("", _DECIMAL_MODE))
+                            btn = gr.Button("Run Prediction", elem_id="primaryBtn")
+                    with gr.Column(scale=1):
+                        single_out = gr.HTML(value="", elem_classes=["card"])
+                # Refresh the age hint whenever the text or the parsing mode changes.
+                for trigger in (age_in, parse_mode):
+                    trigger.change(fn=age_preview, inputs=[age_in, parse_mode], outputs=[age_hint])
+                btn.click(
+                    fn=predict_single,
+                    inputs=[gene_in, age_in, parse_mode],
+                    outputs=[single_out],
+                )
+            with gr.Tab("Batch Prediction (CSV)"):
+                with gr.Group(elem_classes=["card"]):
+                    gr.Markdown(
+                        "**Minimum required columns:** `Gene`, `Age`  \n"
+                        f"**Model feature columns (auto-filled if missing):** `{len(input_cols)}` total",
+                        elem_classes=["mono"],
+                    )
+                    parse_mode_b = gr.Radio(
+                        choices=list(_PARSE_MODE_CHOICES),
+                        value=_DECIMAL_MODE,
+                        label="Age parsing",
+                    )
+                    csv_in = gr.File(file_types=[".csv"], label="Upload CSV")
+                    run_b = gr.Button("Run Batch Prediction", elem_id="primaryBtn")
+                    batch_summary = gr.HTML(value="")
+                    preview = gr.Dataframe(label="Preview (first 20 rows)", wrap=True)
+                    out_file = gr.File(label="Download predictions_output.csv")
+                run_b.click(
+                    fn=predict_batch,
+                    inputs=[csv_in, parse_mode_b],
+                    outputs=[batch_summary, preview, out_file],
+                )
+# Start the server only when this file is run as a script, so that importing
+# the module (tests, tooling) does not launch a web server as a side effect.
+if __name__ == "__main__":
+    demo.launch(share=True)