Spaces:

Muhammadidrees
/

WellBeingLLMSInsight

Sleeping

App Files Files Community

Muhammadidrees committed on Oct 1, 2025

Commit

f3e951a

verified ·

1 Parent(s): c08ace3

Update app.py

Browse files

Files changed (1) hide show

app.py +370 -140

app.py CHANGED Viewed

@@ -1,156 +1,386 @@
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import gradio as gr
-import os
-# ----------------------------
-# Model Config
-# ----------------------------
-MODEL_ID = "Muhammadidrees/my-biomed"   # or "BioMistral/BioMistral-7B"
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID,
-    device_map="auto",           # Ensures GPU usage on HF (L4)
-    torch_dtype=torch.float16    # FP16 for speed + memory efficiency
 )
-pipe = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer
-)
-# ----------------------------
-# Helper: split report into panels
-# ----------------------------
-def split_report(text: str):
-    markers = ["5. Tabular", "📊 Tabular", "## 5"]
-    idx = None
-    for m in markers:
-        pos = text.find(m)
-        if pos != -1:
-            idx = pos if idx is None or pos < idx else idx
-    if idx is None:
-        return text.strip(), ""
-    return text[:idx].strip(), text[idx:].strip()
-# ----------------------------
-# Main Analysis Function
-# ----------------------------
-def analyze(albumin, creatinine, glucose, crp, mcv, rdw, alp,
-            wbc, lymph, age, gender, height, weight):
-    # Compute BMI
     try:
-        bmi = round(float(weight) / ((float(height) / 100) ** 2), 2)
-    except Exception:
-        bmi = "N/A"
-    # ----------------------------
-    # Strict System Prompt
-    # ----------------------------
-    system_prompt = (
-        "You are a professional AI Medical Assistant.\n"
-        "You must ONLY analyze: 9 Levine biomarkers + Age + Height + Weight.\n"
-        "Forbidden: Any extra labs (cholesterol, vitamin D, hormones, etc.).\n"
-        "If information is not derivable, state clearly: 'Not available from current biomarkers.'\n\n"
-        "Biomarkers allowed:\n"
-        "- Albumin\n- Creatinine\n- Glucose\n- C-reactive protein (CRP)\n"
-        "- Mean Cell Volume (MCV)\n- Red Cell Distribution Width (RDW)\n"
-        "- Alkaline Phosphatase (ALP)\n- White Blood Cell count (WBC)\n"
-        "- Lymphocyte percentage\n\n"
-        "Output format:\n"
-        "1. Executive Summary\n"
-        "2. System-Specific Analysis\n"
-        "3. Personalized Action Plan\n"
-        "4. Interaction Alerts\n"
-        "5. Tabular Mapping (Markdown table with Biomarker | Value | Range | Status | Insight)\n"
-        "6. Enhanced AI Insights & Longitudinal Risk\n\n"
-        "Style: Professional, concise, structured, client-friendly. "
-        "No hallucinations. No extra biomarkers. No absolute longevity claims.\n"
-    )
-    patient_input = (
-        f"Patient Profile:\n- Age: {age}\n- Gender: {gender}\n"
-        f"- Height: {height} cm\n- Weight: {weight} kg\n- BMI: {bmi}\n\n"
-        "Lab Values:\n"
-        f"- Albumin: {albumin}\n- Creatinine: {creatinine}\n"
-        f"- Glucose: {glucose}\n- CRP: {crp}\n"
-        f"- MCV: {mcv}\n- RDW: {rdw}\n"
-        f"- ALP: {alp}\n- WBC: {wbc}\n- Lymphocytes: {lymph}\n"
     )
-    prompt = system_prompt + "\n" + patient_input
-    # ----------------------------
-    # Generate
-    # ----------------------------
-    try:
-        gen = pipe(
-            prompt,
-            max_new_tokens=1200,
-            temperature=0.25,
-            top_p=0.9,
-            return_full_text=False
         )
-        generated = gen[0]["generated_text"].strip()
-    except Exception as e:
-        return f"❌ Error: {str(e)}", ""
-    if not generated:
-        return "⚠️ No valid response. Please try again.", ""
-    left_md, right_md = split_report(generated)
-    return left_md, right_md
-# ----------------------------
-# Gradio UI
-# ----------------------------
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🏥 AI Medical Biomarker Dashboard")
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown("### Demographics")
-            age = gr.Number(label="Age", value=45)
-            gender = gr.Dropdown(["Male", "Female"], label="Gender", value="Male")
-            height = gr.Number(label="Height (cm)", value=174)
-            weight = gr.Number(label="Weight (kg)", value=75)
-            gr.Markdown("### Blood Panel")
-            wbc = gr.Number(label="WBC (K/uL)", value=6.5)
-            lymph = gr.Number(label="Lymphocytes (%)", value=30)
-            mcv = gr.Number(label="MCV (fL)", value=88)
-            rdw = gr.Number(label="RDW (%)", value=13)
-        with gr.Column():
-            gr.Markdown("### Chemistry Panel")
-            albumin = gr.Number(label="Albumin (g/dL)", value=4.2)
-            creatinine = gr.Number(label="Creatinine (mg/dL)", value=0.9)
-            glucose = gr.Number(label="Glucose (mg/dL)", value=92)
-            crp = gr.Number(label="CRP (mg/L)", value=1.0)
-            alp = gr.Number(label="ALP (U/L)", value=70)
-            analyze_btn = gr.Button("🔬 Generate Report", variant="primary")
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown("### 📝 Summary & Action Plan")
-            left_output = gr.Markdown()
-        with gr.Column():
-            gr.Markdown("### 📊 Tabular & AI Insights")
-            right_output = gr.Markdown()
-    analyze_btn.click(
-        fn=analyze,
-        inputs=[albumin, creatinine, glucose, crp, mcv, rdw, alp, wbc, lymph, age, gender, height, weight],
-        outputs=[left_output, right_output]
-    )
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)), show_error=True)

+"""
+app.py
+Gradio app that loads Muhammadidrees/bioLLM (Hugging Face) and
+performs prompt-feeding based biomarker report generation.
+Inputs:
+ - Nine Levine biomarkers (numeric)
+ - Age, weight (kg), height (cm), sex
+Outputs:
+ - Text sections: Executive summary, System analysis, Personalized action plan, Further recommendations
+ - Table: biomarker | value | unit | status | short interpretation
+Notes:
+ - Not a clinical decision tool. Validate outputs externally.
+ - If CUDA is available, the script will use GPU (device=0).
+"""
+import os
+import math
+import pandas as pd
 import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, set_seed
 import gradio as gr
+from typing import Dict, Tuple
+# ========== Config ==========
+HF_MODEL = "Muhammadidrees/bioLLM"  # user-specified
+SEED = 42
+# Seeding happens once, at import time, through transformers' set_seed helper.
+set_seed(SEED)
+# Generation hyperparameters (tune as needed)
+# This dict is unpacked as keyword arguments into the text-generation pipeline call
+# made by generate_report.
+GEN_KWARGS = dict(
+    max_new_tokens=512,
+    do_sample=True,  # sampling is enabled, so temperature/top_p below take effect
+    temperature=0.7,
+    top_p=0.92,
+    num_return_sequences=1,  # generate_report reads only the first returned sequence
+    eos_token_id=None,  # NOTE(review): presumably defers to the model's own EOS configuration — confirm
 )
+# ========== Utility functions ==========
+def bmi_from_weight_height(kg: float, cm: float) -> "float | None":
+    """
+    Compute body-mass index (kg / m^2) from weight in kilograms and height in centimetres.
+    Returns None when either input is missing (None) or not strictly positive,
+    leaving it to the caller to decide how an unavailable BMI is presented.
+    """
+    # A cleared numeric field may arrive as None; comparing None with 0 would raise TypeError.
+    if kg is None or cm is None:
+        return None
+    if kg <= 0 or cm <= 0:
+        return None
+    m = cm / 100.0
+    return kg / (m * m)
+def device_selection():
+    """Return the pipeline device index: 0 when CUDA is available, otherwise -1 (CPU)."""
+    # pipeline expects a device index; -1 selects the CPU
+    return 0 if torch.cuda.is_available() else -1
+def load_model_and_tokenizer(model_name: str):
+    """
+    Build a text-generation pipeline for the given causal-LM checkpoint.
+    Returns a (pipeline, tokenizer) pair. The weights are requested in float16
+    when CUDA is available; otherwise torch_dtype is passed as None.
+    """
+    print(f"Loading tokenizer and model: {model_name} ...")
+    tok = AutoTokenizer.from_pretrained(model_name, use_fast=True)
+    weights_dtype = torch.float16 if torch.cuda.is_available() else None
+    lm = AutoModelForCausalLM.from_pretrained(
+        model_name,
+        torch_dtype=weights_dtype,
+        low_cpu_mem_usage=True,
+    )
+    # The pipeline is created on the device index chosen by device_selection().
+    generator = pipeline("text-generation", model=lm, tokenizer=tok, device=device_selection())
+    return generator, tok
+# ========== Domain knowledge: Levine 9 biomarkers ==========
+# (name, unit) pairs for the nine biomarkers. The units are the conventional ones;
+# inputs are not unit-converted anywhere in this file, so the reference ranges and
+# the UI labels assume values are entered in these units.
+# NOTE(review): this list is not referenced elsewhere in the visible part of the file.
+LEVINED_BIOMARKERS = [
+    ("Fasting Glucose", "mg/dL"),
+    ("C-reactive protein (CRP)", "mg/L"),
+    ("Albumin", "g/dL"),
+    ("Alkaline phosphatase", "U/L"),
+    ("Creatinine", "mg/dL"),
+    ("Red Cell Distribution Width (RDW)", "%"),
+    ("Lymphocyte %", "%"),
+    ("White Blood Cell count (WBC)", "10^3/µL"),
+    ("Mean Corpuscular Volume (MCV)", "fL"),
+]
+# Example reference ranges (simple defaults). You can adjust to local lab ranges.
+# These ranges are coarse and illustrative. Use local clinical ranges in production.
+# Each entry maps a biomarker name to (low, high); value_to_status treats both bounds
+# as inclusive, i.e. only values strictly outside the pair are flagged Low/High.
+REFERENCE_RANGES = {
+    "Fasting Glucose": (70, 99),            # mg/dL normal fasting
+    "C-reactive protein (CRP)": (0.0, 3.0), # mg/L low-normal; >10 often indicates acute inflammation
+    "Albumin": (3.5, 5.0),                  # g/dL
+    "Alkaline phosphatase": (44, 147),      # U/L
+    "Creatinine": (0.6, 1.3),               # mg/dL (adult average; sex/age dependent)
+    "Red Cell Distribution Width (RDW)": (11.5, 14.5), # %
+    "Lymphocyte %": (20.0, 50.0),           # %
+    "White Blood Cell count (WBC)": (4.0, 11.0), # 10^3/µL
+    "Mean Corpuscular Volume (MCV)": (80, 100),    # fL
+}
+def value_to_status(name: str, val: float) -> str:
+    """
+    Classify a biomarker value against REFERENCE_RANGES.
+    Returns one of:
+      - "Missing": val is None
+      - "Unknown": no reference range is registered for `name`
+      - "Invalid": val cannot be converted to a float, or is NaN
+      - "Low" / "Normal" / "High": val is below / within / above the inclusive range
+    """
+    if val is None:
+        return "Missing"
+    rng = REFERENCE_RANGES.get(name)
+    if not rng:
+        return "Unknown"
+    low, high = rng
     try:
+        v = float(val)
+    except (TypeError, ValueError, OverflowError):
+        # Only conversion failures mean bad input; anything else should surface as a real error.
+        return "Invalid"
+    if math.isnan(v):
+        # NaN compares False against both bounds and would otherwise be reported as "Normal".
+        return "Invalid"
+    if v < low:
+        return "Low"
+    if v > high:
+        return "High"
+    return "Normal"
+# Statuses that carry no measurement to interpret.
+_NO_RESULT_TEXT = {
+    "Missing": "No value provided.",
+    "Invalid": "Invalid input.",
+    "Unknown": "No reference range available.",
+}
+_CRP_KEY = "C-reactive protein (CRP)"
+# Wording for an in-range result, per supported biomarker.
+_IN_RANGE_TEXT = {
+    "Albumin": "Within expected range.",
+    _CRP_KEY: "Within expected range.",
+    "Fasting Glucose": "Within expected fasting range.",
+    "Creatinine": "Within expected range.",
+    "Alkaline phosphatase": "Within expected range.",
+    "Red Cell Distribution Width (RDW)": "Within expected range.",
+    "Lymphocyte %": "Within expected range.",
+    "White Blood Cell count (WBC)": "Within expected range.",
+    "Mean Corpuscular Volume (MCV)": "Within expected range.",
+}
+# Biomarker-specific wording for the abnormal directions that have a template.
+_SPECIFIC_FINDINGS = {
+    ("Albumin", "Low"): "Hypoalbuminemia — malnutrition, liver disease, or inflammation possible.",
+    (_CRP_KEY, "High"): "Elevated — suggests systemic inflammation or acute infection.",
+    ("Fasting Glucose", "High"): "Hyperglycemia — impaired glucose control; consider further metabolic workup.",
+    ("Creatinine", "High"): "Possible renal dysfunction or reduced GFR (age/sex dependent).",
+    ("Alkaline phosphatase", "High"): "May suggest cholestasis or bone turnover; correlate clinically.",
+    ("Red Cell Distribution Width (RDW)", "High"): "Anisocytosis — possible nutritional deficiency or marrow response.",
+    ("Lymphocyte %", "Low"): "Relative lymphopenia — may reflect infection or immunosuppression.",
+    ("White Blood Cell count (WBC)", "High"): "Leukocytosis — often infection or inflammation.",
+    ("White Blood Cell count (WBC)", "Low"): "Leukopenia — consider bone marrow suppression or viral infection.",
+    ("Mean Corpuscular Volume (MCV)", "High"): "Macrocytosis — check B12/folate, liver disease, alcohol use.",
+    ("Mean Corpuscular Volume (MCV)", "Low"): "Microcytosis — consider iron deficiency.",
+}
+# Neutral wording for an abnormal direction that has no biomarker-specific template.
+_OUT_OF_RANGE_FALLBACK = {
+    "Low": "Below reference range — correlate clinically.",
+    "High": "Above reference range — correlate clinically.",
+}
+def short_interpretation(name: str, val: float, status: str) -> str:
+    """
+    Return a one-line, template-based interpretation for the biomarker table.
+    `status` is expected to be one of the strings produced by value_to_status.
+    `val` is accepted for interface compatibility but is not used.
+    An out-of-range status is never described as "within range": when no
+    biomarker-specific template exists for that direction, a neutral
+    "Below/Above reference range" sentence is returned instead.
+    Unsupported biomarker names yield "Interpretation not available."
+    """
+    if status in _NO_RESULT_TEXT:
+        return _NO_RESULT_TEXT[status]
+    # Any name beginning with "C-reactive" is treated as CRP.
+    key = _CRP_KEY if name.startswith("C-reactive") else name
+    if key not in _IN_RANGE_TEXT:
+        return "Interpretation not available."
+    if status == "Normal":
+        return _IN_RANGE_TEXT[key]
+    specific = _SPECIFIC_FINDINGS.get((key, status))
+    if specific is not None:
+        return specific
+    return _OUT_OF_RANGE_FALLBACK.get(status, "Interpretation not available.")
+# ========== Prompt construction ==========
+# str.format template filled in by generate_report. Placeholders: {age}, {sex},
+# {weight_kg}, {height_cm}, {bmi:.1f} and {biomarker_lines}.
+# The ".1f" format spec means `bmi` must be a number; a string or None raises at format time.
+PROMPT_TEMPLATE = """
+You are a biomedical-language assistant. You will be given patient demographics and a list of biomarkers with values and status (Low/Normal/High).
+Produce an output that contains the following labeled sections:
+1) Executive Summary:
+   - 2-4 sentence high-level synthesis of the patient's biomarker pattern and clinical implication.
+2) System-specific analysis:
+   - Bullet-style analysis per organ/system (e.g., metabolic, hepatic, renal, hematologic, immune), referencing biomarkers when relevant.
+3) Personalized action plan:
+   - Practical, prioritized next steps suitable for a clinician or informed researcher (diagnostic tests, immediate actions, lifestyle suggestions).
+   - Keep recommendations conservative and evidence-minded. Mention when specialty referral is reasonable.
+4) Further recommendations:
+   - Additional tests, monitoring frequency, non-pharmacological measures, and resources for patient education.
+5) Tabular biomarker interpretation:
+   - Provide a Markdown table with columns: Biomarker | Value | Unit | Status | Short interpretation.
+   - Include only biomarkers provided.
+Important:
+ - Keep your language cautious and non-prescriptive (e.g., "consider", "suggest", "may indicate").
+ - Add a single-line concluding clinical disclaimer: "This report is for research/educational use only and not a substitute for clinical judgment."
+Patient demographics:
+- Age: {age}
+- Sex: {sex}
+- Weight: {weight_kg} kg
+- Height: {height_cm} cm
+- BMI: {bmi:.1f}
+Biomarkers:
+{biomarker_lines}
+Now produce the requested sections.
+"""
+def build_biomarker_lines(biomarker_inputs: Dict[str, Tuple[float,str]]) -> str:
+    """
+    Render the biomarker block of the prompt.
+    biomarker_inputs maps biomarker name -> (value, unit). Each entry becomes one
+    line of the form "- Name: value unit (Status)", where a None value is shown as
+    an empty string; lines are joined with newlines in the dict's iteration order.
+    """
+    def _render(label, reading, unit):
+        shown = str(reading) if reading is not None else ""
+        return f"- {label}: {shown} {unit} ({value_to_status(label, reading)})"
+    return "\n".join(
+        _render(label, reading, unit)
+        for label, (reading, unit) in biomarker_inputs.items()
+    )
+# ========== Load pipeline ==========
+# Runs at import time: the model is loaded once and kept in the module-level `pipe`,
+# which generate_report calls for every request.
+print("Initializing model pipeline...\n(If this stalls, make sure the model is available and you have network access.)")
+pipe, tokenizer = load_model_and_tokenizer(HF_MODEL)
+print("Model loaded.")
+# ========== Inference function ==========
+def generate_report(
+    fasting_glucose,
+    crp,
+    albumin,
+    alk_phos,
+    creatinine,
+    rdw,
+    lymph_pct,
+    wbc,
+    mcv,
+    age,
+    weight_kg,
+    height_cm,
+    sex
+):
+    """
+    Produce the model-written report and the rule-based biomarker table.
+    The nine biomarker values plus age, weight (kg), height (cm) and sex are
+    formatted into PROMPT_TEMPLATE and sent to the module-level `pipe`.
+    Returns (text, df): `text` is the generated continuation only (the prompt is
+    not echoed back), and `df` is a pandas DataFrame with the columns
+    Biomarker | Value | Unit | Status | Short interpretation, derived locally
+    from value_to_status / short_interpretation rather than from the model.
+    """
+    # Build biomarker dict (names must match the REFERENCE_RANGES keys)
+    biom_inputs = {
+        "Fasting Glucose": (fasting_glucose, "mg/dL"),
+        "C-reactive protein (CRP)": (crp, "mg/L"),
+        "Albumin": (albumin, "g/dL"),
+        "Alkaline phosphatase": (alk_phos, "U/L"),
+        "Creatinine": (creatinine, "mg/dL"),
+        "Red Cell Distribution Width (RDW)": (rdw, "%"),
+        "Lymphocyte %": (lymph_pct, "%"),
+        "White Blood Cell count (WBC)": (wbc, "10^3/µL"),
+        "Mean Corpuscular Volume (MCV)": (mcv, "fL"),
+    }
+    # BMI falls back to 0.0 when weight/height are missing or non-positive, because
+    # the template formats it with "{bmi:.1f}" and therefore needs a number.
+    bmi = 0.0
+    if weight_kg is not None and height_cm is not None:
+        bmi = bmi_from_weight_height(weight_kg, height_cm) or 0.0
+    prompt = PROMPT_TEMPLATE.format(
+        age=int(age) if age is not None else "Unknown",
+        sex=sex,
+        weight_kg=weight_kg,
+        height_cm=height_cm,
+        bmi=bmi,
+        biomarker_lines=build_biomarker_lines(biom_inputs)
     )
+    # The text-generation pipeline returns prompt + continuation by default;
+    # return_full_text=False keeps the instructions out of the displayed report.
+    gen_kwargs = {**GEN_KWARGS, "return_full_text": False}
+    gen = pipe(prompt, **gen_kwargs)[0]["generated_text"].strip()
+    # Build table as DataFrame for nicer display in Gradio
+    rows = []
+    for name, (val, unit) in biom_inputs.items():
+        status = value_to_status(name, val)
+        rows.append({
+            "Biomarker": name,
+            "Value": "" if val is None else val,
+            "Unit": unit,
+            "Status": status,
+            "Short interpretation": short_interpretation(name, val, status),
+        })
+    return gen, pd.DataFrame(rows)
+# ========== Gradio UI ==========
+def launch_app():
+    """
+    Build the Gradio Blocks UI and start the server on 0.0.0.0.
+    "Generate report" sends the thirteen inputs to generate_report and shows the
+    returned text and table. "Reset" restores every input to its demo default.
+    """
+    demo_inputs = {
+        "fasting_glucose": 92,
+        "crp": 5.2,
+        "albumin": 3.2,
+        "alk_phos": 85,
+        "creatinine": 1.0,
+        "rdw": 13.1,
+        "lymph_pct": 30,
+        "wbc": 7.2,
+        "mcv": 90,
+        "age": 58,
+        "weight_kg": 78,
+        "height_cm": 172,
+        "sex": "Male",
+    }
+    with gr.Blocks(title="bioLLM — Biomarker Report (Levine 9-panel)") as demo:
+        with gr.Row():
+            with gr.Column(scale=1):
+                fasting_glucose = gr.Number(label="Fasting Glucose (mg/dL)", value=demo_inputs["fasting_glucose"])
+                crp = gr.Number(label="C-reactive protein (CRP) (mg/L)", value=demo_inputs["crp"])
+                albumin = gr.Number(label="Albumin (g/dL)", value=demo_inputs["albumin"])
+                alk_phos = gr.Number(label="Alkaline phosphatase (U/L)", value=demo_inputs["alk_phos"])
+                creatinine = gr.Number(label="Creatinine (mg/dL)", value=demo_inputs["creatinine"])
+            with gr.Column(scale=1):
+                rdw = gr.Number(label="Red Cell Distribution Width (RDW) (%)", value=demo_inputs["rdw"])
+                lymph_pct = gr.Number(label="Lymphocyte %", value=demo_inputs["lymph_pct"])
+                wbc = gr.Number(label="White Blood Cell count (10^3/µL)", value=demo_inputs["wbc"])
+                mcv = gr.Number(label="Mean Corpuscular Volume (MCV) (fL)", value=demo_inputs["mcv"])
+                sex = gr.Dropdown(choices=["Male", "Female", "Other"], value=demo_inputs["sex"], label="Sex")
+        with gr.Row():
+            age = gr.Number(label="Age (years)", value=demo_inputs["age"])
+            weight_kg = gr.Number(label="Weight (kg)", value=demo_inputs["weight_kg"])
+            height_cm = gr.Number(label="Height (cm)", value=demo_inputs["height_cm"])
+        with gr.Row():
+            submit = gr.Button("Generate report")
+            clear = gr.Button("Reset")
+        with gr.Row():
+            output_text = gr.Markdown(label="Model report (text)")
+        with gr.Row():
+            output_table = gr.Dataframe(headers=["Biomarker", "Value", "Unit", "Status", "Short interpretation"], label="Tabular interpretation (derived)")
+        # demo_inputs key -> component, listed in generate_report's positional order.
+        # Both event handlers below rely on this single ordering.
+        components = {
+            "fasting_glucose": fasting_glucose,
+            "crp": crp,
+            "albumin": albumin,
+            "alk_phos": alk_phos,
+            "creatinine": creatinine,
+            "rdw": rdw,
+            "lymph_pct": lymph_pct,
+            "wbc": wbc,
+            "mcv": mcv,
+            "age": age,
+            "weight_kg": weight_kg,
+            "height_cm": height_cm,
+            "sex": sex,
+        }
+        input_components = list(components.values())
+        submit.click(
+            generate_report,
+            inputs=input_components,
+            outputs=[output_text, output_table]
         )
+        # Reset must name the input components as outputs; with an empty outputs
+        # list the returned updates have nowhere to go and the button does nothing.
+        clear.click(
+            lambda: [gr.update(value=demo_inputs[key]) for key in components],
+            inputs=[],
+            outputs=input_components,
+        )
+    demo.launch(server_name="0.0.0.0", share=False)
 if __name__ == "__main__":
+    launch_app()