Muhammadidrees committed on
Commit
f19b8f7
·
verified ·
1 Parent(s): 239b274

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -362
app.py CHANGED
@@ -1,386 +1,124 @@
1
- """
2
- app.py
3
- Gradio app that loads Muhammadidrees/bioLLM (Hugging Face) and
4
- performs prompt-feeding based biomarker report generation.
5
-
6
- Inputs:
7
- - Nine Levine biomarkers (numeric)
8
- - Age, weight (kg), height (cm), sex
9
-
10
- Outputs:
11
- - Text sections: Executive summary, System analysis, Personalized action plan, Further recommendations
12
- - Table: biomarker | value | unit | status | short interpretation
13
-
14
- Notes:
15
- - Not a clinical decision tool. Validate outputs externally.
16
- - If CUDA is available, the script will use GPU (device=0).
17
- """
18
-
19
- import os
20
- import math
21
- import pandas as pd
22
- import torch
23
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, set_seed
24
  import gradio as gr
25
- from typing import Dict, Tuple
26
-
27
- # ========== Config ==========
28
- HF_MODEL = "Muhammadidrees/bioLLM" # user-specified
29
- SEED = 42
30
- set_seed(SEED)
31
-
32
- # Generation hyperparameters (tune as needed)
33
- GEN_KWARGS = dict(
34
- max_new_tokens=1500,
35
- do_sample=True,
36
- temperature=0.7,
37
- top_p=0.92,
38
- num_return_sequences=1,
39
- eos_token_id=None,
40
- )
41
-
42
- # ========== Utility functions ==========
43
 
44
- def bmi_from_weight_height(kg: float, cm: float) -> float:
45
- if kg <= 0 or cm <= 0:
46
- return None
47
- m = cm / 100.0
48
- return kg / (m * m)
49
 
50
- def device_selection():
51
- if torch.cuda.is_available():
52
- return 0 # pipeline expects device index (0)
53
- return -1 # CPU
 
 
 
 
 
54
 
55
- def load_model_and_tokenizer(model_name: str):
56
- """
57
- Load tokenizer and model for causal LM. Adjust device map as needed.
58
- """
59
- print(f"Loading tokenizer and model: {model_name} ...")
60
- tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
61
- # Avoid loading to CPU twice; huggingface will map to available device
62
- model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16 if torch.cuda.is_available() else None, low_cpu_mem_usage=True)
63
- # Create pipeline
64
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device_selection())
65
  return pipe, tokenizer
66
 
67
- # ========== Domain knowledge: Levine 9 biomarkers ==========
68
- # Units below are conventional / commonly used — allow user to input any units but document expects these.
69
- LEVINED_BIOMARKERS = [
70
- ("Fasting Glucose", "mg/dL"),
71
- ("C-reactive protein (CRP)", "mg/L"),
72
- ("Albumin", "g/dL"),
73
- ("Alkaline phosphatase", "U/L"),
74
- ("Creatinine", "mg/dL"),
75
- ("Red Cell Distribution Width (RDW)", "%"),
76
- ("Lymphocyte %", "%"),
77
- ("White Blood Cell count (WBC)", "10^3/µL"),
78
- ("Mean Corpuscular Volume (MCV)", "fL"),
79
- ]
80
-
81
- # Example reference ranges (simple defaults). You can adjust to local lab ranges.
82
- # These ranges are coarse and illustrative. Use local clinical ranges in production.
83
- REFERENCE_RANGES = {
84
- "Fasting Glucose": (70, 99), # mg/dL normal fasting
85
- "C-reactive protein (CRP)": (0.0, 3.0), # mg/L low-normal; >10 often indicates acute inflammation
86
- "Albumin": (3.5, 5.0), # g/dL
87
- "Alkaline phosphatase": (44, 147), # U/L
88
- "Creatinine": (0.6, 1.3), # mg/dL (adult average; sex/age dependent)
89
- "Red Cell Distribution Width (RDW)": (11.5, 14.5), # %
90
- "Lymphocyte %": (20.0, 50.0), # %
91
- "White Blood Cell count (WBC)": (4.0, 11.0), # 10^3/µL
92
- "Mean Corpuscular Volume (MCV)": (80, 100), # fL
93
- }
94
-
95
- def value_to_status(name: str, val: float) -> str:
96
- """
97
- Convert a numeric biomarker value to a simple 'Low/Normal/High' status.
98
- Uses REFERENCE_RANGES map; if not found returns 'Unknown'.
99
- """
100
- if val is None:
101
- return "Missing"
102
- rng = REFERENCE_RANGES.get(name)
103
- if not rng:
104
- return "Unknown"
105
- low, high = rng
106
- try:
107
- v = float(val)
108
- except:
109
- return "Invalid"
110
- if v < low:
111
- return "Low"
112
- elif v > high:
113
- return "High"
114
- else:
115
- return "Normal"
116
 
117
- def short_interpretation(name: str, val: float, status: str) -> str:
118
- """
119
- Very short, literature-style interpretation used for the table.
120
- These are template-like; you can expand or replace with another knowledge base.
121
- """
122
- if status == "Missing":
123
- return "No value provided."
124
- if status == "Invalid":
125
- return "Invalid input."
126
- if status == "Unknown":
127
- return "No reference range available."
128
- if name == "Albumin":
129
- if status == "Low":
130
- return "Hypoalbuminemia — malnutrition, liver disease, or inflammation possible."
131
- else:
132
- return "Within expected range."
133
- if name.startswith("C-reactive"):
134
- if status == "High":
135
- return "Elevated — suggests systemic inflammation or acute infection."
136
- else:
137
- return "Within expected range."
138
- if name == "Fasting Glucose":
139
- if status == "High":
140
- return "Hyperglycemia — impaired glucose control; consider further metabolic workup."
141
- else:
142
- return "Within expected fasting range."
143
- if name == "Creatinine":
144
- if status == "High":
145
- return "Possible renal dysfunction or reduced GFR (age/sex dependent)."
146
- else:
147
- return "Within expected range."
148
- if name == "Alkaline phosphatase":
149
- if status == "High":
150
- return "May suggest cholestasis or bone turnover; correlate clinically."
151
- else:
152
- return "Within expected range."
153
- if name == "Red Cell Distribution Width (RDW)":
154
- if status == "High":
155
- return "Anisocytosis — possible nutritional deficiency or marrow response."
156
- else:
157
- return "Within expected range."
158
- if name == "Lymphocyte %":
159
- if status == "Low":
160
- return "Relative lymphopenia — may reflect infection or immunosuppression."
161
- else:
162
- return "Within expected range."
163
- if name == "White Blood Cell count (WBC)":
164
- if status == "High":
165
- return "Leukocytosis — often infection or inflammation."
166
- elif status == "Low":
167
- return "Leukopenia — consider bone marrow suppression or viral infection."
168
- else:
169
- return "Within expected range."
170
- if name == "Mean Corpuscular Volume (MCV)":
171
- if status == "High":
172
- return "Macrocytosis — check B12/folate, liver disease, alcohol use."
173
- elif status == "Low":
174
- return "Microcytosis — consider iron deficiency."
175
- else:
176
- return "Within expected range."
177
- # fallback
178
- return "Interpretation not available."
179
 
180
- # ========== Prompt construction ==========
181
- PROMPT_TEMPLATE = """
182
- You are a biomedical-language assistant. You will be given patient demographics and a list of biomarkers with values and status (Low/Normal/High).
183
- Produce an output that contains the following labeled sections:
 
 
184
 
185
- 1) Executive Summary:
186
- - 2-4 sentence high-level synthesis of the patient's biomarker pattern and clinical implication.
 
 
187
 
188
- 2) System-specific analysis:
189
- - Bullet-style analysis per organ/system (e.g., metabolic, hepatic, renal, hematologic, immune), referencing biomarkers when relevant.
190
 
191
- 3) Personalized action plan:
192
- - Practical, prioritized next steps suitable for a clinician or informed researcher (diagnostic tests, immediate actions, lifestyle suggestions).
193
- - Keep recommendations conservative and evidence-minded. Mention when specialty referral is reasonable.
194
 
195
- 4) Further recommendations:
196
- - Additional tests, monitoring frequency, non-pharmacological measures, and resources for patient education.
197
 
198
- 5) Tabular biomarker interpretation:
199
- - Provide a Markdown table with columns: Biomarker | Value | Unit | Status | Short interpretation.
200
- - Include only biomarkers provided.
201
 
202
- Important:
203
- - Keep your language cautious and non-prescriptive (e.g., "consider", "suggest", "may indicate").
204
- - Add a single-line concluding clinical disclaimer: "This report is for research/educational use only and not a substitute for clinical judgment."
205
 
206
- Patient demographics:
207
- - Age: {age}
208
- - Sex: {sex}
209
- - Weight: {weight_kg} kg
210
- - Height: {height_cm} cm
211
- - BMI: {bmi:.1f}
212
 
213
  Biomarkers:
214
- {biomarker_lines}
215
-
216
- Now produce the requested sections.
217
  """
218
-
219
- def build_biomarker_lines(biomarker_inputs: Dict[str, Tuple[float,str]]) -> str:
220
- """
221
- biomarker_inputs: dict mapping biomarker name -> (value, unit)
222
- returns a text block with lines: - Name: value unit (Status)
223
- """
224
- lines = []
225
- for name, (val, unit) in biomarker_inputs.items():
226
- status = value_to_status(name, val)
227
- val_str = "" if val is None else str(val)
228
- lines.append(f"- {name}: {val_str} {unit} ({status})")
229
- return "\n".join(lines)
230
-
231
- # ========== Load pipeline ==========
232
- print("Initializing model pipeline...\n(If this stalls, make sure the model is available and you have network access.)")
233
- pipe, tokenizer = load_model_and_tokenizer(HF_MODEL)
234
- print("Model loaded.")
235
-
236
- # ========== Inference function ==========
237
- def generate_report(
238
- fasting_glucose,
239
- crp,
240
- albumin,
241
- alk_phos,
242
- creatinine,
243
- rdw,
244
- lymph_pct,
245
- wbc,
246
- mcv,
247
- age,
248
- weight_kg,
249
- height_cm,
250
- sex
251
- ):
252
- # Build biomarker dict (names must match LEVINE list keys)
253
- biom_inputs = {
254
- "Fasting Glucose": (fasting_glucose, "mg/dL"),
255
- "C-reactive protein (CRP)": (crp, "mg/L"),
256
- "Albumin": (albumin, "g/dL"),
257
- "Alkaline phosphatase": (alk_phos, "U/L"),
258
- "Creatinine": (creatinine, "mg/dL"),
259
- "Red Cell Distribution Width (RDW)": (rdw, "%"),
260
- "Lymphocyte %": (lymph_pct, "%"),
261
- "White Blood Cell count (WBC)": (wbc, "10^3/µL"),
262
- "Mean Corpuscular Volume (MCV)": (mcv, "fL"),
263
  }
264
 
265
- # Compute BMI
266
- bmi = bmi_from_weight_height(weight_kg, height_cm) or 0.0
267
-
268
- # Construct biomarker lines for prompt
269
- biomarker_lines = build_biomarker_lines(biom_inputs)
270
- prompt = PROMPT_TEMPLATE.format(
271
- age=int(age) if age is not None else "Unknown",
272
- sex=sex,
273
- weight_kg=weight_kg,
274
- height_cm=height_cm,
275
- bmi=bmi,
276
- biomarker_lines=biomarker_lines
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
  )
278
 
279
- # Debug: you may print prompt during development
280
- # print("Prompt sent to model:\n", prompt)
281
-
282
- # Call model
283
- gen = pipe(prompt, **GEN_KWARGS)[0]["generated_text"]
284
-
285
- # Post-process: separate sections if model includes them; otherwise present whole text.
286
- # We'll attempt to split by the numeric section headings used in prompt (1), 2), etc.
287
- sections = {"full_text": gen}
288
- for label in ["Executive Summary:", "System-specific analysis:", "Personalized action plan:", "Further recommendations:", "Tabular biomarker interpretation:"]:
289
- if label in gen:
290
- # naive split: find start
291
- start = gen.find(label)
292
- # find next label start index
293
- sections[label] = gen[start: gen.find("\n\n", start) + 2] # short snippet fallback
294
-
295
- # Build table as DataFrame for nicer display in Gradio
296
- rows = []
297
- for name, (val, unit) in biom_inputs.items():
298
- status = value_to_status(name, val)
299
- interp = short_interpretation(name, val, status)
300
- rows.append({
301
- "Biomarker": name,
302
- "Value": "" if val is None else val,
303
- "Unit": unit,
304
- "Status": status,
305
- "Short interpretation": interp
306
- })
307
- df = pd.DataFrame(rows)
308
-
309
- # Return model raw text and dataframe
310
- return gen, df
311
-
312
- # ========== Gradio UI ==========
313
- def launch_app():
314
- demo_inputs = {
315
- "fasting_glucose": 92,
316
- "crp": 5.2,
317
- "albumin": 3.2,
318
- "alk_phos": 85,
319
- "creatinine": 1.0,
320
- "rdw": 13.1,
321
- "lymph_pct": 30,
322
- "wbc": 7.2,
323
- "mcv": 90,
324
- "age": 58,
325
- "weight_kg": 78,
326
- "height_cm": 172,
327
- "sex": "Male",
328
- }
329
-
330
- with gr.Blocks(title="bioLLM — Biomarker Report (Levine 9-panel)") as demo:
331
- # gr.Markdown("## bioLLM biomarker report generator\n**Not for clinical use.** ` and prompt-feeding to produce literature-style explanations.")
332
- with gr.Row():
333
- with gr.Column(scale=1):
334
- fasting_glucose = gr.Number(label="Fasting Glucose (mg/dL)", value=demo_inputs["fasting_glucose"])
335
- crp = gr.Number(label="C-reactive protein (CRP) (mg/L)", value=demo_inputs["crp"])
336
- albumin = gr.Number(label="Albumin (g/dL)", value=demo_inputs["albumin"])
337
- alk_phos = gr.Number(label="Alkaline phosphatase (U/L)", value=demo_inputs["alk_phos"])
338
- creatinine = gr.Number(label="Creatinine (mg/dL)", value=demo_inputs["creatinine"])
339
- with gr.Column(scale=1):
340
- rdw = gr.Number(label="Red Cell Distribution Width (RDW) (%)", value=demo_inputs["rdw"])
341
- lymph_pct = gr.Number(label="Lymphocyte %", value=demo_inputs["lymph_pct"])
342
- wbc = gr.Number(label="White Blood Cell count (10^3/µL)", value=demo_inputs["wbc"])
343
- mcv = gr.Number(label="Mean Corpuscular Volume (MCV) (fL)", value=demo_inputs["mcv"])
344
- sex = gr.Dropdown(choices=["Male", "Female", "Other"], value=demo_inputs["sex"], label="Sex")
345
- with gr.Row():
346
- age = gr.Number(label="Age (years)", value=demo_inputs["age"])
347
- weight_kg = gr.Number(label="Weight (kg)", value=demo_inputs["weight_kg"])
348
- height_cm = gr.Number(label="Height (cm)", value=demo_inputs["height_cm"])
349
- with gr.Row():
350
- submit = gr.Button("Generate report")
351
- clear = gr.Button("Reset")
352
- with gr.Row():
353
- output_text = gr.Markdown(label="Model report (text)")
354
- with gr.Row():
355
- output_table = gr.Dataframe(headers=["Biomarker", "Value", "Unit", "Status", "Short interpretation"], label="Tabular interpretation (derived)")
356
-
357
- def on_submit(
358
- fasting_glucose, crp, albumin, alk_phos, creatinine,
359
- rdw, lymph_pct, wbc, mcv, age, weight_kg, height_cm, sex
360
- ):
361
- text, df = generate_report(
362
- fasting_glucose, crp, albumin, alk_phos, creatinine,
363
- rdw, lymph_pct, wbc, mcv, age, weight_kg, height_cm, sex
364
- )
365
- # Return text as markdown and dataframe as table
366
- return text, df
367
-
368
- submit.click(
369
- on_submit,
370
- inputs=[fasting_glucose, crp, albumin, alk_phos, creatinine, rdw, lymph_pct, wbc, mcv, age, weight_kg, height_cm, sex],
371
- outputs=[output_text, output_table]
372
- )
373
- clear.click(lambda: (gr.update(value=demo_inputs["fasting_glucose"]), gr.update(value=demo_inputs["crp"]),
374
- gr.update(value=demo_inputs["albumin"]), gr.update(value=demo_inputs["alk_phos"]),
375
- gr.update(value=demo_inputs["creatinine"]), gr.update(value=demo_inputs["rdw"]),
376
- gr.update(value=demo_inputs["lymph_pct"]), gr.update(value=demo_inputs["wbc"]),
377
- gr.update(value=demo_inputs["mcv"]), gr.update(value=demo_inputs["age"]),
378
- gr.update(value=demo_inputs["weight_kg"]), gr.update(value=demo_inputs["height_cm"]),
379
- gr.update(value=demo_inputs["sex"])),
380
- inputs=[], outputs=[]
381
- )
382
-
383
- demo.launch(server_name="0.0.0.0", share=False)
384
 
 
 
 
385
  if __name__ == "__main__":
386
- launch_app()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
HF_MODEL = "Muhammadidrees/bioLLM"


# ---------------------------
# Safe model/tokenizer loader
# ---------------------------
def load_model_and_tokenizer(model_name):
    """Load tokenizer, model, and a text-generation pipeline for *model_name*.

    The fast tokenizer is tried first; if it raises ImportError (e.g. an
    optional dependency such as sacremoses is missing) the slow tokenizer
    is used instead. Returns a ``(pipeline, tokenizer)`` pair.
    """
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
    except ImportError:
        # Fallback if an optional fast-tokenizer dependency is missing.
        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)
    # pipeline() takes a device index: GPU 0 when CUDA is available, else CPU (-1).
    target_device = 0 if torch.cuda.is_available() else -1
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=target_device)
    return pipe, tokenizer


pipe, tokenizer = load_model_and_tokenizer(HF_MODEL)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
# ---------------------------
# Prompt template
# ---------------------------
# Keys treated as demographics rather than biomarkers when building the prompt.
_DEMOGRAPHIC_KEYS = ("Age", "Weight", "Height", "Sex")


def build_prompt(inputs):
    """Assemble the instruction prompt sent to the language model.

    *inputs* maps field names to values. The four demographic keys
    (Age/Weight/Height/Sex) are summarised on a single line; every other
    key becomes a ``- name: value`` biomarker bullet, in insertion order.
    """
    bullet_lines = [
        f"- {name}: {value}"
        for name, value in inputs.items()
        if name not in _DEMOGRAPHIC_KEYS
    ]
    biomarkers = "\n".join(bullet_lines)
    demographics = (
        f"Age: {inputs['Age']}, Sex: {inputs['Sex']}, "
        f"Height: {inputs['Height']} cm, Weight: {inputs['Weight']} kg"
    )

    return f"""
You are a biomedical AI assistant.
You will generate a medical-style report based on the given biomarkers and demographics.
Follow this structure exactly:

### Executive Summary
(A concise summary of patient status)

### System-Specific Analysis
(Explain implications for cardiovascular, renal, hepatic, metabolic, and immune systems)

### Personalized Action Plan
(List lifestyle, dietary, and medical recommendations)

### Further Recommendations
(Additional tests, follow-ups, or referrals)

### Biomarker Interpretation Table
Generate a Markdown table with columns: Biomarker | Value | Status | Interpretation

---
Patient Demographics:
{demographics}

Biomarkers:
{biomarkers}
"""
59
+
60
+
61
# ---------------------------
# Gradio function
# ---------------------------
def generate_report(Age, Weight, Height, Sex,
                    Albumin, Creatinine, Glucose, CRP, MCV,
                    RDW, Hemoglobin, WBC, Platelets, Cholesterol):
    """Build a prompt from the UI fields and run the generation pipeline.

    Parameter names double as the field labels used inside the prompt.
    Returns the pipeline's raw generated text (which, for a causal LM
    text-generation pipeline, includes the prompt itself).
    """
    # Insertion order matters: it fixes the biomarker bullet order in the prompt.
    field_values = {
        "Age": Age, "Weight": Weight, "Height": Height, "Sex": Sex,
        "Albumin": Albumin, "Creatinine": Creatinine, "Glucose": Glucose,
        "CRP": CRP, "MCV": MCV, "RDW": RDW, "Hemoglobin": Hemoglobin,
        "WBC": WBC, "Platelets": Platelets, "Cholesterol": Cholesterol,
    }

    generated = pipe(build_prompt(field_values),
                     max_new_tokens=1500, temperature=0.6, do_sample=True)
    return generated[0]["generated_text"]
80
+
81
+
82
# ---------------------------
# Gradio UI
# ---------------------------
# (name, label, default) per biomarker textbox, in the exact positional
# order that generate_report expects after the four demographic inputs.
_BIOMARKER_FIELDS = [
    ("Albumin", "Albumin (g/dL)", "4.2"),
    ("Creatinine", "Creatinine (mg/dL)", "1.0"),
    ("Glucose", "Glucose (mg/dL)", "90"),
    ("CRP", "CRP (mg/L)", "2.0"),
    ("MCV", "MCV (fL)", "88"),
    ("RDW", "RDW (%)", "12.5"),
    ("Hemoglobin", "Hemoglobin (g/dL)", "14.0"),
    ("WBC", "WBC (10^3/uL)", "6.5"),
    ("Platelets", "Platelets (10^3/uL)", "250"),
    ("Cholesterol", "Cholesterol (mg/dL)", "180"),
]

with gr.Blocks() as demo:
    gr.Markdown("# 🧬 BioLLM: Biomarker AI Report Generator")
    gr.Markdown("Provide biomarkers + demographics to generate an AI-based health report.")

    with gr.Row():
        with gr.Column():
            # Demographics column.
            Age = gr.Number(label="Age", value=45)
            Sex = gr.Dropdown(["Male", "Female"], label="Sex", value="Male")
            Height = gr.Number(label="Height (cm)", value=175)
            Weight = gr.Number(label="Weight (kg)", value=75)

        with gr.Column():
            # Biomarker column: components are laid out in creation order,
            # and dict insertion order preserves the positional ordering.
            biomarker_boxes = {
                name: gr.Textbox(label=label, value=default)
                for name, label, default in _BIOMARKER_FIELDS
            }

    run_btn = gr.Button("🔍 Generate Report")
    output_box = gr.Markdown(label="AI-Generated Report")

    run_btn.click(
        generate_report,
        inputs=[Age, Weight, Height, Sex, *biomarker_boxes.values()],
        outputs=[output_box],
    )


# ---------------------------
# Run app
# ---------------------------
if __name__ == "__main__":
    demo.launch()