Muhammadidrees committed on
Commit
f19b8f7
·
verified ·
1 Parent(s): 239b274

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -362
app.py CHANGED
@@ -1,386 +1,124 @@
1
- """
2
- app.py
3
- Gradio app that loads Muhammadidrees/bioLLM (Hugging Face) and
4
- performs prompt-feeding based biomarker report generation.
5
-
6
- Inputs:
7
- - Nine Levine biomarkers (numeric)
8
- - Age, weight (kg), height (cm), sex
9
-
10
- Outputs:
11
- - Text sections: Executive summary, System analysis, Personalized action plan, Further recommendations
12
- - Table: biomarker | value | unit | status | short interpretation
13
-
14
- Notes:
15
- - Not a clinical decision tool. Validate outputs externally.
16
- - If CUDA is available, the script will use GPU (device=0).
17
- """
18
-
19
- import os
20
- import math
21
- import pandas as pd
22
- import torch
23
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, set_seed
24
  import gradio as gr
25
- from typing import Dict, Tuple
26
-
27
- # ========== Config ==========
28
- HF_MODEL = "Muhammadidrees/bioLLM" # user-specified
29
- SEED = 42
30
- set_seed(SEED)
31
-
32
- # Generation hyperparameters (tune as needed)
33
- GEN_KWARGS = dict(
34
- max_new_tokens=1500,
35
- do_sample=True,
36
- temperature=0.7,
37
- top_p=0.92,
38
- num_return_sequences=1,
39
- eos_token_id=None,
40
- )
41
-
42
- # ========== Utility functions ==========
43
 
44
- def bmi_from_weight_height(kg: float, cm: float) -> float:
45
- if kg <= 0 or cm <= 0:
46
- return None
47
- m = cm / 100.0
48
- return kg / (m * m)
49
 
50
- def device_selection():
51
- if torch.cuda.is_available():
52
- return 0 # pipeline expects device index (0)
53
- return -1 # CPU
 
 
 
 
 
54
 
55
- def load_model_and_tokenizer(model_name: str):
56
- """
57
- Load tokenizer and model for causal LM. Adjust device map as needed.
58
- """
59
- print(f"Loading tokenizer and model: {model_name} ...")
60
- tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
61
- # Avoid loading to CPU twice; huggingface will map to available device
62
- model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16 if torch.cuda.is_available() else None, low_cpu_mem_usage=True)
63
- # Create pipeline
64
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device_selection())
65
  return pipe, tokenizer
66
 
67
- # ========== Domain knowledge: Levine 9 biomarkers ==========
68
- # Units below are conventional / commonly used — allow user to input any units but document expects these.
69
- LEVINED_BIOMARKERS = [
70
- ("Fasting Glucose", "mg/dL"),
71
- ("C-reactive protein (CRP)", "mg/L"),
72
- ("Albumin", "g/dL"),
73
- ("Alkaline phosphatase", "U/L"),
74
- ("Creatinine", "mg/dL"),
75
- ("Red Cell Distribution Width (RDW)", "%"),
76
- ("Lymphocyte %", "%"),
77
- ("White Blood Cell count (WBC)", "10^3/µL"),
78
- ("Mean Corpuscular Volume (MCV)", "fL"),
79
- ]
80
-
81
- # Example reference ranges (simple defaults). You can adjust to local lab ranges.
82
- # These ranges are coarse and illustrative. Use local clinical ranges in production.
83
- REFERENCE_RANGES = {
84
- "Fasting Glucose": (70, 99), # mg/dL normal fasting
85
- "C-reactive protein (CRP)": (0.0, 3.0), # mg/L low-normal; >10 often indicates acute inflammation
86
- "Albumin": (3.5, 5.0), # g/dL
87
- "Alkaline phosphatase": (44, 147), # U/L
88
- "Creatinine": (0.6, 1.3), # mg/dL (adult average; sex/age dependent)
89
- "Red Cell Distribution Width (RDW)": (11.5, 14.5), # %
90
- "Lymphocyte %": (20.0, 50.0), # %
91
- "White Blood Cell count (WBC)": (4.0, 11.0), # 10^3/µL
92
- "Mean Corpuscular Volume (MCV)": (80, 100), # fL
93
- }
94
-
95
- def value_to_status(name: str, val: float) -> str:
96
- """
97
- Convert a numeric biomarker value to a simple 'Low/Normal/High' status.
98
- Uses REFERENCE_RANGES map; if not found returns 'Unknown'.
99
- """
100
- if val is None:
101
- return "Missing"
102
- rng = REFERENCE_RANGES.get(name)
103
- if not rng:
104
- return "Unknown"
105
- low, high = rng
106
- try:
107
- v = float(val)
108
- except:
109
- return "Invalid"
110
- if v < low:
111
- return "Low"
112
- elif v > high:
113
- return "High"
114
- else:
115
- return "Normal"
116
 
117
- def short_interpretation(name: str, val: float, status: str) -> str:
118
- """
119
- Very short, literature-style interpretation used for the table.
120
- These are template-like; you can expand or replace with another knowledge base.
121
- """
122
- if status == "Missing":
123
- return "No value provided."
124
- if status == "Invalid":
125
- return "Invalid input."
126
- if status == "Unknown":
127
- return "No reference range available."
128
- if name == "Albumin":
129
- if status == "Low":
130
- return "Hypoalbuminemia — malnutrition, liver disease, or inflammation possible."
131
- else:
132
- return "Within expected range."
133
- if name.startswith("C-reactive"):
134
- if status == "High":
135
- return "Elevated — suggests systemic inflammation or acute infection."
136
- else:
137
- return "Within expected range."
138
- if name == "Fasting Glucose":
139
- if status == "High":
140
- return "Hyperglycemia — impaired glucose control; consider further metabolic workup."
141
- else:
142
- return "Within expected fasting range."
143
- if name == "Creatinine":
144
- if status == "High":
145
- return "Possible renal dysfunction or reduced GFR (age/sex dependent)."
146
- else:
147
- return "Within expected range."
148
- if name == "Alkaline phosphatase":
149
- if status == "High":
150
- return "May suggest cholestasis or bone turnover; correlate clinically."
151
- else:
152
- return "Within expected range."
153
- if name == "Red Cell Distribution Width (RDW)":
154
- if status == "High":
155
- return "Anisocytosis — possible nutritional deficiency or marrow response."
156
- else:
157
- return "Within expected range."
158
- if name == "Lymphocyte %":
159
- if status == "Low":
160
- return "Relative lymphopenia — may reflect infection or immunosuppression."
161
- else:
162
- return "Within expected range."
163
- if name == "White Blood Cell count (WBC)":
164
- if status == "High":
165
- return "Leukocytosis — often infection or inflammation."
166
- elif status == "Low":
167
- return "Leukopenia — consider bone marrow suppression or viral infection."
168
- else:
169
- return "Within expected range."
170
- if name == "Mean Corpuscular Volume (MCV)":
171
- if status == "High":
172
- return "Macrocytosis — check B12/folate, liver disease, alcohol use."
173
- elif status == "Low":
174
- return "Microcytosis — consider iron deficiency."
175
- else:
176
- return "Within expected range."
177
- # fallback
178
- return "Interpretation not available."
179
 
180
- # ========== Prompt construction ==========
181
- PROMPT_TEMPLATE = """
182
- You are a biomedical-language assistant. You will be given patient demographics and a list of biomarkers with values and status (Low/Normal/High).
183
- Produce an output that contains the following labeled sections:
 
 
184
 
185
- 1) Executive Summary:
186
- - 2-4 sentence high-level synthesis of the patient's biomarker pattern and clinical implication.
 
 
187
 
188
- 2) System-specific analysis:
189
- - Bullet-style analysis per organ/system (e.g., metabolic, hepatic, renal, hematologic, immune), referencing biomarkers when relevant.
190
 
191
- 3) Personalized action plan:
192
- - Practical, prioritized next steps suitable for a clinician or informed researcher (diagnostic tests, immediate actions, lifestyle suggestions).
193
- - Keep recommendations conservative and evidence-minded. Mention when specialty referral is reasonable.
194
 
195
- 4) Further recommendations:
196
- - Additional tests, monitoring frequency, non-pharmacological measures, and resources for patient education.
197
 
198
- 5) Tabular biomarker interpretation:
199
- - Provide a Markdown table with columns: Biomarker | Value | Unit | Status | Short interpretation.
200
- - Include only biomarkers provided.
201
 
202
- Important:
203
- - Keep your language cautious and non-prescriptive (e.g., "consider", "suggest", "may indicate").
204
- - Add a single-line concluding clinical disclaimer: "This report is for research/educational use only and not a substitute for clinical judgment."
205
 
206
- Patient demographics:
207
- - Age: {age}
208
- - Sex: {sex}
209
- - Weight: {weight_kg} kg
210
- - Height: {height_cm} cm
211
- - BMI: {bmi:.1f}
212
 
213
  Biomarkers:
214
- {biomarker_lines}
215
-
216
- Now produce the requested sections.
217
  """
218
-
219
- def build_biomarker_lines(biomarker_inputs: Dict[str, Tuple[float,str]]) -> str:
220
- """
221
- biomarker_inputs: dict mapping biomarker name -> (value, unit)
222
- returns a text block with lines: - Name: value unit (Status)
223
- """
224
- lines = []
225
- for name, (val, unit) in biomarker_inputs.items():
226
- status = value_to_status(name, val)
227
- val_str = "" if val is None else str(val)
228
- lines.append(f"- {name}: {val_str} {unit} ({status})")
229
- return "\n".join(lines)
230
-
231
- # ========== Load pipeline ==========
232
- print("Initializing model pipeline...\n(If this stalls, make sure the model is available and you have network access.)")
233
- pipe, tokenizer = load_model_and_tokenizer(HF_MODEL)
234
- print("Model loaded.")
235
-
236
- # ========== Inference function ==========
237
- def generate_report(
238
- fasting_glucose,
239
- crp,
240
- albumin,
241
- alk_phos,
242
- creatinine,
243
- rdw,
244
- lymph_pct,
245
- wbc,
246
- mcv,
247
- age,
248
- weight_kg,
249
- height_cm,
250
- sex
251
- ):
252
- # Build biomarker dict (names must match LEVINE list keys)
253
- biom_inputs = {
254
- "Fasting Glucose": (fasting_glucose, "mg/dL"),
255
- "C-reactive protein (CRP)": (crp, "mg/L"),
256
- "Albumin": (albumin, "g/dL"),
257
- "Alkaline phosphatase": (alk_phos, "U/L"),
258
- "Creatinine": (creatinine, "mg/dL"),
259
- "Red Cell Distribution Width (RDW)": (rdw, "%"),
260
- "Lymphocyte %": (lymph_pct, "%"),
261
- "White Blood Cell count (WBC)": (wbc, "10^3/µL"),
262
- "Mean Corpuscular Volume (MCV)": (mcv, "fL"),
263
  }
264
 
265
- # Compute BMI
266
- bmi = bmi_from_weight_height(weight_kg, height_cm) or 0.0
267
-
268
- # Construct biomarker lines for prompt
269
- biomarker_lines = build_biomarker_lines(biom_inputs)
270
- prompt = PROMPT_TEMPLATE.format(
271
- age=int(age) if age is not None else "Unknown",
272
- sex=sex,
273
- weight_kg=weight_kg,
274
- height_cm=height_cm,
275
- bmi=bmi,
276
- biomarker_lines=biomarker_lines
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
277
  )
278
 
279
- # Debug: you may print prompt during development
280
- # print("Prompt sent to model:\n", prompt)
281
-
282
- # Call model
283
- gen = pipe(prompt, **GEN_KWARGS)[0]["generated_text"]
284
-
285
- # Post-process: separate sections if model includes them; otherwise present whole text.
286
- # We'll attempt to split by the numeric section headings used in prompt (1), 2), etc.
287
- sections = {"full_text": gen}
288
- for label in ["Executive Summary:", "System-specific analysis:", "Personalized action plan:", "Further recommendations:", "Tabular biomarker interpretation:"]:
289
- if label in gen:
290
- # naive split: find start
291
- start = gen.find(label)
292
- # find next label start index
293
- sections[label] = gen[start: gen.find("\n\n", start) + 2] # short snippet fallback
294
-
295
- # Build table as DataFrame for nicer display in Gradio
296
- rows = []
297
- for name, (val, unit) in biom_inputs.items():
298
- status = value_to_status(name, val)
299
- interp = short_interpretation(name, val, status)
300
- rows.append({
301
- "Biomarker": name,
302
- "Value": "" if val is None else val,
303
- "Unit": unit,
304
- "Status": status,
305
- "Short interpretation": interp
306
- })
307
- df = pd.DataFrame(rows)
308
-
309
- # Return model raw text and dataframe
310
- return gen, df
311
-
312
- # ========== Gradio UI ==========
313
- def launch_app():
314
- demo_inputs = {
315
- "fasting_glucose": 92,
316
- "crp": 5.2,
317
- "albumin": 3.2,
318
- "alk_phos": 85,
319
- "creatinine": 1.0,
320
- "rdw": 13.1,
321
- "lymph_pct": 30,
322
- "wbc": 7.2,
323
- "mcv": 90,
324
- "age": 58,
325
- "weight_kg": 78,
326
- "height_cm": 172,
327
- "sex": "Male",
328
- }
329
-
330
- with gr.Blocks(title="bioLLM — Biomarker Report (Levine 9-panel)") as demo:
331
- # gr.Markdown("## bioLLM biomarker report generator\n**Not for clinical use.** ` and prompt-feeding to produce literature-style explanations.")
332
- with gr.Row():
333
- with gr.Column(scale=1):
334
- fasting_glucose = gr.Number(label="Fasting Glucose (mg/dL)", value=demo_inputs["fasting_glucose"])
335
- crp = gr.Number(label="C-reactive protein (CRP) (mg/L)", value=demo_inputs["crp"])
336
- albumin = gr.Number(label="Albumin (g/dL)", value=demo_inputs["albumin"])
337
- alk_phos = gr.Number(label="Alkaline phosphatase (U/L)", value=demo_inputs["alk_phos"])
338
- creatinine = gr.Number(label="Creatinine (mg/dL)", value=demo_inputs["creatinine"])
339
- with gr.Column(scale=1):
340
- rdw = gr.Number(label="Red Cell Distribution Width (RDW) (%)", value=demo_inputs["rdw"])
341
- lymph_pct = gr.Number(label="Lymphocyte %", value=demo_inputs["lymph_pct"])
342
- wbc = gr.Number(label="White Blood Cell count (10^3/µL)", value=demo_inputs["wbc"])
343
- mcv = gr.Number(label="Mean Corpuscular Volume (MCV) (fL)", value=demo_inputs["mcv"])
344
- sex = gr.Dropdown(choices=["Male", "Female", "Other"], value=demo_inputs["sex"], label="Sex")
345
- with gr.Row():
346
- age = gr.Number(label="Age (years)", value=demo_inputs["age"])
347
- weight_kg = gr.Number(label="Weight (kg)", value=demo_inputs["weight_kg"])
348
- height_cm = gr.Number(label="Height (cm)", value=demo_inputs["height_cm"])
349
- with gr.Row():
350
- submit = gr.Button("Generate report")
351
- clear = gr.Button("Reset")
352
- with gr.Row():
353
- output_text = gr.Markdown(label="Model report (text)")
354
- with gr.Row():
355
- output_table = gr.Dataframe(headers=["Biomarker", "Value", "Unit", "Status", "Short interpretation"], label="Tabular interpretation (derived)")
356
-
357
- def on_submit(
358
- fasting_glucose, crp, albumin, alk_phos, creatinine,
359
- rdw, lymph_pct, wbc, mcv, age, weight_kg, height_cm, sex
360
- ):
361
- text, df = generate_report(
362
- fasting_glucose, crp, albumin, alk_phos, creatinine,
363
- rdw, lymph_pct, wbc, mcv, age, weight_kg, height_cm, sex
364
- )
365
- # Return text as markdown and dataframe as table
366
- return text, df
367
-
368
- submit.click(
369
- on_submit,
370
- inputs=[fasting_glucose, crp, albumin, alk_phos, creatinine, rdw, lymph_pct, wbc, mcv, age, weight_kg, height_cm, sex],
371
- outputs=[output_text, output_table]
372
- )
373
- clear.click(lambda: (gr.update(value=demo_inputs["fasting_glucose"]), gr.update(value=demo_inputs["crp"]),
374
- gr.update(value=demo_inputs["albumin"]), gr.update(value=demo_inputs["alk_phos"]),
375
- gr.update(value=demo_inputs["creatinine"]), gr.update(value=demo_inputs["rdw"]),
376
- gr.update(value=demo_inputs["lymph_pct"]), gr.update(value=demo_inputs["wbc"]),
377
- gr.update(value=demo_inputs["mcv"]), gr.update(value=demo_inputs["age"]),
378
- gr.update(value=demo_inputs["weight_kg"]), gr.update(value=demo_inputs["height_cm"]),
379
- gr.update(value=demo_inputs["sex"])),
380
- inputs=[], outputs=[]
381
- )
382
-
383
- demo.launch(server_name="0.0.0.0", share=False)
384
 
 
 
 
385
  if __name__ == "__main__":
386
- launch_app()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
HF_MODEL = "Muhammadidrees/bioLLM"


# ---------------------------
# Safe model/tokenizer loader
# ---------------------------
def load_model_and_tokenizer(model_name):
    """Load tokenizer, model, and a text-generation pipeline for *model_name*.

    The fast tokenizer is tried first; if it raises ImportError (e.g. an
    optional dependency such as sacremoses is missing) the slow tokenizer
    is used instead. Returns a ``(pipeline, tokenizer)`` pair.
    """
    try:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
    except ImportError:
        # Fallback if an optional fast-tokenizer dependency is missing.
        tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

    model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)
    # pipeline() takes a device index: GPU 0 when CUDA is available, else CPU (-1).
    target_device = 0 if torch.cuda.is_available() else -1
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=target_device)
    return pipe, tokenizer


pipe, tokenizer = load_model_and_tokenizer(HF_MODEL)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
# ---------------------------
# Prompt template
# ---------------------------
# Keys treated as demographics rather than biomarkers when building the prompt.
_DEMOGRAPHIC_KEYS = ("Age", "Weight", "Height", "Sex")


def build_prompt(inputs):
    """Assemble the instruction prompt sent to the language model.

    *inputs* maps field names to values. The four demographic keys
    (Age/Weight/Height/Sex) are summarised on a single line; every other
    key becomes a ``- name: value`` biomarker bullet, in insertion order.
    """
    bullet_lines = [
        f"- {name}: {value}"
        for name, value in inputs.items()
        if name not in _DEMOGRAPHIC_KEYS
    ]
    biomarkers = "\n".join(bullet_lines)
    demographics = (
        f"Age: {inputs['Age']}, Sex: {inputs['Sex']}, "
        f"Height: {inputs['Height']} cm, Weight: {inputs['Weight']} kg"
    )

    return f"""
You are a biomedical AI assistant.
You will generate a medical-style report based on the given biomarkers and demographics.
Follow this structure exactly:

### Executive Summary
(A concise summary of patient status)

### System-Specific Analysis
(Explain implications for cardiovascular, renal, hepatic, metabolic, and immune systems)

### Personalized Action Plan
(List lifestyle, dietary, and medical recommendations)

### Further Recommendations
(Additional tests, follow-ups, or referrals)

### Biomarker Interpretation Table
Generate a Markdown table with columns: Biomarker | Value | Status | Interpretation

---
Patient Demographics:
{demographics}

Biomarkers:
{biomarkers}
"""
59
+
60
+
61
# ---------------------------
# Gradio function
# ---------------------------
def generate_report(Age, Weight, Height, Sex,
                    Albumin, Creatinine, Glucose, CRP, MCV,
                    RDW, Hemoglobin, WBC, Platelets, Cholesterol):
    """Build a prompt from the UI fields and run the generation pipeline.

    Parameter names double as the field labels used inside the prompt.
    Returns the pipeline's raw generated text (which, for a causal LM
    text-generation pipeline, includes the prompt itself).
    """
    # Insertion order matters: it fixes the biomarker bullet order in the prompt.
    field_values = {
        "Age": Age, "Weight": Weight, "Height": Height, "Sex": Sex,
        "Albumin": Albumin, "Creatinine": Creatinine, "Glucose": Glucose,
        "CRP": CRP, "MCV": MCV, "RDW": RDW, "Hemoglobin": Hemoglobin,
        "WBC": WBC, "Platelets": Platelets, "Cholesterol": Cholesterol,
    }

    generated = pipe(build_prompt(field_values),
                     max_new_tokens=1500, temperature=0.6, do_sample=True)
    return generated[0]["generated_text"]
80
+
81
+
82
# ---------------------------
# Gradio UI
# ---------------------------
# (name, label, default) per biomarker textbox, in the exact positional
# order that generate_report expects after the four demographic inputs.
_BIOMARKER_FIELDS = [
    ("Albumin", "Albumin (g/dL)", "4.2"),
    ("Creatinine", "Creatinine (mg/dL)", "1.0"),
    ("Glucose", "Glucose (mg/dL)", "90"),
    ("CRP", "CRP (mg/L)", "2.0"),
    ("MCV", "MCV (fL)", "88"),
    ("RDW", "RDW (%)", "12.5"),
    ("Hemoglobin", "Hemoglobin (g/dL)", "14.0"),
    ("WBC", "WBC (10^3/uL)", "6.5"),
    ("Platelets", "Platelets (10^3/uL)", "250"),
    ("Cholesterol", "Cholesterol (mg/dL)", "180"),
]

with gr.Blocks() as demo:
    gr.Markdown("# 🧬 BioLLM: Biomarker AI Report Generator")
    gr.Markdown("Provide biomarkers + demographics to generate an AI-based health report.")

    with gr.Row():
        with gr.Column():
            # Demographics column.
            Age = gr.Number(label="Age", value=45)
            Sex = gr.Dropdown(["Male", "Female"], label="Sex", value="Male")
            Height = gr.Number(label="Height (cm)", value=175)
            Weight = gr.Number(label="Weight (kg)", value=75)

        with gr.Column():
            # Biomarker column: components are laid out in creation order,
            # and dict insertion order preserves the positional ordering.
            biomarker_boxes = {
                name: gr.Textbox(label=label, value=default)
                for name, label, default in _BIOMARKER_FIELDS
            }

    run_btn = gr.Button("🔍 Generate Report")
    output_box = gr.Markdown(label="AI-Generated Report")

    run_btn.click(
        generate_report,
        inputs=[Age, Weight, Height, Sex, *biomarker_boxes.values()],
        outputs=[output_box],
    )


# ---------------------------
# Run app
# ---------------------------
if __name__ == "__main__":
    demo.launch()