Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,168 +1,249 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
|
|
|
|
|
| 3 |
|
| 4 |
-
# Load
|
| 5 |
MODEL_ID = "Muhammadidrees/my-gpt-oss"
|
| 6 |
|
| 7 |
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
| 8 |
-
model = AutoModelForCausalLM.from_pretrained(
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
# Function to build structured input and query the LLM
|
| 12 |
def analyze(
|
| 13 |
albumin, creatinine, glucose, crp, mcv, rdw, alp,
|
| 14 |
wbc, lymph, age, gender, height, weight
|
| 15 |
):
|
| 16 |
# Calculate BMI
|
| 17 |
try:
|
| 18 |
-
height_m = height / 100
|
| 19 |
bmi = round(weight / (height_m ** 2), 2)
|
| 20 |
-
except
|
| 21 |
bmi = "N/A"
|
| 22 |
|
| 23 |
-
#
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
| 29 |
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
- Protein & Liver Health (Albumin, ALP)
|
| 37 |
-
- Kidney Health (Creatinine)
|
| 38 |
-
- Metabolic Health (Glucose, CRP)
|
| 39 |
|
| 40 |
-
|
| 41 |
-
- Medical: [Recommended tests/consultations]
|
| 42 |
-
- Nutrition: [Dietary recommendations and supplements]
|
| 43 |
-
- Lifestyle: [Exercise, hydration, sleep guidance]
|
| 44 |
-
- Testing: [Follow-up labs needed]
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
|
|
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
|
| 60 |
-
|
|
|
|
| 61 |
|
| 62 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
-
|
| 68 |
-
-
|
| 69 |
-
- Height: {height} cm
|
| 70 |
-
- Weight: {weight} kg
|
| 71 |
-
- BMI: {bmi}
|
| 72 |
|
| 73 |
-
|
| 74 |
-
- Albumin: {albumin} g/dL
|
| 75 |
-
- Creatinine: {creatinine} mg/dL
|
| 76 |
-
- Glucose: {glucose} mg/dL
|
| 77 |
-
- C-Reactive Protein (CRP): {crp} mg/L
|
| 78 |
-
- Mean Cell Volume (MCV): {mcv} fL
|
| 79 |
-
- Red Cell Distribution Width (RDW): {rdw} %
|
| 80 |
-
- Alkaline Phosphatase (ALP): {alp} U/L
|
| 81 |
-
- White Blood Cell Count (WBC): {wbc} K/uL
|
| 82 |
-
- Lymphocyte Percentage: {lymph} %
|
| 83 |
|
| 84 |
-
|
| 85 |
|
| 86 |
-
|
| 87 |
|
| 88 |
try:
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
-
#
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
|
|
|
|
|
|
| 111 |
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
return output_text
|
| 117 |
-
|
| 118 |
except Exception as e:
|
| 119 |
-
return f"Error
|
| 120 |
|
| 121 |
|
| 122 |
-
#
|
| 123 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
|
|
| 124 |
gr.Markdown("""
|
| 125 |
-
#
|
| 126 |
-
###
|
| 127 |
""")
|
| 128 |
-
|
| 129 |
with gr.Row():
|
| 130 |
-
with gr.Column():
|
| 131 |
gr.Markdown("### 👤 Demographics")
|
| 132 |
-
age = gr.Number(label="Age
|
| 133 |
-
gender = gr.Dropdown(
|
| 134 |
height = gr.Number(label="Height (cm)", value=175)
|
| 135 |
-
weight = gr.Number(label="Weight (kg)", value=
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
gr.
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
rdw = gr.Number(label="Red Cell Distribution Width (%)", value=13)
|
| 143 |
-
|
| 144 |
-
with gr.Row():
|
| 145 |
-
with gr.Column():
|
| 146 |
-
gr.Markdown("### 🫀 Metabolic Markers")
|
| 147 |
-
glucose = gr.Number(label="Glucose (mg/dL)", value=95)
|
| 148 |
-
crp = gr.Number(label="C-Reactive Protein (mg/L)", value=1.5)
|
| 149 |
|
| 150 |
-
with gr.Column():
|
| 151 |
-
gr.Markdown("### 🧬
|
| 152 |
albumin = gr.Number(label="Albumin (g/dL)", value=4.2)
|
| 153 |
creatinine = gr.Number(label="Creatinine (mg/dL)", value=1.0)
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
|
|
|
|
|
|
| 157 |
|
| 158 |
-
gr.Markdown("###
|
| 159 |
output = gr.Textbox(
|
| 160 |
-
label="
|
| 161 |
-
lines=
|
| 162 |
max_lines=50,
|
| 163 |
-
show_copy_button=True
|
|
|
|
| 164 |
)
|
| 165 |
-
|
| 166 |
analyze_btn.click(
|
| 167 |
fn=analyze,
|
| 168 |
inputs=[albumin, creatinine, glucose, crp, mcv, rdw, alp,
|
|
@@ -170,10 +251,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 170 |
outputs=output
|
| 171 |
)
|
| 172 |
|
| 173 |
-
gr.Markdown(""
|
| 174 |
-
---
|
| 175 |
-
**Note:** This tool provides educational insights based on biomarker analysis.
|
| 176 |
-
Always consult healthcare professionals for medical advice.
|
| 177 |
-
""")
|
| 178 |
|
| 179 |
-
demo.launch(
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 3 |
+
import torch
|
| 4 |
+
import re
|
| 5 |
|
| 6 |
+
# Load the tokenizer and causal-LM weights once, at import time.
MODEL_ID = "Muhammadidrees/my-gpt-oss"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Half precision when a CUDA device is available, full precision otherwise.
if torch.cuda.is_available():
    _weights_dtype = torch.float16
else:
    _weights_dtype = torch.float32

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    torch_dtype=_weights_dtype,
)

# When the tokenizer defines no pad token, reuse the end-of-sequence token
# for padding on both the tokenizer and the model config.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.eos_token_id
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# Reasoning / meta-commentary fragments that leak into the generated text.
# Each "…(?=\*\*|$)" pattern deletes from the trigger phrase up to (but not
# including) the next bold marker, or to the end of the text.
# Compiled once at import time instead of on every call.
_REASONING_PATTERNS = tuple(
    re.compile(pattern, re.DOTALL | re.IGNORECASE)
    for pattern in (
        r"Let's produce.*?(?=\*\*|$)",
        r"We need to.*?(?=\*\*|$)",
        r"We must.*?(?=\*\*|$)",
        # Strip only the marker itself: the "**" that may follow it is the
        # opening bold marker of the first heading and must be kept.
        r"assistantfinal",
        r"Note that.*?(?=\*\*|$)",
        r"Use concise statements.*?(?=\*\*|$)",
        r"Provide bullet points.*?(?=\*\*|$)",
        r"✅ Medical Insights(?!\*\*)",
    )
)

# Headings that mark the start of the real report, in priority order.
# "## 1. Executive Summary" must be tried before its substring
# "1. Executive Summary", otherwise the "## " prefix is sliced off.
_REPORT_START_MARKERS = (
    "**1. Executive Summary**",
    "**Executive Summary**",
    "## 1. Executive Summary",
    "1. Executive Summary",
)


def clean_output(text):
    """Remove reasoning artifacts and extract only the actual report.

    Args:
        text: Raw decoded model output. ``None`` or an empty string is
            accepted and yields an empty string.

    Returns:
        The text with known reasoning fragments removed, cut so that it
        starts at the first recognised "Executive Summary" heading (when one
        is present), with runs of three or more asterisks collapsed to
        ``**``, runs of three or more newlines collapsed to one blank line,
        and surrounding whitespace stripped.
    """
    if not text:
        return ""

    for pattern in _REASONING_PATTERNS:
        text = pattern.sub("", text)

    # Drop everything before the highest-priority heading that is present.
    for marker in _REPORT_START_MARKERS:
        idx = text.find(marker)
        if idx != -1:
            text = text[idx:]
            break

    # Clean up duplicate asterisks and excess blank lines.
    text = re.sub(r"\*{3,}", "**", text)
    text = re.sub(r"\n{3,}", "\n\n", text)
    return text.strip()
|
| 59 |
+
|
| 60 |
|
|
|
|
| 61 |
# Reference interval (low, high) for each biomarker; a value below `low` is
# reported as "Low", above `high` as "High", anything else as "Normal".
_REFERENCE_RANGES = {
    "albumin": (3.5, 5.5),
    "creatinine": (0.7, 1.3),
    "glucose": (70, 100),
    "crp": (0, 3),
    "mcv": (80, 100),
    "rdw": (11.5, 14.5),
    "alp": (44, 147),
    "wbc": (4, 11),
    "lymph": (20, 40),
}

# Display names used when reporting a missing lab value.
_BIOMARKER_LABELS = {
    "albumin": "Albumin",
    "creatinine": "Creatinine",
    "glucose": "Glucose",
    "crp": "CRP",
    "mcv": "MCV",
    "rdw": "RDW",
    "alp": "ALP",
    "wbc": "WBC",
    "lymph": "Lymphocytes",
}

# Heading that both ends the prompt and opens the generated report.
_REPORT_HEADING = "**1. Executive Summary**"

# Section titles the finished report is expected to contain.
_REQUIRED_SECTIONS = (
    "Executive Summary",
    "System-Specific Analysis",
    "Personalized Action Plan",
    "Interaction Alerts",
    "Tabular Mapping",
    "Enhanced AI Insights",
)


def _get_status(value, normal_range):
    """Classify `value` as "Low", "High" or "Normal" against (low, high)."""
    low, high = normal_range
    if value < low:
        return "Low"
    if value > high:
        return "High"
    return "Normal"


def _compute_bmi(height, weight):
    """Return BMI from height in cm and weight in kg, or "N/A" if not computable."""
    try:
        height_m = height / 100
        return round(weight / (height_m ** 2), 2)
    except (TypeError, ZeroDivisionError, OverflowError):
        # Blank field (None) or a zero / absurd height.
        return "N/A"


def analyze(
    albumin, creatinine, glucose, crp, mcv, rdw, alp,
    wbc, lymph, age, gender, height, weight
):
    """Generate a structured biomarker report with the language model.

    Args:
        albumin, creatinine, glucose, crp, mcv, rdw, alp, wbc, lymph:
            Numeric lab values. All nine are required; any that is ``None``
            causes an error message to be returned without calling the model.
        age, gender: Interpolated into the prompt as given.
        height, weight: Height in cm and weight in kg, used only for BMI;
            BMI becomes "N/A" when it cannot be computed.

    Returns:
        The cleaned report text (with a diagnostic warning appended when the
        report is short or is missing several sections), or a string starting
        with "❌ **Error**" when inputs are missing or generation fails.
    """
    labs = {
        "albumin": albumin,
        "creatinine": creatinine,
        "glucose": glucose,
        "crp": crp,
        "mcv": mcv,
        "rdw": rdw,
        "alp": alp,
        "wbc": wbc,
        "lymph": lymph,
    }

    # A blank numeric field arrives as None; comparing it against a range
    # would raise TypeError, so report it instead of crashing.
    missing_inputs = [
        _BIOMARKER_LABELS[name] for name, value in labs.items() if value is None
    ]
    if missing_inputs:
        return (
            f"❌ **Error**: Missing value(s) for: {', '.join(missing_inputs)}."
            "\n\nPlease verify all inputs and model availability."
        )

    bmi = _compute_bmi(height, weight)
    status = {
        name: _get_status(value, _REFERENCE_RANGES[name])
        for name, value in labs.items()
    }

    # Ultra-direct prompt with explicit instruction to skip reasoning.
    # NOTE(review): prompt lines are written flush-left; the reviewed copy of
    # this file had lost its indentation, so confirm against the original.
    prompt = f"""You are a medical AI. Generate ONLY the final report. Do NOT include any reasoning, planning, or meta-commentary.

OUTPUT ONLY THIS STRUCTURE:

**1. Executive Summary**
- **Top Priority Issues**: List 2-3 main concerns based on abnormal values
- **Key Strengths**: List 2-3 positive findings

**2. System-Specific Analysis**
- **Blood Health** (MCV {mcv} fL, RDW {rdw}%, Lymph {lymph}%, WBC {wbc} K/uL): Brief clinical interpretation
- **Protein & Liver Health** (Albumin {albumin} g/dL, ALP {alp} U/L): Brief clinical interpretation
- **Kidney Health** (Creatinine {creatinine} mg/dL): Brief clinical interpretation
- **Metabolic Health** (Glucose {glucose} mg/dL, CRP {crp} mg/L): Brief clinical interpretation

**3. Personalized Action Plan**
- **Medical**: Specific tests or consultations needed
- **Nutrition**: Dietary recommendations and supplements
- **Lifestyle**: Exercise, hydration, sleep recommendations
- **Testing**: Follow-up labs (ferritin, Vitamin D, GGT, etc.)

**4. Interaction Alerts**
Explain how abnormal biomarkers may interact or compound risks.

**5. Tabular Mapping**
| Biomarker | Value | Status | Clinical Insight | Recommendation |
|-----------|-------|--------|------------------|----------------|
| Albumin | {albumin} g/dL | {status['albumin']} | [insight] | [action] |
| Creatinine | {creatinine} mg/dL | {status['creatinine']} | [insight] | [action] |
| Glucose | {glucose} mg/dL | {status['glucose']} | [insight] | [action] |
| CRP | {crp} mg/L | {status['crp']} | [insight] | [action] |
| MCV | {mcv} fL | {status['mcv']} | [insight] | [action] |
| RDW | {rdw}% | {status['rdw']} | [insight] | [action] |
| ALP | {alp} U/L | {status['alp']} | [insight] | [action] |
| WBC | {wbc} K/uL | {status['wbc']} | [insight] | [action] |
| Lymphocytes | {lymph}% | {status['lymph']} | [insight] | [action] |

**6. Enhanced AI Insights & Longitudinal Risk**
- **Subclinical Nutrients**: Predicted Iron, B12, Folate, Copper status
- **ALP Source**: Likely bone vs liver origin
- **Immune Trends**: WBC and lymphocyte assessment
- **Long-term Risk**: Predictive health considerations

Patient: {gender}, Age {age}, BMI {bmi}

START REPORT NOW (no reasoning or commentary):

**1. Executive Summary**"""

    try:
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=2048)
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        prompt_length = inputs["input_ids"].shape[1]

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=2800,
                min_new_tokens=1000,
                temperature=0.75,
                top_p=0.92,
                top_k=40,
                repetition_penalty=1.2,
                do_sample=True,
                early_stopping=False,
                pad_token_id=tokenizer.pad_token_id,
                eos_token_id=tokenizer.eos_token_id,
                no_repeat_ngram_size=4
            )

        # The returned sequence starts with the prompt tokens. Decode only
        # the newly generated ones instead of searching the decoded text for
        # prompt phrases, which breaks if the model echoes them.
        completion = tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )

        # The prompt ends on the first heading, so the completion continues
        # from it; put the heading back before cleaning.
        output_text = clean_output(_REPORT_HEADING + completion)

        # Validation
        missing_sections = [s for s in _REQUIRED_SECTIONS if s not in output_text]

        if len(output_text) < 800 or len(missing_sections) >= 3:
            warning = "".join([
                "\n\n⚠️ **Model Performance Issue Detected**\n\n",
                f"Generated text length: {len(output_text)} characters\n",
                f"Missing sections: {', '.join(missing_sections) if missing_sections else 'None'}\n\n",
                "**This model may not be suitable for this task. Consider:**\n",
                "1. Using Llama-3-8B-Instruct or Mistral-7B-Instruct\n",
                "2. Fine-tuning on medical report generation\n",
                "3. Using API-based models (GPT-4, Claude, etc.)\n",
            ])
            output_text += warning

        return output_text

    except Exception as e:
        # Top-level UI boundary: surface the failure as text, not a traceback.
        return f"❌ **Error**: {str(e)}\n\nPlease verify all inputs and model availability."
|
| 204 |
|
| 205 |
|
| 206 |
+
# Gradio Interface
|
| 207 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 208 |
+
|
| 209 |
gr.Markdown("""
|
| 210 |
+
# 🏥 AI Medical Biomarker Analysis
|
| 211 |
+
### Comprehensive wellness insights from lab values
|
| 212 |
""")
|
| 213 |
+
|
| 214 |
with gr.Row():
|
| 215 |
+
with gr.Column(scale=1):
|
| 216 |
gr.Markdown("### 👤 Demographics")
|
| 217 |
+
age = gr.Number(label="Age", value=35)
|
| 218 |
+
gender = gr.Dropdown(["Male", "Female"], label="Gender", value="Male")
|
| 219 |
height = gr.Number(label="Height (cm)", value=175)
|
| 220 |
+
weight = gr.Number(label="Weight (kg)", value=75)
|
| 221 |
+
|
| 222 |
+
gr.Markdown("### 🩸 Blood Panel")
|
| 223 |
+
wbc = gr.Number(label="WBC (K/uL)", value=7.0)
|
| 224 |
+
lymph = gr.Number(label="Lymphocytes (%)", value=30)
|
| 225 |
+
mcv = gr.Number(label="MCV (fL)", value=90)
|
| 226 |
+
rdw = gr.Number(label="RDW (%)", value=13)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 227 |
|
| 228 |
+
with gr.Column(scale=1):
|
| 229 |
+
gr.Markdown("### 🧬 Chemistry Panel")
|
| 230 |
albumin = gr.Number(label="Albumin (g/dL)", value=4.2)
|
| 231 |
creatinine = gr.Number(label="Creatinine (mg/dL)", value=1.0)
|
| 232 |
+
glucose = gr.Number(label="Glucose (mg/dL)", value=95)
|
| 233 |
+
crp = gr.Number(label="CRP (mg/L)", value=1.5)
|
| 234 |
+
alp = gr.Number(label="ALP (U/L)", value=75)
|
| 235 |
+
|
| 236 |
+
analyze_btn = gr.Button("🔬 Generate Report", variant="primary", size="lg")
|
| 237 |
|
| 238 |
+
gr.Markdown("### 📊 Analysis Output")
|
| 239 |
output = gr.Textbox(
|
| 240 |
+
label="Medical Report",
|
| 241 |
+
lines=30,
|
| 242 |
max_lines=50,
|
| 243 |
+
show_copy_button=True,
|
| 244 |
+
placeholder="Results will appear here..."
|
| 245 |
)
|
| 246 |
+
|
| 247 |
analyze_btn.click(
|
| 248 |
fn=analyze,
|
| 249 |
inputs=[albumin, creatinine, glucose, crp, mcv, rdw, alp,
|
|
|
|
| 251 |
outputs=output
|
| 252 |
)
|
| 253 |
|
| 254 |
+
gr.Markdown("*Disclaimer: For educational purposes only. Consult healthcare professionals.*")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
|
| 256 |
+
demo.launch()
|