Medico / app.py
Safalya's picture
Upload 2 files
3f122dd verified
import streamlit as st
from transformers import BertForSequenceClassification, BertTokenizer
from huggingface_hub import hf_hub_download
import torch
import PyPDF2
import pickle
import re
from nltk.corpus import stopwords
import nltk
from io import BytesIO
# Make sure the NLTK stopwords corpus is available. Streamlit re-executes this
# script on every user interaction, so look for a local copy first and only
# invoke the downloader when the corpus is actually missing.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

# Hugging Face repository that hosts the model weights, the tokenizer files,
# and the pickled label encoder.
HF_MODEL_REPO = "DrSyedFaizan/medReport"
@st.cache_resource
def _load_model_resources():
    """Download and build the model, tokenizer, and label encoder.

    This helper is the cached part of the loading logic. It deliberately lets
    exceptions propagate so that a failed attempt is never stored in the
    resource cache; only a fully successful load is cached.

    Returns:
        A ``(model, tokenizer, label_encoder)`` tuple.
    """
    model = BertForSequenceClassification.from_pretrained(HF_MODEL_REPO)
    tokenizer = BertTokenizer.from_pretrained(HF_MODEL_REPO)

    label_encoder_path = hf_hub_download(repo_id=HF_MODEL_REPO, filename="label_encoder.pkl")
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # This is only acceptable while HF_MODEL_REPO is a trusted repository.
    with open(label_encoder_path, "rb") as f:
        label_encoder = pickle.load(f)

    return model, tokenizer, label_encoder


def load_model():
    """Load model, tokenizer, and label encoder from Hugging Face.

    The heavy lifting is delegated to the cached ``_load_model_resources``
    helper. Error handling lives here, outside the cache, so a transient
    failure (for example a network error) is reported but not cached, and the
    next script run retries the load instead of replaying the failure.

    Returns:
        ``(model, tokenizer, label_encoder)`` on success, or
        ``(None, None, None)`` if anything went wrong while loading.
    """
    st.info("🔄 Loading model from Hugging Face...")
    try:
        resources = _load_model_resources()
    except Exception as e:
        # Top-level boundary: surface the problem in the UI and let the app
        # keep running without a model.
        st.error(f"❌ Error loading model: {e}")
        return None, None, None

    st.success("✅ Model Loaded Successfully!")
    return resources
# Load the model, tokenizer, and label encoder for the rest of the script.
# All three names are None when load_model() failed.
model, tokenizer, label_encoder = load_model()
# Static reference data shown alongside a prediction.
# Maps a disease name to a dict with three keys:
#   "description" - one-sentence summary (str)
#   "medicines"   - list of medicine names (list of str)
#   "specialists" - list of specialist titles (list of str)
disease_data = {
    "Peptic Ulcer Disease": {
        "description": "A sore that develops on the lining of the esophagus, stomach, or small intestine.",
        "medicines": ["Omeprazole", "Pantoprazole", "Ranitidine", "Esomeprazole", "Amoxicillin"],
        "specialists": ["Gastroenterologist", "General Physician", "Internal Medicine Specialist"],
    },
    "Type 2 Diabetes Mellitus": {
        "description": "A chronic condition that affects the way the body processes blood sugar (glucose).",
        "medicines": ["Metformin", "Glipizide", "Insulin", "Sitagliptin", "Canagliflozin"],
        "specialists": ["Endocrinologist", "Diabetologist", "Nutritionist"],
    },
    "Acute Myocardial Infarction": {
        "description": "A medical emergency where the blood flow to the heart is blocked.",
        "medicines": ["Aspirin", "Clopidogrel", "Statins", "Beta Blockers", "ACE Inhibitors"],
        "specialists": ["Cardiologist", "Emergency Medicine Specialist"],
    },
    "Chronic Obstructive Pulmonary Disease": {
        "description": "A group of lung diseases that block airflow and make breathing difficult.",
        "medicines": ["Tiotropium", "Albuterol", "Ipratropium", "Fluticasone", "Salmeterol"],
        "specialists": ["Pulmonologist", "General Physician", "Respiratory Therapist"],
    },
    "Cerebrovascular Accident (Stroke)": {
        "description": "A condition caused by the interruption of blood flow to the brain.",
        "medicines": ["Alteplase", "Aspirin", "Clopidogrel", "Warfarin", "Atorvastatin"],
        "specialists": ["Neurologist", "Rehabilitation Specialist", "Neurosurgeon"],
    },
    "Deep Vein Thrombosis": {
        "description": "A blood clot forms in a deep vein, usually in the legs.",
        "medicines": ["Warfarin", "Heparin", "Apixaban", "Dabigatran", "Rivaroxaban"],
        "specialists": ["Hematologist", "Vascular Surgeon", "Cardiologist"],
    },
    "Chronic Kidney Disease": {
        "description": "The gradual loss of kidney function over time.",
        "medicines": ["Erythropoietin", "Phosphate Binders", "ACE Inhibitors", "Diuretics", "Calcitriol"],
        "specialists": ["Nephrologist", "Dietitian", "Internal Medicine Specialist"],
    },
    "Community-Acquired Pneumonia": {
        "description": "A lung infection acquired outside of a hospital setting.",
        "medicines": ["Amoxicillin", "Azithromycin", "Clarithromycin", "Ceftriaxone", "Levofloxacin"],
        "specialists": ["Pulmonologist", "Infectious Disease Specialist", "General Physician"],
    },
    "Septic Shock": {
        "description": "A severe infection leading to dangerously low blood pressure.",
        "medicines": ["Norepinephrine", "Vancomycin", "Meropenem", "Hydrocortisone", "Dopamine"],
        "specialists": ["Intensivist", "Infectious Disease Specialist", "Emergency Medicine Specialist"],
    },
    "Rheumatoid Arthritis": {
        "description": "An autoimmune disorder causing inflammation in joints.",
        "medicines": ["Methotrexate", "Sulfasalazine", "Hydroxychloroquine", "Adalimumab", "Etanercept"],
        "specialists": ["Rheumatologist", "Orthopedic Specialist", "Physical Therapist"],
    },
    "Congestive Heart Failure": {
        "description": "A chronic condition where the heart doesn't pump blood effectively.",
        "medicines": ["ACE Inhibitors", "Beta Blockers", "Diuretics", "Spironolactone", "Digoxin"],
        "specialists": ["Cardiologist", "General Physician", "Cardiac Surgeon"],
    },
    "Pulmonary Embolism": {
        "description": "A blockage in one of the pulmonary arteries in the lungs.",
        "medicines": ["Heparin", "Warfarin", "Alteplase", "Rivaroxaban", "Dabigatran"],
        "specialists": ["Pulmonologist", "Hematologist", "Emergency Medicine Specialist"],
    },
    "Sepsis": {
        "description": "A life-threatening organ dysfunction caused by a dysregulated immune response to infection.",
        "medicines": ["Vancomycin", "Meropenem", "Piperacillin-Tazobactam", "Cefepime", "Dopamine"],
        "specialists": ["Infectious Disease Specialist", "Intensivist", "Emergency Medicine Specialist"],
    },
    "Liver Cirrhosis": {
        "description": "A late-stage liver disease caused by liver scarring and damage.",
        "medicines": ["Spironolactone", "Furosemide", "Lactulose", "Nadolol", "Rifaximin"],
        "specialists": ["Hepatologist", "Gastroenterologist", "Nutritionist"],
    },
    "Acute Renal Failure": {
        "description": "A sudden loss of kidney function.",
        "medicines": ["Diuretics", "Dopamine", "Calcium Gluconate", "Sodium Bicarbonate", "Epoetin"],
        "specialists": ["Nephrologist", "Critical Care Specialist", "Internal Medicine Specialist"],
    },
    "Urinary Tract Infection": {
        "description": "An infection in any part of the urinary system.",
        "medicines": ["Nitrofurantoin", "Ciprofloxacin", "Amoxicillin-Clavulanate", "Trimethoprim-Sulfamethoxazole", "Cephalexin"],
        "specialists": ["Urologist", "General Physician", "Infectious Disease Specialist"],
    },
    "Hypertension": {
        "description": "A condition in which the force of the blood against the artery walls is too high.",
        "medicines": ["Lisinopril", "Amlodipine", "Losartan", "Hydrochlorothiazide", "Metoprolol"],
        "specialists": ["Cardiologist", "General Physician", "Nephrologist"],
    },
    "Asthma": {
        "description": "A condition in which the airways narrow and swell, causing difficulty in breathing.",
        "medicines": ["Albuterol", "Fluticasone", "Montelukast", "Budesonide", "Salmeterol"],
        "specialists": ["Pulmonologist", "Allergist", "General Physician"],
    },
    "Gastroesophageal Reflux Disease (GERD)": {
        "description": "A digestive disorder where stomach acid irritates the esophagus.",
        "medicines": ["Omeprazole", "Esomeprazole", "Ranitidine", "Lansoprazole", "Pantoprazole"],
        "specialists": ["Gastroenterologist", "General Physician", "Dietitian"],
    },
}
# Function to Clean Text
# Matches every character that is neither a lowercase ASCII letter nor
# whitespace; digits and punctuation are both covered by this one pattern.
_NON_LETTER_RE = re.compile(r'[^a-z\s]')


def clean_text(text, stop_words=None):
    """Normalise free text into a lowercase, stopword-free token string.

    Steps: convert to ``str`` and lowercase, delete every character that is
    not ``a-z`` or whitespace, split on whitespace, drop stop words, and join
    the remaining words with single spaces.

    Args:
        text: Input value; it is passed through ``str()`` first, so non-string
            values are accepted.
        stop_words: Optional collection of words to remove. When ``None``
            (the default) the NLTK English stopword list is used.

    Returns:
        The cleaned text; an empty string when no words remain.
    """
    if stop_words is None:
        stop_words = set(stopwords.words('english'))

    # NOTE(review): characters are deleted rather than replaced by a space, so
    # "chest-pain" becomes "chestpain". Kept as-is because the classifier was
    # presumably trained on text cleaned the same way - confirm before changing.
    letters_only = _NON_LETTER_RE.sub('', str(text).lower())

    # str.split() with no argument already collapses whitespace runs and
    # ignores leading/trailing whitespace, so no separate normalisation pass
    # is needed.
    return ' '.join(word for word in letters_only.split() if word not in stop_words)
# Function to Predict Disease
def predict_disease(patient_note, model, tokenizer, label_encoder):
    """Predict the disease name for a clinical note.

    Args:
        patient_note: Raw note text; it is normalised with ``clean_text``
            before tokenisation.
        model: Classification model, invoked as ``model(**inputs)``; its
            output must expose a ``logits`` attribute.
        tokenizer: Tokenizer, invoked on the cleaned note and asked for
            PyTorch tensors.
        label_encoder: Object whose ``inverse_transform`` maps a list of class
            indices back to disease names.

    Returns:
        The predicted disease name, or the string
        ``"Error: Model not loaded properly."`` when any of ``model``,
        ``tokenizer`` or ``label_encoder`` is ``None``.
    """
    # Compare against None explicitly. A truthiness test would call
    # __len__/__bool__ on these objects, which is needless work and can
    # misclassify a valid object that happens to evaluate as falsy.
    if model is None or tokenizer is None or label_encoder is None:
        return "Error: Model not loaded properly."

    cleaned_note = clean_text(patient_note)
    # Input is truncated to at most 512 tokens.
    inputs = tokenizer(cleaned_note, return_tensors="pt", padding=True, truncation=True, max_length=512)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Index of the highest-scoring class for the single note in the batch.
    predicted_label = torch.argmax(logits, dim=1).item()
    return label_encoder.inverse_transform([predicted_label])[0]
# Function to Retrieve Disease Details
def get_disease_details(disease_name):
    """Look up the reference entry for ``disease_name`` in ``disease_data``.

    Returns the stored entry when the name is known. Otherwise returns a
    placeholder dict with the same three keys: a "No details available."
    description and empty medicine and specialist lists.
    """
    if disease_name in disease_data:
        return disease_data[disease_name]

    # Unknown disease: hand back a placeholder with the same shape.
    placeholder = {
        "description": "No details available.",
        "medicines": [],
        "specialists": []
    }
    return placeholder
# Streamlit UI
# Flow: upload a PDF/TXT file -> extract its text -> predict the disease ->
# show the matching reference details -> offer the extracted text for download.
st.title("🩺 Clinical Note Disease Prediction")
st.write("Upload a **medical note (PDF or TXT)**, and this app will predict the **disease** and provide relevant details.")

uploaded_file = st.file_uploader("Upload a clinical note (PDF/TXT)", type=["pdf", "txt"])

if uploaded_file is not None:
    text = ""
    # Compare the extension case-insensitively so files such as "REPORT.PDF"
    # are not silently skipped.
    file_name = uploaded_file.name.lower()

    if file_name.endswith(".pdf"):
        page_texts = []
        try:
            pdf_reader = PyPDF2.PdfReader(BytesIO(uploaded_file.read()))
            for page in pdf_reader.pages:
                # A page without extractable text may yield nothing; treat it
                # as an empty string instead of failing the concatenation.
                page_texts.append(page.extract_text() or "")
        except Exception as e:
            st.error(f"❌ Error reading PDF: {e}")
        # Keep whatever pages were read before a failure, if any.
        text = "".join(page_texts)
    elif file_name.endswith(".txt"):
        try:
            text = uploaded_file.read().decode("utf-8")
        except UnicodeDecodeError as e:
            # Report files that are not valid UTF-8 instead of crashing.
            st.error(f"❌ Error reading text file: {e}")

    # Whitespace-only content carries nothing to classify.
    if text.strip():
        st.subheader("Extracted Text from File:")
        st.write(text[:1000])  # Show a snippet of extracted text

        if model is None or tokenizer is None or label_encoder is None:
            # Without a loaded model, predict_disease only returns an error
            # string; show that as an error rather than as a prediction.
            st.error("❌ Model is not loaded, so no prediction can be made.")
        else:
            # Predict disease
            predicted_disease = predict_disease(text, model, tokenizer, label_encoder)
            disease_details = get_disease_details(predicted_disease)

            st.success(f"### Predicted Disease: **{predicted_disease}**")
            st.write(f"**Description:** {disease_details['description']}")

            # Display Medicines
            if disease_details["medicines"]:
                st.write("💊 **Recommended Medicines:**")
                st.write(", ".join(disease_details["medicines"]))

            # Display Specialists
            if disease_details["specialists"]:
                st.write("🩺 **Recommended Specialists:**")
                st.write(", ".join(disease_details["specialists"]))

        # Download extracted text
        st.download_button("Download Extracted Text", text, file_name="extracted_text.txt")
    else:
        st.error("Could not extract text from the file. Please try another file.")