|
|
import streamlit as st
|
|
|
from transformers import BertForSequenceClassification, BertTokenizer
|
|
|
from huggingface_hub import hf_hub_download
|
|
|
import torch
|
|
|
import PyPDF2
|
|
|
import pickle
|
|
|
import re
|
|
|
from nltk.corpus import stopwords
|
|
|
import nltk
|
|
|
from io import BytesIO
|
|
|
|
|
|
|
|
|
nltk.download('stopwords')
|
|
|
|
|
|
|
|
|
HF_MODEL_REPO = "DrSyedFaizan/medReport"
|
|
|
|
|
|
@st.cache_resource
|
|
|
def load_model():
|
|
|
"""Load model, tokenizer, and label encoder from Hugging Face."""
|
|
|
|
|
|
try:
|
|
|
st.info("🔄 Loading model from Hugging Face...")
|
|
|
|
|
|
|
|
|
model = BertForSequenceClassification.from_pretrained(HF_MODEL_REPO)
|
|
|
tokenizer = BertTokenizer.from_pretrained(HF_MODEL_REPO)
|
|
|
|
|
|
|
|
|
label_encoder_path = hf_hub_download(repo_id=HF_MODEL_REPO, filename="label_encoder.pkl")
|
|
|
with open(label_encoder_path, "rb") as f:
|
|
|
label_encoder = pickle.load(f)
|
|
|
|
|
|
st.success("✅ Model Loaded Successfully!")
|
|
|
return model, tokenizer, label_encoder
|
|
|
|
|
|
except Exception as e:
|
|
|
st.error(f"❌ Error loading model: {e}")
|
|
|
return None, None, None
|
|
|
|
|
|
|
|
|
model, tokenizer, label_encoder = load_model()
|
|
|
|
|
|
|
|
|
disease_data = {
|
|
|
"Peptic Ulcer Disease": {
|
|
|
"description": "A sore that develops on the lining of the esophagus, stomach, or small intestine.",
|
|
|
"medicines": ["Omeprazole", "Pantoprazole", "Ranitidine", "Esomeprazole", "Amoxicillin"],
|
|
|
"specialists": ["Gastroenterologist", "General Physician", "Internal Medicine Specialist"]
|
|
|
},
|
|
|
"Type 2 Diabetes Mellitus": {
|
|
|
"description": "A chronic condition that affects the way the body processes blood sugar (glucose).",
|
|
|
"medicines": ["Metformin", "Glipizide", "Insulin", "Sitagliptin", "Canagliflozin"],
|
|
|
"specialists": ["Endocrinologist", "Diabetologist", "Nutritionist"]
|
|
|
},
|
|
|
"Acute Myocardial Infarction": {
|
|
|
"description": "A medical emergency where the blood flow to the heart is blocked.",
|
|
|
"medicines": ["Aspirin", "Clopidogrel", "Statins", "Beta Blockers", "ACE Inhibitors"],
|
|
|
"specialists": ["Cardiologist", "Emergency Medicine Specialist"]
|
|
|
},
|
|
|
"Chronic Obstructive Pulmonary Disease": {
|
|
|
"description": "A group of lung diseases that block airflow and make breathing difficult.",
|
|
|
"medicines": ["Tiotropium", "Albuterol", "Ipratropium", "Fluticasone", "Salmeterol"],
|
|
|
"specialists": ["Pulmonologist", "General Physician", "Respiratory Therapist"]
|
|
|
},
|
|
|
"Cerebrovascular Accident (Stroke)": {
|
|
|
"description": "A condition caused by the interruption of blood flow to the brain.",
|
|
|
"medicines": ["Alteplase", "Aspirin", "Clopidogrel", "Warfarin", "Atorvastatin"],
|
|
|
"specialists": ["Neurologist", "Rehabilitation Specialist", "Neurosurgeon"]
|
|
|
},
|
|
|
"Deep Vein Thrombosis": {
|
|
|
"description": "A blood clot forms in a deep vein, usually in the legs.",
|
|
|
"medicines": ["Warfarin", "Heparin", "Apixaban", "Dabigatran", "Rivaroxaban"],
|
|
|
"specialists": ["Hematologist", "Vascular Surgeon", "Cardiologist"]
|
|
|
},
|
|
|
"Chronic Kidney Disease": {
|
|
|
"description": "The gradual loss of kidney function over time.",
|
|
|
"medicines": ["Erythropoietin", "Phosphate Binders", "ACE Inhibitors", "Diuretics", "Calcitriol"],
|
|
|
"specialists": ["Nephrologist", "Dietitian", "Internal Medicine Specialist"]
|
|
|
},
|
|
|
"Community-Acquired Pneumonia": {
|
|
|
"description": "A lung infection acquired outside of a hospital setting.",
|
|
|
"medicines": ["Amoxicillin", "Azithromycin", "Clarithromycin", "Ceftriaxone", "Levofloxacin"],
|
|
|
"specialists": ["Pulmonologist", "Infectious Disease Specialist", "General Physician"]
|
|
|
},
|
|
|
"Septic Shock": {
|
|
|
"description": "A severe infection leading to dangerously low blood pressure.",
|
|
|
"medicines": ["Norepinephrine", "Vancomycin", "Meropenem", "Hydrocortisone", "Dopamine"],
|
|
|
"specialists": ["Intensivist", "Infectious Disease Specialist", "Emergency Medicine Specialist"]
|
|
|
},
|
|
|
"Rheumatoid Arthritis": {
|
|
|
"description": "An autoimmune disorder causing inflammation in joints.",
|
|
|
"medicines": ["Methotrexate", "Sulfasalazine", "Hydroxychloroquine", "Adalimumab", "Etanercept"],
|
|
|
"specialists": ["Rheumatologist", "Orthopedic Specialist", "Physical Therapist"]
|
|
|
},
|
|
|
"Congestive Heart Failure": {
|
|
|
"description": "A chronic condition where the heart doesn't pump blood effectively.",
|
|
|
"medicines": ["ACE Inhibitors", "Beta Blockers", "Diuretics", "Spironolactone", "Digoxin"],
|
|
|
"specialists": ["Cardiologist", "General Physician", "Cardiac Surgeon"]
|
|
|
},
|
|
|
"Pulmonary Embolism": {
|
|
|
"description": "A blockage in one of the pulmonary arteries in the lungs.",
|
|
|
"medicines": ["Heparin", "Warfarin", "Alteplase", "Rivaroxaban", "Dabigatran"],
|
|
|
"specialists": ["Pulmonologist", "Hematologist", "Emergency Medicine Specialist"]
|
|
|
},
|
|
|
"Sepsis": {
|
|
|
"description": "A life-threatening organ dysfunction caused by a dysregulated immune response to infection.",
|
|
|
"medicines": ["Vancomycin", "Meropenem", "Piperacillin-Tazobactam", "Cefepime", "Dopamine"],
|
|
|
"specialists": ["Infectious Disease Specialist", "Intensivist", "Emergency Medicine Specialist"]
|
|
|
},
|
|
|
"Liver Cirrhosis": {
|
|
|
"description": "A late-stage liver disease caused by liver scarring and damage.",
|
|
|
"medicines": ["Spironolactone", "Furosemide", "Lactulose", "Nadolol", "Rifaximin"],
|
|
|
"specialists": ["Hepatologist", "Gastroenterologist", "Nutritionist"]
|
|
|
},
|
|
|
"Acute Renal Failure": {
|
|
|
"description": "A sudden loss of kidney function.",
|
|
|
"medicines": ["Diuretics", "Dopamine", "Calcium Gluconate", "Sodium Bicarbonate", "Epoetin"],
|
|
|
"specialists": ["Nephrologist", "Critical Care Specialist", "Internal Medicine Specialist"]
|
|
|
},
|
|
|
"Urinary Tract Infection": {
|
|
|
"description": "An infection in any part of the urinary system.",
|
|
|
"medicines": ["Nitrofurantoin", "Ciprofloxacin", "Amoxicillin-Clavulanate", "Trimethoprim-Sulfamethoxazole", "Cephalexin"],
|
|
|
"specialists": ["Urologist", "General Physician", "Infectious Disease Specialist"]
|
|
|
},
|
|
|
"Hypertension": {
|
|
|
"description": "A condition in which the force of the blood against the artery walls is too high.",
|
|
|
"medicines": ["Lisinopril", "Amlodipine", "Losartan", "Hydrochlorothiazide", "Metoprolol"],
|
|
|
"specialists": ["Cardiologist", "General Physician", "Nephrologist"]
|
|
|
},
|
|
|
"Asthma": {
|
|
|
"description": "A condition in which the airways narrow and swell, causing difficulty in breathing.",
|
|
|
"medicines": ["Albuterol", "Fluticasone", "Montelukast", "Budesonide", "Salmeterol"],
|
|
|
"specialists": ["Pulmonologist", "Allergist", "General Physician"]
|
|
|
},
|
|
|
"Gastroesophageal Reflux Disease (GERD)": {
|
|
|
"description": "A digestive disorder where stomach acid irritates the esophagus.",
|
|
|
"medicines": ["Omeprazole", "Esomeprazole", "Ranitidine", "Lansoprazole", "Pantoprazole"],
|
|
|
"specialists": ["Gastroenterologist", "General Physician", "Dietitian"]
|
|
|
}
|
|
|
}
|
|
|
|
|
|
|
|
|
def clean_text(text):
|
|
|
stop_words = set(stopwords.words('english'))
|
|
|
text = str(text).lower()
|
|
|
text = re.sub(r'\d+', '', text)
|
|
|
text = re.sub(r'[^a-z\s]', '', text)
|
|
|
text = re.sub(r'\s+', ' ', text).strip()
|
|
|
text = ' '.join([word for word in text.split() if word not in stop_words])
|
|
|
return text
|
|
|
|
|
|
|
|
|
def predict_disease(patient_note, model, tokenizer, label_encoder):
|
|
|
if not model or not tokenizer or not label_encoder:
|
|
|
return "Error: Model not loaded properly."
|
|
|
|
|
|
patient_note = clean_text(patient_note)
|
|
|
inputs = tokenizer(patient_note, return_tensors="pt", padding=True, truncation=True, max_length=512)
|
|
|
|
|
|
with torch.no_grad():
|
|
|
outputs = model(**inputs)
|
|
|
logits = outputs.logits
|
|
|
|
|
|
predicted_label = torch.argmax(logits, dim=1).item()
|
|
|
predicted_disease = label_encoder.inverse_transform([predicted_label])[0]
|
|
|
return predicted_disease
|
|
|
|
|
|
|
|
|
def get_disease_details(disease_name):
|
|
|
return disease_data.get(disease_name, {
|
|
|
"description": "No details available.",
|
|
|
"medicines": [],
|
|
|
"specialists": []
|
|
|
})
|
|
|
|
|
|
|
|
|
st.title("🩺 Clinical Note Disease Prediction")
|
|
|
st.write("Upload a **medical note (PDF or TXT)**, and this app will predict the **disease** and provide relevant details.")
|
|
|
|
|
|
uploaded_file = st.file_uploader("Upload a clinical note (PDF/TXT)", type=["pdf", "txt"])
|
|
|
|
|
|
if uploaded_file is not None:
|
|
|
text = ""
|
|
|
|
|
|
if uploaded_file.name.endswith(".pdf"):
|
|
|
try:
|
|
|
pdf_reader = PyPDF2.PdfReader(BytesIO(uploaded_file.read()))
|
|
|
for page in pdf_reader.pages:
|
|
|
text += page.extract_text()
|
|
|
except Exception as e:
|
|
|
st.error(f"❌ Error reading PDF: {e}")
|
|
|
elif uploaded_file.name.endswith(".txt"):
|
|
|
text = uploaded_file.read().decode("utf-8")
|
|
|
|
|
|
if text:
|
|
|
st.subheader("Extracted Text from File:")
|
|
|
st.write(text[:1000])
|
|
|
|
|
|
|
|
|
predicted_disease = predict_disease(text, model, tokenizer, label_encoder)
|
|
|
disease_details = get_disease_details(predicted_disease)
|
|
|
|
|
|
st.success(f"### Predicted Disease: **{predicted_disease}**")
|
|
|
st.write(f"**Description:** {disease_details['description']}")
|
|
|
|
|
|
|
|
|
if disease_details["medicines"]:
|
|
|
st.write("💊 **Recommended Medicines:**")
|
|
|
st.write(", ".join(disease_details["medicines"]))
|
|
|
|
|
|
|
|
|
if disease_details["specialists"]:
|
|
|
st.write("🩺 **Recommended Specialists:**")
|
|
|
st.write(", ".join(disease_details["specialists"]))
|
|
|
|
|
|
|
|
|
st.download_button("Download Extracted Text", text, file_name="extracted_text.txt")
|
|
|
|
|
|
else:
|
|
|
st.error("Could not extract text from the file. Please try another file.")
|
|
|
|
|
|
|