# Bio-Medical-Information-Parser / streamlit_app.py
# Last commit by vbzvibin (5feeceb, verified): Rename app.py to streamlit_app.py
# --- VERY IMPORTANT: Environment Troubleshooting ---
# This script uses the official OpenAI library. If you have issues, create a clean environment.
#
# 1. Create a New, Clean Environment:
# - python -m venv venv
#
# 2. Activate the New Environment:
# - Windows: .\venv\Scripts\activate
# - macOS/Linux: source venv/bin/activate
#
# 3. Install the Required Packages:
# - pip install streamlit pandas openai
#
# 4. Run the App:
# - streamlit run your_script_name.py
# ----------------------------------------------------
import streamlit as st
import pandas as pd
import openai
import json
import os
# --- BioMedicalParser Class Definition ---
class BioMedicalParser:
    """
    Parse biomedical text with the OpenAI API to extract structured information.

    The parser fills a fixed extraction prompt (field schema plus the
    Alzheimer's classification rules) with the clinical note, sends it to a
    chat model, and decodes the JSON reply into a dictionary. Failures are
    reported to the user through ``st.error`` and signalled to the caller by
    an empty dictionary.
    """

    def __init__(self, api_key, model="gpt-3.5-turbo"):
        """
        Initializes the parser with the OpenAI API key.

        Args:
            api_key: OpenAI API key used to construct the client.
            model: Name of the chat model sent with every request.

        Raises:
            ValueError: If the OpenAI client cannot be constructed.
        """
        try:
            self.client = openai.OpenAI(api_key=api_key)
        except Exception as e:
            # Chain the original exception so the root cause stays visible.
            raise ValueError(f"Failed to initialize OpenAI client. Check your API key. Error: {e}") from e
        self.model = model
        # NOTE: "{text}" is the only str.format placeholder in this template;
        # any literal braces added later must be doubled ("{{", "}}").
        self.prompt_template_str = """
Extract the relevant information from the following medical text. Identify all relevant entities as per the given schemas. The input text will be an unstructured clinical note.
Alzheimer's Disease Evaluation:
- You must classify the patient's Alzheimer's status into one of the following categories under `Alzheimer_Status`:
- "Confirmed": Explicit diagnosis or clear confirmation (e.g., “patient diagnosed with Alzheimer’s”, or confirmed test results and symptoms).
- "Probable": Strong indicators such as short-term memory loss, poor judgment, amyloid plaque buildup, or disorganized speech but no confirmed diagnosis.
- "Possible": Mild cognitive signs or one indicative symptom without confirmation.
- "Not Present": No symptoms, tests, or history suggesting Alzheimer’s.
- Ignore family history for Alzheimer_Status classification.
- Add `Alzheimer_Reason` to explain the classification for Confirmed, Probable, or Possible statuses. Leave it blank for "Not Present".
- A mention of a test like "PET/CT scan for Alzheimer disease" does not imply presence unless accompanied by relevant symptoms or results.
- Negated symptoms, diseases, and test findings (e.g., "no memory issues") should be excluded from consideration.
Text:
{text}
Schema:
- Protected_Health_Information:
- Patient_Name: Extract the full name of the patient.
- Age: Extract the age of the patient.
- Gender: Extract the gender of the patient.
- Date_of_Visit: The date of the clinical encounter.
- Address: The patient's full mailing address.
- Phone_Number: The patient's phone number.
- Email: The patient's email address.
- Fax: Any mentioned fax numbers.
- Chief_Complaint: The primary reason for the patient's visit.
- History_of_Present_Illness: A detailed narrative of the current medical issue, including duration of symptoms.
- Past_Medical_History: All mentioned past diagnoses and conditions (e.g., Hypertension, Diabetes).
- Medications: All mentioned medications and their dosages.
- Allergies: Any known allergies to medications, food, etc.
- Social_History: Lifestyle factors like smoking, alcohol use, occupation, and living situation.
- Family_History: Extract family history of diseases.
- Review_of_Systems: A summary of symptoms by body system (e.g., Cardiovascular, Neurological).
- Physical_Examination:
- Vital_Signs: Blood pressure, heart rate, temperature, respiratory rate, SpO2.
- General: General appearance and condition of the patient.
- Specific_Findings: Findings from examination of specific body parts (e.g., HEENT, Cardiovascular, Lungs, Neurological).
- Diagnostic_and_Lab_Results:
- Lab_Tests: Names of lab tests performed (e.g., CBC, BMP, TSH).
- Lab_Values: Specific lab values with units (e.g., "WBC: 12.5 k/uL", "Glucose: 140 mg/dL").
- Imaging: Imaging studies performed and their findings (e.g., "Chest X-ray: clear", "Brain MRI: mild cortical atrophy").
- Assessment: The physician's summary of diagnoses or problems.
- Diagnosis_Codes:
- ICD_Codes: Extract all mentioned International Classification of Diseases (ICD) codes (e.g., "I10", "E11.9").
- SNOMED_Codes: Extract all mentioned Systematized Nomenclature of Medicine (SNOMED) codes (e.g., "38341003", "44054006").
- Plan: The proposed treatment and management plan, including therapeutic procedures, new medications, referrals, and follow-up instructions.
- Alzheimer_Status: One of ["Confirmed", "Probable", "Possible", "Not Present"] based on the medical text.
- Alzheimer_Reason: Provide the reason behind the assigned status if applicable (for Confirmed, Probable, or Possible).
Answer in JSON format:
"""

    @staticmethod
    def _strip_code_fence(raw: str) -> str:
        """
        Return *raw* without a surrounding Markdown code fence.

        Handles an opening "```" with or without a "json" language tag and a
        closing "```", regardless of whether newlines separate the fences from
        the payload. Text without a leading fence is only whitespace-stripped.
        """
        cleaned = raw.strip()
        if not cleaned.startswith("```"):
            return cleaned
        cleaned = cleaned[3:]
        # Drop the optional language tag that follows the opening fence.
        if cleaned[:4].lower() == "json":
            cleaned = cleaned[4:]
        if cleaned.endswith("```"):
            cleaned = cleaned[:-3]
        return cleaned.strip()

    def parse(self, text: str) -> dict:
        """
        Calls the OpenAI API with the input text and returns the parsed result.

        Args:
            text: Unstructured clinical note to analyse.

        Returns:
            The decoded JSON object as a dict, or an empty dict when the API
            call fails, the reply is empty, the reply is not valid JSON, or
            the decoded JSON is not an object. Each failure is also shown to
            the user through Streamlit.
        """
        full_prompt = self.prompt_template_str.format(text=text)
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant designed to extract structured data from medical text and respond in JSON format."},
                    {"role": "user", "content": full_prompt},
                ],
                temperature=0,
            )
            result_str = response.choices[0].message.content
        except openai.APIError as e:
            st.error(f"An OpenAI API error occurred: {e}")
            return {}
        except Exception as e:
            st.error(f"An unexpected error occurred during API call: {e}")
            return {}

        # The message content may be missing; guard before calling str methods.
        if not result_str or not result_str.strip():
            st.error("Error: The model returned an empty response.")
            return {}

        try:
            parsed = json.loads(self._strip_code_fence(result_str))
        except json.JSONDecodeError:
            st.error("Error: The model returned a response that could not be decoded into JSON.")
            st.text("Raw output from model:")
            st.text(result_str)
            return {}

        # Callers use dict methods on the result, so reject lists and scalars.
        if not isinstance(parsed, dict):
            st.error("Error: The model returned JSON that is not an object.")
            st.text("Raw output from model:")
            st.text(result_str)
            return {}
        return parsed
# --- Streamlit Application UI ---
def _as_dict(value):
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _to_text(value):
    """Render a model-returned value (scalar, list or dict) as display text."""
    if value is None:
        return "N/A"
    if isinstance(value, str):
        return value
    if isinstance(value, dict):
        return "; ".join(f"{key}: {_to_text(item)}" for key, item in value.items())
    if isinstance(value, (list, tuple)):
        return ", ".join(_to_text(item) for item in value)
    return str(value)


def _as_rows(value):
    """Return *value* as a list of display strings, one per table row."""
    if value is None or value == "":
        return []
    if isinstance(value, dict):
        return [f"{key}: {_to_text(item)}" for key, item in value.items()]
    items = value if isinstance(value, (list, tuple)) else [value]
    return [_to_text(item) for item in items]


def _two_column_table(mapping, columns):
    """Show *mapping* as a static two-column table with text-only values."""
    rows = [(key, _to_text(item)) for key, item in mapping.items()]
    st.table(pd.DataFrame(rows, columns=columns))


def _render_patient_tab(result, phi_info):
    """Render patient identifiers, chief complaint and history of present illness."""
    st.markdown("##### Patient Information")
    if phi_info:
        _two_column_table(phi_info, ["Field", "Details"])
    st.markdown("##### Chief Complaint")
    st.info(_to_text(result.get("Chief_Complaint", "N/A")))
    st.markdown("##### History of Present Illness")
    st.write(result.get("History_of_Present_Illness", "N/A"))


def _render_clinical_tab(result):
    """Render history, allergies, medications and physical examination."""
    left, right = st.columns(2)
    with left:
        st.markdown("##### Past Medical History")
        st.dataframe({"Condition": _as_rows(result.get("Past_Medical_History"))}, use_container_width=True)
        st.markdown("##### Social History")
        st.write(result.get("Social_History", "N/A"))
    with right:
        st.markdown("##### Allergies")
        st.dataframe({"Known Allergies": _as_rows(result.get("Allergies"))}, use_container_width=True)
        st.markdown("##### Family History")
        st.write(result.get("Family_History", "N/A"))
    st.markdown("##### Medications")
    meds = _as_rows(result.get("Medications"))
    if meds:
        st.dataframe({"Medication": meds}, use_container_width=True)
    st.markdown("##### Physical Examination")
    pe_data = _as_dict(result.get("Physical_Examination"))
    if pe_data:
        _two_column_table(pe_data, ["Category", "Findings"])


def _render_diagnostics_tab(result):
    """Render diagnostic/lab results and diagnosis codes."""
    st.markdown("##### Diagnostic and Lab Results")
    diag_data = _as_dict(result.get("Diagnostic_and_Lab_Results"))
    if diag_data:
        # Turn schema keys such as "Lab_Values" into readable labels.
        labelled = {key.replace('_', ' ').title(): item for key, item in diag_data.items()}
        _two_column_table(labelled, ["Test Type", "Details"])
    st.markdown("##### Diagnosis Codes")
    codes_data = _as_dict(result.get("Diagnosis_Codes"))
    if codes_data:
        _two_column_table(codes_data, ["Code System", "Codes"])


def _render_plan_tab(result):
    """Render the physician's assessment and the treatment plan."""
    st.markdown("##### Assessment")
    st.success(_to_text(result.get("Assessment", "N/A")))
    st.markdown("##### Plan")
    st.warning(_to_text(result.get("Plan", "N/A")))


def _render_results(result):
    """Render the parsed record as headline metrics followed by four detail tabs."""
    phi_info = _as_dict(result.get("Protected_Health_Information"))

    # --- Display Key Metrics ---
    name_col, status_col = st.columns(2)
    name_col.metric(label="Patient Name", value=_to_text(phi_info.get("Patient_Name") or "N/A"))
    alz_status = _to_text(result.get("Alzheimer_Status") or "N/A")
    alz_reason = result.get("Alzheimer_Reason", "")
    status_col.metric(
        label="Alzheimer's Status",
        value=alz_status,
        help=f"Reason: {alz_reason}" if alz_reason else "No reason provided.",
    )
    st.markdown("---")

    # --- Use tabs for different sections ---
    tab_labels = ["Patient & Visit", "Clinical Findings", "Diagnostics & Codes", "Assessment & Plan"]
    patient_tab, clinical_tab, diagnostics_tab, plan_tab = st.tabs(tab_labels)
    with patient_tab:
        _render_patient_tab(result, phi_info)
    with clinical_tab:
        _render_clinical_tab(result)
    with diagnostics_tab:
        _render_diagnostics_tab(result)
    with plan_tab:
        _render_plan_tab(result)


def main():
    """
    The main function that defines the Streamlit web application.

    Collects the OpenAI API key (sidebar) and the clinical note (text area),
    runs ``BioMedicalParser.parse`` when the button is pressed, and renders
    the extracted fields. The model's JSON is not guaranteed to follow the
    requested schema, so every section is coerced to the shape its widget
    needs before display instead of assuming dicts and lists of strings.
    """
    st.set_page_config(page_title="Bio-Medical Parser", layout="wide")
    st.title("🩺 Bio-Medical Information Parser")
    st.markdown("This application uses the OpenAI API to extract structured information from unstructured medical text. Enter your OpenAI API key and the medical text below to begin.")
    with st.sidebar:
        st.header("Configuration")
        api_key = st.text_input("Enter your OpenAI API Key:", type="password")
        st.markdown("[Get an OpenAI API key](https://platform.openai.com/account/api-keys)")
    st.subheader("Enter Medical Text")
    sample_text = """
Patient Name: John Doe. DOB: 1947-05-12. Contact: 123-456-7890, john.doe@email.com. Address: 123 Main St, Anytown, USA 12345.
The patient is a 78-year-old male, John Doe, who visited on 2025-07-08. He was brought in by his daughter with a chief complaint of increasing forgetfulness and confusion over the last 6 months. His daughter reports that he has been increasingly forgetful, frequently misplacing items like his keys and wallet, and has had difficulty managing his finances. Last week, he got lost while driving home from the grocery store, a route he has taken for 30 years. He has also shown poor judgment, such as wearing a heavy winter coat on a warm day. The patient denies any hallucinations or delusions. His past medical history is significant for hypertension (diagnosed 15 years ago), type 2 diabetes mellitus (diagnosed 10 years ago), and hyperlipidemia. He is currently taking Lisinopril 20 mg daily, Metformin 1000 mg twice daily, and Atorvastatin 40 mg nightly. He has a known allergy to Penicillin, which causes a rash. Socially, Mr. Doe is a retired accountant. He is a widower and lives alone, though his daughter checks on him daily. He smoked 1 pack per day for 40 years but quit 15 years ago and drinks a glass of wine with dinner occasionally. His family history reveals that his mother had "memory problems" late in life, though no formal diagnosis was made, and his father died of a myocardial infarction at age 65. A review of systems is negative for fever or chills but positive for occasional fatigue. Neurologically, the review is consistent with the HPI, with no new headaches, dizziness, or focal weakness. Psychiatrically, his daughter notes increased apathy and social withdrawal. On physical examination, his vital signs were BP 135/85 mmHg, HR 76 bpm, Temp 98.6°F, and SpO2 98% on room air. He is a well-groomed male who appears calm but somewhat disoriented. Neurological exam showed cranial nerves II-XII intact, strength 5/5, and intact sensation. His gait is steady but slow. A clock-drawing test was performed poorly. 
Recent lab results include a normal CBC and a BMP showing glucose of 128 mg/dL. TSH and Vitamin B12 levels were normal. A brain MRI from one month ago showed mild generalized cortical atrophy and moderate hippocampal atrophy, greater than expected for age. The assessment is progressive cognitive decline, along with controlled hypertension and type 2 diabetes. The associated codes are: Hypertension (ICD-10: I10, SNOMED: 38341003), Type 2 Diabetes (ICD-10: E11.9, SNOMED: 44054006), and Age-Related Cognitive Decline (ICD-10: R41.81, SNOMED: 386805001). The plan is to refer him to Neurology for formal neuropsychological testing and to start Donepezil 5 mg daily. Safety concerns were discussed with his daughter, and a follow-up is scheduled in 3 months. No fax number on file.
"""
    input_text = st.text_area("Paste the medical text here:", value=sample_text, height=300)
    if st.button("Parse Text", type="primary"):
        if not api_key:
            st.error("⚠️ Please enter your OpenAI API Key in the sidebar to proceed.")
        elif not input_text.strip():
            st.warning("⚠️ Please enter some medical text to parse.")
        else:
            # Top-level UI boundary: surface any failure instead of crashing the page.
            try:
                with st.spinner("🤖 The model is analyzing the text... Please wait."):
                    parser = BioMedicalParser(api_key=api_key)
                    result = parser.parse(input_text)
                st.subheader("Extracted Information")
                # Only a non-empty JSON object can be rendered section by section.
                if isinstance(result, dict) and result:
                    _render_results(result)
                else:
                    st.info("The analysis returned no information. This could be due to an API error or an empty response from the model.")
            except Exception as e:
                st.error(f"An unexpected error occurred: {e}")
# Launch the UI only when this file is executed as a script (for example via
# `streamlit run`), not when it is imported as a module.
if __name__ == "__main__":
    main()