# arthurpendragon's picture
# Update app.py
# 30f0910 verified
import streamlit as st
import pandas as pd
import numpy as np
from joblib import load
# Load the model.
# Streamlit re-executes this script on every widget interaction; wrapping the
# loader in st.cache_resource deserializes the model once and hands the same
# object back on later reruns instead of re-reading the file each time.
# NOTE: joblib.load unpickles the file, so only a trusted model artifact
# should ever be placed at this path.
@st.cache_resource
def _load_model():
    """Return the classifier stored in the bundled SVC()13.joblib file."""
    return load("SVC()13.joblib")


clf = _load_model()
# Helper functions for feature engineering
def encode(data):
    """One-hot encode the ``work_type`` column of *data*.

    Display labels are first normalised to the category names used for the
    indicator columns, then one 0/1 integer column is added per category:
    ``Govt_job``, ``Never_worked``, ``Private`` and ``Self-employed``.

    Both the misspelled label ``'Goverment job'`` and the correctly spelled
    ``'Government job'`` are accepted, so fixing the typo on the caller's
    side cannot silently produce a row with every indicator set to 0.

    Args:
        data: DataFrame with a ``work_type`` column. It is modified in place.

    Returns:
        The same DataFrame object, with ``work_type`` normalised and the four
        indicator columns added (or overwritten).

    Raises:
        KeyError: if *data* has no ``work_type`` column.
    """
    label_to_category = {
        'Goverment job': 'Govt_job',
        'Government job': 'Govt_job',
        'Never worked': 'Never_worked',
    }
    # Labels that are not listed ('Private', 'Self-employed', ...) are left
    # unchanged by replace().
    data['work_type'] = data['work_type'].replace(label_to_category)

    for category in ('Govt_job', 'Never_worked', 'Private', 'Self-employed'):
        data[category] = (data['work_type'] == category).astype(int)
    return data
def replace_with_numeric_one_patient(data):
    """Replace the categorical display labels in *data* with numeric codes.

    The columns ``ever_married``, ``residence_type``, ``smoking_status``,
    ``gender``, ``avg_glucose_level`` and ``bmi`` are recoded with the tables
    below. A value that is not a known label (for example a boolean or an
    already-numeric value) is passed through unchanged.

    Recoding is done value by value rather than with ``Series.replace``, which
    only produced numeric columns through a silent object-to-int downcast that
    pandas has deprecated.

    Args:
        data: DataFrame containing the six columns listed above. It is
            modified in place.

    Returns:
        The same DataFrame object with the six columns recoded.

    Raises:
        KeyError: if one of the six columns is missing.
    """
    codes_by_column = {
        'ever_married': {'Yes': 1, 'No': 0},
        'residence_type': {'Urban': 1, 'Rural': 0},
        'smoking_status': {'Never smoked': 0, 'Formerly smoked': 1, 'Smokes': 2},
        # 'Other' shares the code used for 'Female'.
        'gender': {'Male': -1, 'Female': 1, 'Other': 1},
        'avg_glucose_level': {
            "Normal (<100 mg/dL)": 0,
            "Prediabetes (<100, 125> mg/dL)": 1,
            "Diabetes (>125 mg/dL)": 2,
        },
        'bmi': {
            "Underweight (<18.4)": 0,
            "Normal (<18.5, 24.9>)": 1,
            "Overweight (<25, 29.9>)": 2,
            "Obese (>29.9)": 3,
        },
    }
    for column, codes in codes_by_column.items():
        # codes.get(value, value) keeps anything that is not a known label.
        data[column] = data[column].map(
            lambda value, codes=codes: codes.get(value, value)
        )
    return data
def predict_stroke_from_one_patient(gender, age, hypertension, heartDisease, everMarried, residenceType, averageGlucoseLevel, bmi, smokingStatus, employementType):
    """Classify a single patient as ``'stroke'`` or ``'no stroke'``.

    The arguments are placed in a one-row DataFrame, passed through
    ``encode`` and ``replace_with_numeric_one_patient``, stripped of the raw
    ``work_type`` column and handed to the module-level classifier ``clf``.

    Args:
        gender, age, hypertension, heartDisease, everMarried, residenceType,
        averageGlucoseLevel, bmi, smokingStatus: values stored under the
            corresponding feature columns.
        employementType: work-type label; it is one-hot encoded by ``encode``
            and the raw column is dropped before prediction.

    Returns:
        ``'stroke'`` if the classifier predicts class 1 for the row,
        otherwise ``'no stroke'``.
    """
    # Key order fixes the column order of the frame given to the model.
    # NOTE(review): assumes this order (plus the indicator columns appended
    # by encode) matches what the model was fitted on -- confirm.
    patient = {
        'gender': [gender],
        'age': [age],
        'hypertension': [hypertension],
        'heart_disease': [heartDisease],
        'ever_married': [everMarried],
        'residence_type': [residenceType],
        'avg_glucose_level': [averageGlucoseLevel],
        'bmi': [bmi],
        'smoking_status': [smokingStatus],
        'bmi_was_missing': [bmi is None],
        'work_type': [employementType],
    }
    data = pd.DataFrame(data=patient)

    # Feature engineering
    data = encode(data)
    data = replace_with_numeric_one_patient(data)
    data = data.drop("work_type", axis=1)

    # Every base feature column is created above and never removed, so no
    # back-filling of missing features is needed here.

    # Predict: the frame has exactly one row, so read its single prediction.
    y_predicted = clf.predict(data)
    return 'stroke' if y_predicted[0] == 1 else 'no stroke'
# Streamlit interface: page header, then the sidebar form that collects the
# patient's features and triggers a prediction.
st.title("Stroke Prediction")
st.markdown("""
This predictive model uses various health and demographic features to estimate the likelihood of having a stroke. The model has been trained on historical health data and can assist in identifying individuals at risk based on their inputs.
""")

# Choices offered by the selection widgets, in display order.
glucose_options = [
    "Normal (<100 mg/dL)",
    "Prediabetes (<100, 125> mg/dL)",
    "Diabetes (>125 mg/dL)",
]
bmi_options = [
    "Underweight (<18.4)",
    "Normal (<18.5, 24.9>)",
    "Overweight (<25, 29.9>)",
    "Obese (>29.9)",
]
smoking_options = ["Never smoked", "Formerly smoked", "Smokes"]
employment_options = ["Goverment job", "Never worked", "Private", "Self-employed"]

# Widgets are created top to bottom in the order they appear in the sidebar.
sidebar = st.sidebar
sidebar.header("Input Parameters")
gender = sidebar.radio("Gender", ["Male", "Female"])
age = sidebar.slider("Age", 40, 90, value=40, step=1)
hypertension = sidebar.checkbox("Hypertension")
heartDisease = sidebar.checkbox("Heart Disease")
everMarried = sidebar.checkbox("Is/Was Married?")
residenceType = sidebar.radio("Residence Type", ["Urban", "Rural"])
averageGlucoseLevel = sidebar.selectbox("Average Glucose Level", glucose_options)
bmi = sidebar.selectbox("BMI", bmi_options)
smokingStatus = sidebar.selectbox("Smoking Status", smoking_options)
employementType = sidebar.selectbox("Employment Type", employment_options)

# Run the model only when the button was pressed; the result is shown in the
# sidebar.
predict_clicked = sidebar.button("Predict")
if predict_clicked:
    prediction = predict_stroke_from_one_patient(
        gender,
        age,
        hypertension,
        heartDisease,
        everMarried,
        residenceType,
        averageGlucoseLevel,
        bmi,
        smokingStatus,
        employementType,
    )
    sidebar.write(f"Prediction: {prediction}")
# Reference table for standard values
st.markdown("## Reference Table")

# One (feature, typical value with stroke, typical value without stroke)
# row per feature; transposed below into the column-oriented dict.
reference_rows = [
    ("Gender", "Female", "Male"),
    ("Age", "65-75", "40-50"),
    ("Hypertension", "Yes", "No"),
    ("Heart Disease", "Yes", "No"),
    ("Ever Married", "Yes", "No"),
    ("Residence Type", "Urban", "Rural"),
    ("Average Glucose Level", "Diabetes (>125 mg/dL)", "Normal (<100 mg/dL)"),
    ("BMI", "Obese (>29.9)", "Normal (<18.5, 24.9>)"),
    ("Smoking Status", "Smokes", "Never smoked"),
    ("Employment Type", "Self-employed", "Govt_job"),
]
feature_names, stroke_values, no_stroke_values = (
    list(column) for column in zip(*reference_rows)
)
reference_data = {
    "Feature": feature_names,
    "Typical Value (Stroke)": stroke_values,
    "Typical Value (No Stroke)": no_stroke_values,
}
df_reference = pd.DataFrame(reference_data)
st.table(df_reference)
# Read the configured Streamlit theme base ("light" or "dark").
# NOTE(review): this reads the "theme.base" config option; it presumably does
# not reflect a theme the viewer switches to at runtime, and any value other
# than "light" (including an unset option) takes the dark styling -- confirm.
theme = st.get_option("theme.base")

# Define badge styling based on theme. Each branch pairs its text colour with
# a contrasting background so the caption stays readable: previously the
# light branch drew black text on the same dark background as the dark branch.
if theme == "light":
    button_bg_color = "#f0f2f6"
    button_border_color = "1px solid black"
    button_text_color = "black"
else:
    button_bg_color = "#2c2e35"
    button_border_color = "1px solid #fff"
    button_text_color = "#fff"

# Rounded button-like element with dynamic styling. Doubled braces are
# literal CSS braces inside the f-string; single braces interpolate the
# values chosen above.
st.markdown(f"""
<style>
.rounded-button {{
display: inline-block;
padding: 7px 15px;
font-size: 16px;
color: {button_text_color};
background-color: {button_bg_color};
border: {button_border_color};
border-radius: 7px;
text-align: center;
text-decoration: none;
cursor: default;
}}
</style>
<div style="text-align: center;">
<div class="rounded-button">
Created by: Samuel Ameyaw
</div>
</div>
""", unsafe_allow_html=True)