Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import joblib | |
| from xgboost import XGBRegressor | |
| from sklearn.preprocessing import StandardScaler, OneHotEncoder | |
| import shap | |
| from streamlit_shap import st_shap | |
# Page configuration: set the page title, then render the main heading.
st.set_page_config(page_title="Medical Costs Concern Prediction")
st.title("Predict Medical Costs Concern")
| # Load model and preprocessing objects | |
@st.cache_resource
def load_model_objects():
    """Load the trained model and the fitted scaler from disk.

    Streamlit re-executes the whole script on every widget interaction, so
    the loader is wrapped in ``st.cache_resource``: the two joblib files are
    deserialized once and the same objects are reused on later reruns.

    Returns:
        tuple: ``(model_xgb, scaler)`` as loaded from ``model_best.joblib``
        and ``scaler.joblib`` (paths are relative to the working directory).
    """
    # NOTE: joblib.load unpickles the file contents; only ship artifacts
    # produced by a trusted training run alongside this app.
    model_xgb = joblib.load('model_best.joblib')
    scaler = joblib.load('scaler.joblib')
    return model_xgb, scaler
# Unpack the loaded artifacts; both names are used further down the script.
model_xgb, scaler = load_model_objects()

# Create the SHAP explainer for the loaded model.
explainer = shap.TreeExplainer(model=model_xgb)
# App description, shown inside an expander.
about_text = """
This app predicts how worried a person is about medical costs, based on factors like age, education, income, and employment status.
We've trained an AI model to analyze these inputs and give a prediction.
"""
with st.expander("What's this app?"):
    st.markdown(about_text)
st.subheader('Describe yourself')

# User inputs, placed in a two-column layout. Widgets are created through
# the column objects directly rather than inside ``with`` blocks.
col1, col2 = st.columns(2)

age = col1.number_input('Age', min_value=18, max_value=100, value=30)
education = col1.selectbox(
    'Education Level',
    options=['Primary', 'Secondary', 'Tertiary'],
    index=1,
)
income_quartile = col1.radio(
    'Income Quartile',
    options=['Lowest', 'Second', 'Third', 'Highest'],
)

employment_status = col2.selectbox(
    'Employment Status',
    options=['Unemployed', 'Employed'],
    index=1,
)
# Lookup tables translating the widget labels into the numeric codes that
# are fed to the scaler.
education_mapping = {
    'Primary': 1,
    'Secondary': 2,
    'Tertiary': 3,
}
income_mapping = {
    'Lowest': 1,
    'Second': 2,
    'Third': 3,
    'Highest': 4,
}
employment_mapping = {
    'Unemployed': 0,
    'Employed': 1,
}

# Encode the current selections.
education_num = education_mapping[education]
income_num = income_mapping[income_quartile]
employment_num = employment_mapping[employment_status]

# Single-row feature frame; column names and order are kept as-is
# (presumably they match what the scaler was fitted on -- verify against
# the training code).
num_features = pd.DataFrame([{
    'age': age,
    'educ': education_num,
    'inc_q': income_num,
    'emp_in': employment_num,
}])

# Scale the row and restore the column labels on the result.
scaled_array = scaler.transform(num_features)
num_scaled = pd.DataFrame(scaled_array, columns=num_features.columns)
# Prediction button: everything in this branch runs only on the rerun
# triggered by clicking the button.
if st.button('Predict Concern Level'):
    # Make prediction (first and only row of the single-sample input).
    predicted_concern = model_xgb.predict(num_scaled)[0]

    # The raw output is continuous (presumably from an XGBRegressor) and can
    # fall slightly outside the 1-3 scale. Clamp after rounding so the number
    # shown always matches the legend printed next to it.
    concern_level = min(max(round(float(predicted_concern)), 1), 3)

    # Display prediction
    st.metric(
        label="Predicted concern level",
        value=f'{concern_level} (1: Very Worried, 3: Not Worried)',
    )

    # SHAP explanation
    st.subheader('Concern Factors Explained')
    shap_values = explainer.shap_values(num_scaled)
    st_shap(
        shap.force_plot(explainer.expected_value, shap_values, num_scaled),
        height=400,
        width=600,
    )
    # On this scale a HIGHER score means LESS worry, so the text spells out
    # the direction instead of saying "concern goes higher/lower".
    st.markdown("""
    This plot shows how each feature contributes to the predicted score (1: Very Worried, 3: Not Worried):
    - Blue bars push the score lower, i.e. toward being more worried
    - Red bars push the score higher, i.e. toward being less worried
    - The length of each bar indicates the strength of the feature's impact
    """)

# Footer
st.markdown("---")