Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import joblib | |
| import pandas as pd | |
# Page config: browser-tab title and icon, wide layout, sidebar opened on load.
PAGE_SETTINGS = {
    "page_title": "❤️ Heart Disease Prediction System",
    "page_icon": "❤️",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**PAGE_SETTINGS)
# trained model
@st.cache_resource
def _read_model_bundle(path):
    """Deserialize the model bundle stored at *path*.

    Wrapped in ``st.cache_resource`` (Streamlit >= 1.18) so the pickle is
    read once and reused, instead of being re-read every time the script
    reruns after a widget interaction.
    """
    # NOTE: joblib.load unpickles arbitrary objects; only point this at
    # trusted files that ship with the app.
    return joblib.load(path)


def load_model():
    """Return ``(model, threshold)`` taken from the saved model bundle.

    The bundle is indexed as ``bundle['model']`` and
    ``bundle['metadata']['threshold']``. If loading or indexing fails, the
    error is shown in the UI and the current script run is halted with
    ``st.stop()``, so callers never receive a partial result.
    """
    try:
        production_model = _read_model_bundle('models/uci_heart_disease_model.pkl')
        return production_model['model'], production_model['metadata']['threshold']
    except Exception as e:
        # UI reporting stays outside the cached helper so a failed load is
        # never cached and is retried on the next rerun.
        st.error(f"Error loading model: {e}")
        st.stop()


model, optimal_threshold = load_model()
def predict_heart_disease(user_input):
    """Score every row of *user_input* and return ``(results, display_data)``.

    Args:
        user_input: DataFrame with one row per patient. It must contain the
            13 raw columns listed in ``base_features``; any other columns
            are ignored. The frame is not modified.

    Returns:
        A ``(results, display_data)`` tuple. ``results`` has the columns
        ``Prediction`` (1 when the probability is at or above
        ``optimal_threshold``, else 0), ``Diagnosis`` and ``Probability``,
        and carries the same index as *user_input*. ``display_data`` is
        ``age``/``sex``/``cp``/``trestbps``/``chol`` joined with ``results``.
        On any failure the error is shown with ``st.error`` and
        ``(None, None)`` is returned.
    """
    # Same columns, in the same order, that the single-patient form builds.
    base_features = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
                     'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal']
    try:
        # Work on a copy restricted to the known inputs: the caller's frame
        # stays untouched and stray CSV columns never reach the model.
        features = user_input[base_features].copy()

        # Feature engineering
        # 1e-5 keeps the ratio finite if age is 0.
        features['hr_age_ratio'] = features['thalach'] / (features['age'] + 1e-5)
        features['bp_oldpeak'] = features['trestbps'] * (features['oldpeak'] + 1)
        features['risk_score'] = (
            features['age'] / 50
            + features['chol'] / 200
            + features['trestbps'] / 140
        )

        # prediction: column 1 of predict_proba is taken as the positive class
        probabilities = model.predict_proba(features)[:, 1]
        predictions = (probabilities >= optimal_threshold).astype(int)

        # results DataFrame; reuse the input index so the concat below (and
        # any column assignment by the caller) lines up row for row.
        results = pd.DataFrame(
            {
                'Prediction': predictions,
                'Diagnosis': ['Heart Disease' if p == 1 else 'Healthy' for p in predictions],
                'Probability': probabilities,
            },
            index=features.index,
        )

        # input features for display
        display_data = pd.concat(
            [features[['age', 'sex', 'cp', 'trestbps', 'chol']], results],
            axis=1,
        )
        return results, display_data
    except Exception as e:
        st.error(f"Prediction error: {e}")
        return None, None
# Main app interface
st.title("❤️ Heart Disease Prediction")

# One tab per workflow: single patient, CSV batch, and reference information.
TAB_LABELS = ["Single Prediction", "Batch Prediction", "Data & Model Info"]
tab1, tab2, tab3 = st.tabs(TAB_LABELS)
def _option_code(label):
    """Return the integer written in parentheses in a widget label.

    Example: ``"Atypical angina (2)"`` -> ``2``.
    """
    return int(label.split("(")[1].strip(")"))


def _assess_risk(probability, flagged):
    """Return ``(risk_level, recommendation, color)`` for one patient.

    Args:
        probability: Predicted probability of heart disease (0..1).
        flagged: True when the thresholded prediction is "Heart Disease".

    The bands are >0.7 High, >0.4 Medium, otherwise Low. A flagged patient
    is never rated below Medium: the decision threshold can sit under 0.4,
    and a "Heart Disease" diagnosis must not be paired with the
    "No immediate concerns" message.
    """
    if probability > 0.7:
        return "High", "Immediate consultation with cardiologist recommended", "red"
    if probability > 0.4 or flagged:
        return "Medium", "Further tests recommended", "orange"
    return "Low", "No immediate concerns, maintain regular checkups", "green"


with tab1:
    st.header("Single Patient Prediction")

    # Input form: widgets only take effect when the submit button is pressed.
    with st.form("prediction_form"):
        left_col, right_col = st.columns(2)
        with left_col:
            st.subheader("Patient Information")
            age = st.slider("Age", 18, 100, 50)
            sex = st.radio("Sex", ["Male (1)", "Female (0)"], index=0)
            cp = st.selectbox("Chest Pain Type",
                              ["Typical angina (1)", "Atypical angina (2)",
                               "Non-anginal pain (3)", "Asymptomatic (4)"])
            trestbps = st.slider("Resting Blood Pressure (mmHg)", 90, 200, 120)
            chol = st.slider("Serum Cholesterol (mg/dl)", 150, 350, 200)
        with right_col:
            st.subheader("Clinical Measurements")
            fbs = st.radio("Fasting Blood Sugar > 120 mg/dl", ["Yes (1)", "No (0)"], index=1)
            restecg = st.selectbox("Resting ECG Results",
                                   ["Normal (0)", "ST-T wave abnormality (1)",
                                    "Left ventricular hypertrophy (2)"])
            thalach = st.slider("Maximum Heart Rate Achieved (bpm)", 60, 200, 150)
            exang = st.radio("Exercise Induced Angina", ["Yes (1)", "No (0)"], index=1)
            oldpeak = st.slider("ST Depression Induced by Exercise", 0.0, 6.0, 1.0, step=0.1)
            slope = st.selectbox("Slope of Peak Exercise ST Segment",
                                 ["Upsloping (1)", "Flat (2)", "Downsloping (3)"])
            ca = st.slider("Number of Major Vessels", 0, 4, 0)
            thal = st.selectbox("Thalassemia",
                                ["Normal (3)", "Fixed defect (6)", "Reversible defect (7)"])
        submitted = st.form_submit_button("Predict Heart Disease Risk")

    if submitted:
        # Preprocess inputs: turn widget labels into the numeric codes and
        # build a one-row frame in the column order used for scoring.
        user_input = pd.DataFrame({
            'age': [age],
            'sex': [1 if sex.startswith("Male") else 0],
            'cp': [_option_code(cp)],
            'trestbps': [trestbps],
            'chol': [chol],
            'fbs': [1 if fbs.startswith("Yes") else 0],
            'restecg': [_option_code(restecg)],
            'thalach': [thalach],
            'exang': [1 if exang.startswith("Yes") else 0],
            'oldpeak': [oldpeak],
            'slope': [_option_code(slope)],
            'ca': [ca],
            'thal': [_option_code(thal)],
        })

        # predictions (predict_heart_disease reports its own errors and
        # returns None on failure)
        results, display_data = predict_heart_disease(user_input)
        if results is not None:
            st.subheader("Prediction Results")
            # formatted results
            st.markdown(f"""
            ### Heart Disease Prediction Results
            **Using threshold:** {optimal_threshold:.3f}
            """)

            # results section
            with st.expander("View Detailed Results"):
                st.dataframe(display_data)

            # risk assessment
            probability = results['Probability'].iloc[0]
            prediction = results['Diagnosis'].iloc[0]
            flagged = bool(results['Prediction'].iloc[0] == 1)
            risk_level, recommendation, color = _assess_risk(probability, flagged)

            diag_col, prob_col, risk_col = st.columns(3)
            with diag_col:
                st.metric("Prediction", prediction)
            with prob_col:
                st.metric("Probability", f"{probability * 100:.2f}%")
            with risk_col:
                st.metric("Risk Level", risk_level)

            # recommendation (only fixed strings chosen above are interpolated
            # into the HTML, never user text)
            st.markdown(f"""
            <div style='background-color:#f0f2f6; padding:10px; border-radius:5px;'>
            <h4 style='color:{color};'>Recommendation: {recommendation}</h4>
            </div>
            """, unsafe_allow_html=True)
with tab2:
    st.header("Batch Prediction")
    uploaded_file = st.file_uploader("Upload CSV file with patient data", type=["csv"])
    if uploaded_file is not None:
        try:
            test_data = pd.read_csv(uploaded_file)
            st.success("File uploaded successfully!")

            # required columns
            required_cols = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
                             'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal']
            missing_cols = [col for col in required_cols if col not in test_data.columns]
            if missing_cols:
                st.error(f"Missing required columns: {', '.join(missing_cols)}")
            elif test_data.empty:
                # Header-only file: nothing to score.
                st.error("The uploaded file contains no patient rows.")
            else:
                # predictions: score a copy holding only the required columns
                # so extra CSV columns (ids, labels, ...) never reach the model
                # and the uploaded data is not altered by feature engineering.
                results, _ = predict_heart_disease(test_data[required_cols].copy())
                if results is not None:
                    st.subheader("Prediction Results")
                    # summary statistics
                    st.markdown(f"""
                    ### Batch Prediction Results
                    **Using threshold:** {optimal_threshold:.3f}
                    """)

                    # results with original data
                    full_results = test_data.copy()
                    full_results['Probability'] = results['Probability']
                    full_results['Prediction'] = results['Prediction']
                    full_results['Diagnosis'] = results['Diagnosis']

                    # results section
                    with st.expander("View All Predictions"):
                        st.dataframe(full_results)

                    # statistics
                    st.subheader("Statistics")
                    total_patients = len(full_results)
                    # Cast to a plain int: the pandas sum is a numpy integer,
                    # which some Streamlit versions reject in st.metric.
                    disease_cases = int(full_results['Prediction'].sum())
                    total_col, disease_col, healthy_col = st.columns(3)
                    with total_col:
                        st.metric("Total Patients", total_patients)
                    with disease_col:
                        st.metric("Heart Disease Cases", disease_cases)
                    with healthy_col:
                        st.metric("Healthy Cases", total_patients - disease_cases)

                    # download button
                    csv = full_results.to_csv(index=False)
                    st.download_button(
                        "Download Results",
                        csv,
                        "heart_disease_predictions.csv",
                        "text/csv"
                    )
        except Exception as e:
            # Covers unreadable or empty CSV files and any later processing error.
            st.error(f"Error processing file: {e}")
# Five example patients shown on the info tab, written one patient per row.
_SAMPLE_COLUMNS = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
                   'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal']
_SAMPLE_ROWS = [
    (52, 1, 3, 125, 212, 0, 0, 168, 0, 1.0, 2, 2, 3),
    (63, 1, 4, 145, 233, 1, 1, 150, 0, 2.3, 3, 0, 3),
    (45, 0, 2, 130, 204, 0, 0, 172, 0, 1.4, 1, 0, 3),
    (67, 0, 3, 120, 228, 0, 1, 129, 1, 2.6, 2, 1, 7),
    (58, 1, 4, 136, 319, 0, 0, 152, 0, 0.0, 1, 0, 3),
]
# Transpose the rows back into per-column lists before building the frame.
sample_data = pd.DataFrame({
    name: list(values)
    for name, values in zip(_SAMPLE_COLUMNS, zip(*_SAMPLE_ROWS))
})
with tab3:
    st.header("Data & Model Information")

    st.subheader("🧠 Model & System Info")
    # The threshold is taken from the loaded model bundle rather than typed
    # in, so this tab always agrees with the value shown with predictions.
    # NOTE(review): the sidebar names the algorithm "Random Forest Classifier"
    # while this tab says XGBoost -- confirm which one the bundle contains.
    st.markdown(f"""
    - **Developed by:** Musabbir KM
    - **Model Name:** Heart-Guard
    - **Version:** 1.1
    - **Classifier:** XGBoost
    - **Optimized Threshold:** {optimal_threshold:.3f}
    """)

    st.subheader("Dataset Information")
    st.markdown("""
    The model was trained on the UCI Heart Disease Dataset containing the following features:
    - **Demographic**: Age, Sex
    - **Clinical**: Blood Pressure, Cholesterol, etc.
    - **Electrocardiographic**: Resting ECG, Exercise ST segment, etc.
    """)

    st.subheader("Sample Data")
    st.dataframe(sample_data)

    st.subheader("Model Performance")
    # NOTE(review): the two lists below disagree (Recall 87.5% vs Sensitivity
    # 0.95, Precision 83.1% vs 0.80) -- confirm which evaluation each is from.
    # The blank line before "Additional Metrics" keeps that heading from being
    # folded into the preceding list item when the markdown is rendered.
    st.markdown("""
    - **Accuracy**: 85.2% (on test set)
    - **Precision**: 83.1%
    - **Recall**: 87.5%
    - **F1-score**: 85.2%

    **📈 Additional Metrics:**
    - **ROC AUC:** `0.909`
    - **Sensitivity (Recall):** `0.95` _(for Heart Disease)_
    - **Specificity:** `0.76` _(for Healthy)_
    - **Balanced Accuracy:** `0.855`
    - **False Positive Rate (FPR):** `0.24`
    - **False Negative Rate (FNR):** `0.05`
    - **Precision (Heart Disease):** `0.80`
    - **Precision (Healthy):** `0.95`
    - **F1 Score (Overall):** `0.85`
    - **Support Size:** `46` patients
    """)

    st.subheader("Risk Interpretation Guide")
    st.markdown("""
    - **High Risk (>70%)**: Strong recommendation for cardiologist consultation
    - **Medium Risk (40-70%)**: Suggest additional tests
    - **Low Risk (<40%)**: Likely healthy, maintain regular checkups
    """)

    st.subheader("Terms of Use")
    st.markdown("""
    This tool is for informational purposes only and should not replace
    professional medical advice. Always consult a healthcare provider
    for medical diagnosis and treatment.
    """)
# Sidebar with info: app overview, model facts and a glossary of the inputs.
with st.sidebar:
    st.title("❤️ Heart Disease Prediction")

    # NOTE(review): "Random Forest Classifier" here conflicts with the
    # "Classifier: XGBoost" line on the info tab -- confirm the real algorithm.
    overview_md = f"""
    ## 🧠 Model & System Info
    This application predicts the likelihood of heart disease based on clinical features using a machine learning model.
    - **Developed by:** Musabbir KM
    - **Model Name:** Heart-Guard
    - **Version:** 1.1
    ### Model Information
    - **Algorithm**: Random Forest Classifier
    - **Dataset**: UCI Heart Disease Dataset
    - **Optimal Threshold**: {optimal_threshold:.3f}
    - **Version**: 1.1
    ### How It Works
    1. Enter patient details
    2. Click 'Predict' button
    3. View prediction results
    """
    st.markdown(overview_md)

    st.markdown("---")

    feature_glossary_md = """
    ### Feature Descriptions
    - **Age**: Patient's age in years
    - **Sex**: Gender (1 = Male, 0 = Female)
    - **CP**: Chest pain type (1-4)
    - **Trestbps**: Resting blood pressure (mmHg)
    - **Chol**: Serum cholesterol (mg/dl)
    - **FBS**: Fasting blood sugar > 120 mg/dl
    - **Restecg**: Resting ECG results
    - **Thalach**: Maximum heart rate achieved
    - **Exang**: Exercise induced angina
    - **Oldpeak**: ST depression induced by exercise
    - **Slope**: Slope of peak exercise ST segment
    - **CA**: Number of major vessels colored by fluoroscopy
    - **Thal**: Thalassemia (3,6,7)
    """
    st.markdown(feature_glossary_md)