Spaces:

mr-usman
/

colelithiasis

Sleeping

App Files Files Community

mr-usman commited on Jan 27, 2025

Commit

461222e

verified ·

1 Parent(s): fd6fbf7

Upload app.py

Browse files

Files changed (1) hide show

app.py +314 -0

app.py ADDED Viewed

	@@ -0,0 +1,314 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+import joblib
+from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
+from plotly import graph_objects as go
+st.set_page_config(layout="wide")
+# Load Dataset
+def load_data():
+    data = pd.read_excel(r'Model Training/colelithiasis_dataset.xlsx')  # Update with your dataset file path
+    data.drop('Patient No.', axis=1, inplace=True)
+    return data
+# Initialize Session State
+if "data" not in st.session_state:
+    st.session_state.data = load_data()
+def introduction_page():
+    st.title("Introduction")
+    st.markdown("""
+    ## Project Overview
+    This project analyzes the Colelithiasis dataset to perform exploratory data analysis (EDA) and prediction using pre-trained machine learning models. The goal is to provide insights into the data and make predictions efficiently.
+    ## Objectives
+    - Perform EDA to uncover patterns and insights.
+    - Use pre-trained machine learning models for predictions.
+    - Create an interactive Streamlit application.
+    """)
+def stats_page():
+    st.title("Exploratory Data Analysis")
+    # Dataset Overview
+    st.subheader("Dataset Overview")
+    st.dataframe(st.session_state.data.head())
+    # Summary Statistics
+    st.subheader("Summary Statistics")
+    st.write(st.session_state.data.describe())
+    # Correlation Matrix
+    st.subheader("Correlation Analysis")
+    # encode the target variable
+    data = st.session_state.data.copy()
+    data['Health_status'].replace({'healthy': 0, 'patient': 1}, inplace=True)
+    # apply ordinal encoding to the categorical columns
+    categorical_columns = ['Gender','Family history','Obese/non obese']
+    encoder = joblib.load('Model Training\encoder.pkl')
+    data[categorical_columns] = encoder.transform(data[categorical_columns])
+    correlation = data.corr()
+    plt.figure(figsize=(5, 3))
+    # reduce the font size of the heatmap
+    sns.set(font_scale=0.5)
+    sns.heatmap(correlation, annot=True, cmap="coolwarm", fmt=".2f")
+    st.pyplot(plt, use_container_width=False)
+def eda_page():
+    st.title("Exploratory Data Analysis")
+    # Interactive Visualizations
+    st.subheader("Visualizations")
+    chart_type = st.selectbox("Choose Chart Type", ["Histogram", "Scatter Plot", "Box Plot"])
+    if chart_type == "Histogram":
+        column = st.selectbox("Choose Column for Visualization", st.session_state.data.columns)
+        fig = go.Figure()
+        fig.add_trace(go.Histogram(x=st.session_state.data[column], name=column, marker_color="indigo"))
+        fig.update_layout(
+            title=dict(text="Histogram Analysis", x=0.5, font=dict(size=22)),
+            xaxis_title=column,
+            yaxis_title="Count",
+            legend=dict(title="Legend", orientation="h", x=0.5, xanchor="center"),
+            bargap=0.2,
+            hovermode="x unified",
+            template="plotly_dark"
+        )
+        st.plotly_chart(fig)
+    elif chart_type == "Scatter Plot":
+        x_col = st.selectbox("Choose X-axis Column", st.session_state.data.columns)
+        y_col = st.selectbox("Choose Y-axis Column", st.session_state.data.columns)
+        fig = go.Figure()
+        fig.add_trace(go.Scatter(
+            x=st.session_state.data[x_col],
+            y=st.session_state.data[y_col],
+            mode="markers",
+            marker=dict(size=10, color="purple", line=dict(width=1, color="white")),
+            name=f"{y_col} vs {x_col}"
+        ))
+        fig.update_layout(
+            title=dict(text="Scatter Plot Analysis", x=0.5, font=dict(size=22)),
+            xaxis_title=x_col,
+            yaxis_title=y_col,
+            legend=dict(title="Legend", orientation="h", x=0.5, xanchor="center"),
+            hovermode="closest",
+            template="plotly_dark"
+        )
+        st.plotly_chart(fig)
+    elif chart_type == "Box Plot":
+        column = st.selectbox("Choose Column for Visualization", st.session_state.data.columns)
+        fig = go.Figure()
+        fig.add_trace(go.Box(
+            y=st.session_state.data[column],
+            name=column,
+            boxmean="sd",
+            marker_color="teal"
+        ))
+        fig.update_layout(
+            title=dict(text="Boxplot Analysis", x=0.5, font=dict(size=22)),
+            yaxis_title=column,
+            legend=dict(title="Legend", orientation="h", x=0.5, xanchor="center"),
+            hovermode="y",
+            template="plotly_dark"
+        )
+        st.plotly_chart(fig)
+def model_page():
+    st.title("Model Evaluation")
+    test_data = pd.read_excel(r'Model Training\test_data.xlsx')
+    # encode the target variable
+    test_data['Health_status'].replace({'healthy': 0, 'patient': 1}, inplace=True)
+    # apply ordinal encoding to the categorical columns
+    categorical_columns = ['Gender','Family history','Obese/non obese']
+    encoder = joblib.load('Model Training\encoder.pkl')
+    X = test_data.drop( columns=['Health_status'])
+    X[categorical_columns] = encoder.transform(X[categorical_columns])
+    y = test_data['Health_status']
+    # apply standard scalling to numberical features in X
+    numerical_columns = [col_name for col_name in X.columns if col_name not in categorical_columns]
+    scaler = joblib.load('Model Training\scaler.pkl')
+    X[numerical_columns] = scaler.transform(X[numerical_columns])
+    # Model Selection
+    st.text("Model Selection")
+    model_choice = st.selectbox("Choose a Pre-trained Model", ["SVM - Linear", "SVM - Polynomial", "SVM - RBF",
+                                                                 "Random Forest","Random Forest Boosted", "Logistic Regression", "GDA"])
+    # Load pre-trained model
+    model = None
+    if model_choice == "SVM - Linear":
+        model = joblib.load('Model Training\svm_model_linear.pkl')
+    elif model_choice == "SVM - Polynomial":
+        model = joblib.load('Model Training\svm_model_poly.pkl')
+    elif model_choice == "SVM - RBF":
+        model = joblib.load('Model Training\svm_model_rbf.pkl')
+    elif model_choice == "Random Forest":
+        model = joblib.load('Model Training\rf_model.pkl')
+    elif model_choice == "Random Forest Boosted":
+        model = joblib.load('Model Training\rf_boosted.pkl')
+    elif model_choice == "Logistic Regression":
+        model = joblib.load('Model Training\lr_model.pkl')
+    elif model_choice == "GDA":
+        model = joblib.load('Model Training\gda.pkl')
+    if model:
+        # Make Predictions
+        y_pred = model.predict(X)
+        col1, col2 = st.columns(2)
+        with col1:
+            st.subheader("### Predictions on the Test Data:")
+            st.dataframe(pd.DataFrame({"Actual": y, "Predicted": y_pred}))
+        with col2:
+            st.subheader("Classification Report")
+            report = classification_report(y, y_pred, output_dict=True)
+            report_df = pd.DataFrame(report).transpose().reset_index()
+            report_df.drop('support', axis=1, inplace=True)
+            report_df.set_index(['index'], inplace=True)
+            report_df.rename(index={'0.0': 'Negative', '1.0': 'Positive'}, inplace=True)
+            report_df.iloc[report_df.index.get_loc('accuracy'), 0:2] = ''
+            st.table(report_df)
+        st.subheader("Confusion Matrix")
+        conf_matrix = confusion_matrix(y, y_pred)
+        # Generate text annotations for the confusion matrix
+        text_annotations = np.array([[str(value) for value in row] for row in conf_matrix])
+        col1, col2 = st.columns(2)
+        with col1:
+            # Create the heatmap using seaborn
+            plt.figure(figsize=(3 , 3))
+            sns.heatmap(conf_matrix, annot=text_annotations, fmt="", cmap="Blues", cbar=False, square=True)
+            plt.xlabel("Predicted")
+            plt.ylabel("Actual")
+            plt.title("Confusion Matrix")
+            st.pyplot(plt)
+def prediction_page():
+    st.title("Get Your Diagnosis")
+    st.subheader("Symptoms Entry Form")
+    # Model Selection
+    model_choice = st.selectbox("Choose a Pre-trained Model", ["SVM - Linear", "SVM - Polynomial", "SVM - RBF",
+                                                                "Random Forest","Random Forest Boosted", "Logistic Regression", "GDA"])
+    # Load pre-trained model
+    model = None
+    if model_choice == "SVM - Linear":
+        model = joblib.load('Model Training\svm_model_linear.pkl')
+    elif model_choice == "SVM - Polynomial":
+        model = joblib.load('Model Training\svm_model_poly.pkl')
+    elif model_choice == "SVM - RBF":
+        model = joblib.load('Model Training\svm_model_rbf.pkl')
+    elif model_choice == "Random Forest":
+        model = joblib.load('Model Training\rf_model.pkl')
+    elif model_choice == "Random Forest Boosted":
+        model = joblib.load('Model Training\rf_boosted.pkl')
+    elif model_choice == "Logistic Regression":
+        model = joblib.load('Model Training\lr_model.pkl')
+    elif model_choice == "GDA":
+        model = joblib.load('Model Training\gda.pkl')
+    with st.form(key="health_data_form"):
+        col1, col2, col3, col4 = st.columns(4)
+        with col1:
+            # Categorical features with dropdown selection
+            gender = st.selectbox("Gender", ["Male", "Female"], key="gender")
+            weight = st.number_input("Weight (kg)", min_value=0, step=1, key="weight")
+            cholesterol = st.number_input("Cholesterol (mg/dL)", min_value=0, step=1, key="cholesterol")
+        with col2:
+            family_history = st.selectbox("Family History of Illness", ["Yes", "No"], key="family_history")
+            bmi = st.number_input("BMI", min_value=0.0, step=0.1, key="bmi")
+            triglycerides = st.number_input("Triglycerides Level (mg/dL)", min_value=0, step=1, key="triglycerides")
+        with col3:
+            height = st.number_input("Height (cm)", min_value=0.0, step=0.1, key="height")
+            obese_status = st.selectbox("Obese/Non Obese", ["Obese", "Non-Obese"], key="obese_status")
+            ldl = st.number_input("LDL Level (mg/dL)", min_value=0.0, step=0.1, key="ldl")
+        with col4:
+            vldl = st.number_input("VLDL Level (mg/dL)", min_value=0.0, step=0.1, key="vldl")
+        # Submit button
+        submit_button = st.form_submit_button(label="Submit" )
+    if submit_button:
+        # Create a DataFrame directly with the user input data
+        data = pd.DataFrame({
+            "Gender": [gender],
+            "Family history": [family_history],
+            "Height": [height],
+            "Weight": [weight],
+            "BMI": [bmi],
+            "Obese/non obese": [obese_status],
+            "Cholesterol": [cholesterol],
+            "Triglycerides": [triglycerides],
+            "LDL level": [ldl],
+            "VLDL level": [vldl]
+        })
+        columns = ['Gender', 'Family history', 'Height', 'Weight', 'BMI', 'Obese/non obese', 'Cholesterol', 'Triglycerides level', 'LDL level', 'VLDL level']
+        data = data.reindex(columns=columns, fill_value=0)
+        categorical_columns = ['Gender','Family history','Obese/non obese']
+        numerical_columns = [col_name for col_name in data.columns if col_name not in categorical_columns]
+        # Encoding categorical data
+        encoder = joblib.load('Model Training\encoder.pkl')
+        data[categorical_columns] = encoder.transform(data[categorical_columns])
+        # Scaling the numeric features
+        scaler = joblib.load('Model Training\scaler.pkl')
+        data[numerical_columns] = scaler.transform(data[numerical_columns])
+        prediction = int(model.predict(data)[0])
+        st.write(f"### Predicted Diagnosis: {'Positive' if prediction == 1 else 'Negative'}")
+def conclusion_page():
+    st.title("Conclusion")
+    st.markdown("""
+    ## Key Takeaways
+    - Comprehensive EDA provides actionable insights into the data.
+    - Pre-trained machine learning models allow efficient predictions.
+    - The interactive app makes the analysis accessible and engaging.
+    Thank you for exploring this project!
+    """)
+# Sidebar Navigation Menu with radio buttons for page selection
+page = st.sidebar.radio("Navigation Menu", ["Introduction","Descriptive Statistics", "Data Analytics", "Model Evaluation", "Get Your Diagnosis", "Conclusion"])
+if page == "Introduction":
+    introduction_page()
+elif page == "Descriptive Statistics":
+    stats_page()
+elif page == "Data Analytics":
+    eda_page()
+elif page == "Model Evaluation":
+    model_page()
+elif page == "Get Your Diagnosis":
+    prediction_page()
+elif page == "Conclusion":
+    conclusion_page()