mr-usman committed on
Commit
461222e
·
verified ·
1 Parent(s): fd6fbf7

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +314 -0
app.py ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+ import joblib
7
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
8
+ from plotly import graph_objects as go
9
+
10
+ st.set_page_config(layout="wide")
11
+
12
+ # Load Dataset
13
def load_data():
    """Read the dataset workbook and return it without the 'Patient No.' column.

    Returns:
        pandas.DataFrame: the spreadsheet contents minus 'Patient No.'.
    """
    # Path is relative to the working directory; update it if the dataset moves.
    dataset = pd.read_excel(r'Model Training/colelithiasis_dataset.xlsx')
    return dataset.drop(columns='Patient No.')
17
+
18
# Populate the session's dataset only when it is not already stored there.
if "data" not in st.session_state:
    st.session_state["data"] = load_data()
21
+
22
def introduction_page():
    """Render the "Introduction" page: a title followed by a markdown summary."""
    overview_md = """
    ## Project Overview
    This project analyzes the Colelithiasis dataset to perform exploratory data analysis (EDA) and prediction using pre-trained machine learning models. The goal is to provide insights into the data and make predictions efficiently.

    ## Objectives
    - Perform EDA to uncover patterns and insights.
    - Use pre-trained machine learning models for predictions.
    - Create an interactive Streamlit application.
    """
    st.title("Introduction")
    st.markdown(overview_md)
33
+
34
def stats_page():
    """Render the descriptive-statistics page.

    Shows the first rows of ``st.session_state.data``, its ``describe()``
    summary, and a correlation heatmap computed on an encoded copy of the
    dataset. The session dataset itself is not modified.
    """
    st.title("Exploratory Data Analysis")

    # Dataset Overview
    st.subheader("Dataset Overview")
    st.dataframe(st.session_state.data.head())

    # Summary Statistics
    st.subheader("Summary Statistics")
    st.write(st.session_state.data.describe())

    # Correlation Matrix
    st.subheader("Correlation Analysis")

    # Work on a copy so the encoding below never leaks into the session data.
    data = st.session_state.data.copy()

    # Encode the target variable. The result is assigned back rather than
    # using replace(inplace=True) on a column selection, which is chained
    # assignment and is not guaranteed to update the parent frame.
    data['Health_status'] = data['Health_status'].replace({'healthy': 0, 'patient': 1})

    # Apply the pre-fitted encoder to the categorical columns.
    categorical_columns = ['Gender', 'Family history', 'Obese/non obese']
    # Forward slash: a backslash here is an invalid "\e" escape and is not a
    # directory separator on POSIX hosts. load_data already uses "/".
    encoder = joblib.load('Model Training/encoder.pkl')
    data[categorical_columns] = encoder.transform(data[categorical_columns])

    correlation = data.corr()

    # Shrink fonts so the annotated cells fit inside the small figure.
    sns.set(font_scale=0.5)
    fig, ax = plt.subplots(figsize=(5, 3))
    sns.heatmap(correlation, annot=True, cmap="coolwarm", fmt=".2f", ax=ax)

    # Pass the figure explicitly instead of the pyplot module, then close it
    # so figures do not pile up in pyplot's registry across reruns.
    st.pyplot(fig, use_container_width=False)
    plt.close(fig)
63
+
64
def eda_page():
    """Render the interactive chart page.

    The user picks a chart type (histogram, scatter plot or box plot) and
    the column(s) of ``st.session_state.data`` to plot; the matching Plotly
    figure is then displayed.
    """
    st.title("Exploratory Data Analysis")

    # Interactive Visualizations
    st.subheader("Visualizations")
    chart_type = st.selectbox("Choose Chart Type", ["Histogram", "Scatter Plot", "Box Plot"])

    frame = st.session_state.data
    # Layout fragments shared by every chart type.
    centered_legend = dict(title="Legend", orientation="h", x=0.5, xanchor="center")

    def centered_title(text):
        return dict(text=text, x=0.5, font=dict(size=22))

    fig = None

    if chart_type == "Histogram":
        column = st.selectbox("Choose Column for Visualization", frame.columns)
        fig = go.Figure()
        histogram = go.Histogram(x=frame[column], name=column, marker_color="indigo")
        fig.add_trace(histogram)
        fig.update_layout(
            title=centered_title("Histogram Analysis"),
            xaxis_title=column,
            yaxis_title="Count",
            legend=centered_legend,
            bargap=0.2,
            hovermode="x unified",
            template="plotly_dark",
        )

    elif chart_type == "Scatter Plot":
        x_col = st.selectbox("Choose X-axis Column", frame.columns)
        y_col = st.selectbox("Choose Y-axis Column", frame.columns)
        marker_style = dict(size=10, color="purple", line=dict(width=1, color="white"))
        fig = go.Figure()
        scatter = go.Scatter(
            x=frame[x_col],
            y=frame[y_col],
            mode="markers",
            marker=marker_style,
            name=f"{y_col} vs {x_col}",
        )
        fig.add_trace(scatter)
        fig.update_layout(
            title=centered_title("Scatter Plot Analysis"),
            xaxis_title=x_col,
            yaxis_title=y_col,
            legend=centered_legend,
            hovermode="closest",
            template="plotly_dark",
        )

    elif chart_type == "Box Plot":
        column = st.selectbox("Choose Column for Visualization", frame.columns)
        fig = go.Figure()
        box = go.Box(y=frame[column], name=column, boxmean="sd", marker_color="teal")
        fig.add_trace(box)
        fig.update_layout(
            title=centered_title("Boxplot Analysis"),
            yaxis_title=column,
            legend=centered_legend,
            hovermode="y",
            template="plotly_dark",
        )

    # Exactly one branch above builds a figure; render it once here.
    if fig is not None:
        st.plotly_chart(fig)
124
+
125
+
126
def model_page():
    """Render the model-evaluation page.

    Loads the held-out test workbook, applies the pre-fitted encoder and
    scaler, lets the user choose one of the saved models, and shows its
    predictions, classification report and confusion matrix.
    """
    st.title("Model Evaluation")

    # All artifact paths use forward slashes. The original backslash paths
    # only worked on Windows, and "\r" in the Random Forest paths was parsed
    # as a carriage return, so those two files could never be found.
    test_data = pd.read_excel('Model Training/test_data.xlsx')

    # Encode the target variable. Assign the result back instead of using
    # replace(inplace=True) on a column selection (chained assignment).
    test_data['Health_status'] = test_data['Health_status'].replace({'healthy': 0, 'patient': 1})

    # Apply the pre-fitted encoder to the categorical columns.
    categorical_columns = ['Gender', 'Family history', 'Obese/non obese']
    encoder = joblib.load('Model Training/encoder.pkl')

    X = test_data.drop(columns=['Health_status'])
    X[categorical_columns] = encoder.transform(X[categorical_columns])
    y = test_data['Health_status']

    # Apply the pre-fitted scaler to every non-categorical feature.
    numerical_columns = [col_name for col_name in X.columns if col_name not in categorical_columns]
    scaler = joblib.load('Model Training/scaler.pkl')
    X[numerical_columns] = scaler.transform(X[numerical_columns])

    # Model Selection: display label -> saved model file.
    model_files = {
        "SVM - Linear": 'Model Training/svm_model_linear.pkl',
        "SVM - Polynomial": 'Model Training/svm_model_poly.pkl',
        "SVM - RBF": 'Model Training/svm_model_rbf.pkl',
        "Random Forest": 'Model Training/rf_model.pkl',
        "Random Forest Boosted": 'Model Training/rf_boosted.pkl',
        "Logistic Regression": 'Model Training/lr_model.pkl',
        "GDA": 'Model Training/gda.pkl',
    }
    st.text("Model Selection")
    model_choice = st.selectbox("Choose a Pre-trained Model", list(model_files))

    # Load the selected pre-trained model; nothing to show without one.
    model_path = model_files.get(model_choice)
    if model_path is None:
        return
    model = joblib.load(model_path)

    # Make Predictions
    y_pred = model.predict(X)
    col1, col2 = st.columns(2)
    with col1:
        # No leading "###": st.subheader takes plain text, not a markdown heading.
        st.subheader("Predictions on the Test Data:")
        st.dataframe(pd.DataFrame({"Actual": y, "Predicted": y_pred}))

    with col2:
        st.subheader("Classification Report")
        report = classification_report(y, y_pred, output_dict=True)
        report_df = (
            pd.DataFrame(report)
            .transpose()
            .drop(columns='support')
            .rename_axis('index')
        )
        # The report keys are the string form of the labels: '0'/'1' for
        # integer labels, '0.0'/'1.0' for float labels. Cover both.
        report_df = report_df.rename(index={
            '0': 'Negative', '1': 'Positive',
            '0.0': 'Negative', '1.0': 'Positive',
        })
        # Blank precision/recall on the accuracy row, where only one number
        # is meaningful. Cast to object first so storing '' in a numeric
        # column is a valid assignment.
        report_df = report_df.astype(object)
        report_df.loc['accuracy', ['precision', 'recall']] = ''
        st.table(report_df)

    st.subheader("Confusion Matrix")
    conf_matrix = confusion_matrix(y, y_pred)

    col1, col2 = st.columns(2)
    with col1:
        # Draw on an explicit figure, hand that figure to Streamlit, and
        # close it afterwards so figures do not accumulate across reruns.
        fig, ax = plt.subplots(figsize=(3, 3))
        sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", cbar=False, square=True, ax=ax)
        ax.set_xlabel("Predicted")
        ax.set_ylabel("Actual")
        ax.set_title("Confusion Matrix")
        st.pyplot(fig)
        plt.close(fig)
202
+
203
+
204
def prediction_page():
    """Render the diagnosis form and show the selected model's prediction.

    The user picks a saved model and fills in a form. On submit, the inputs
    are assembled into a one-row frame, run through the pre-fitted encoder
    and scaler, and passed to the model; the result is shown as Positive (1)
    or Negative (anything else).
    """
    st.title("Get Your Diagnosis")
    st.subheader("Symptoms Entry Form")

    # Model Selection: display label -> saved model file. Forward slashes are
    # used because the original backslash paths only worked on Windows, and
    # "\r" in the Random Forest paths was parsed as a carriage return.
    model_files = {
        "SVM - Linear": 'Model Training/svm_model_linear.pkl',
        "SVM - Polynomial": 'Model Training/svm_model_poly.pkl',
        "SVM - RBF": 'Model Training/svm_model_rbf.pkl',
        "Random Forest": 'Model Training/rf_model.pkl',
        "Random Forest Boosted": 'Model Training/rf_boosted.pkl',
        "Logistic Regression": 'Model Training/lr_model.pkl',
        "GDA": 'Model Training/gda.pkl',
    }
    model_choice = st.selectbox("Choose a Pre-trained Model", list(model_files))

    # Load the selected pre-trained model.
    model = joblib.load(model_files[model_choice])

    with st.form(key="health_data_form"):
        col1, col2, col3, col4 = st.columns(4)

        # NOTE(review): the option strings below must match the categories
        # the saved encoder was fitted on; that cannot be confirmed from
        # this file, so verify against the training data.
        with col1:
            gender = st.selectbox("Gender", ["Male", "Female"], key="gender")
            weight = st.number_input("Weight (kg)", min_value=0, step=1, key="weight")
            cholesterol = st.number_input("Cholesterol (mg/dL)", min_value=0, step=1, key="cholesterol")
        with col2:
            family_history = st.selectbox("Family History of Illness", ["Yes", "No"], key="family_history")
            bmi = st.number_input("BMI", min_value=0.0, step=0.1, key="bmi")
            triglycerides = st.number_input("Triglycerides Level (mg/dL)", min_value=0, step=1, key="triglycerides")

        with col3:
            height = st.number_input("Height (cm)", min_value=0.0, step=0.1, key="height")
            obese_status = st.selectbox("Obese/Non Obese", ["Obese", "Non-Obese"], key="obese_status")
            ldl = st.number_input("LDL Level (mg/dL)", min_value=0.0, step=0.1, key="ldl")

        with col4:
            vldl = st.number_input("VLDL Level (mg/dL)", min_value=0.0, step=0.1, key="vldl")

        # Submit button
        submit_button = st.form_submit_button(label="Submit")

    if submit_button:
        # Column order handed to the encoder, scaler and model.
        feature_order = ['Gender', 'Family history', 'Height', 'Weight', 'BMI', 'Obese/non obese',
                         'Cholesterol', 'Triglycerides level', 'LDL level', 'VLDL level']

        # The key must be "Triglycerides level" to match feature_order. The
        # previous "Triglycerides" key was dropped by reindex and the
        # column was silently filled with 0.
        data = pd.DataFrame({
            "Gender": [gender],
            "Family history": [family_history],
            "Height": [height],
            "Weight": [weight],
            "BMI": [bmi],
            "Obese/non obese": [obese_status],
            "Cholesterol": [cholesterol],
            "Triglycerides level": [triglycerides],
            "LDL level": [ldl],
            "VLDL level": [vldl],
        })
        data = data.reindex(columns=feature_order, fill_value=0)

        categorical_columns = ['Gender', 'Family history', 'Obese/non obese']
        numerical_columns = [col_name for col_name in data.columns if col_name not in categorical_columns]

        # Encoding categorical data
        encoder = joblib.load('Model Training/encoder.pkl')
        data[categorical_columns] = encoder.transform(data[categorical_columns])

        # Scaling the numeric features
        scaler = joblib.load('Model Training/scaler.pkl')
        data[numerical_columns] = scaler.transform(data[numerical_columns])

        prediction = int(model.predict(data)[0])
        st.write(f"### Predicted Diagnosis: {'Positive' if prediction == 1 else 'Negative'}")
287
+
288
+
289
def conclusion_page():
    """Render the "Conclusion" page: a title followed by the closing markdown."""
    takeaways_md = """
    ## Key Takeaways
    - Comprehensive EDA provides actionable insights into the data.
    - Pre-trained machine learning models allow efficient predictions.
    - The interactive app makes the analysis accessible and engaging.

    Thank you for exploring this project!
    """
    st.title("Conclusion")
    st.markdown(takeaways_md)
299
+
300
# Sidebar navigation: each radio label maps to the function that renders it.
_PAGE_RENDERERS = {
    "Introduction": introduction_page,
    "Descriptive Statistics": stats_page,
    "Data Analytics": eda_page,
    "Model Evaluation": model_page,
    "Get Your Diagnosis": prediction_page,
    "Conclusion": conclusion_page,
}

page = st.sidebar.radio("Navigation Menu", list(_PAGE_RENDERERS))

# Dispatch to the selected page; an unrecognised label renders nothing.
render_page = _PAGE_RENDERERS.get(page)
if render_page is not None:
    render_page()