HugMeBytes committed on
Commit
7a9862d
·
verified ·
1 Parent(s): 82ca89b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +338 -343
app.py CHANGED
@@ -1,343 +1,338 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import seaborn as sns
4
- import matplotlib.pyplot as plt
5
- import joblib
6
- import numpy as np
7
- from fpdf import FPDF
8
- import tempfile
9
- import plotly.express as px
10
-
11
- # Load the dataset
12
- @st.cache_data
13
- def load_data():
14
- return pd.read_csv("Obesity prediction.csv")
15
-
16
- df = load_data()
17
-
18
- # Load the model and supporting objects
19
- @st.cache_resource
20
- def load_model():
21
- model_bundle = joblib.load("obesity_model.pkl")
22
- model = model_bundle['model']
23
- label_encoders = model_bundle['encoders']
24
- scaler = model_bundle['scaler']
25
- feature_names = model_bundle['feature_names']
26
- return model, scaler, label_encoders, feature_names
27
-
28
- model, scaler, label_encoders, feature_names = load_model()
29
-
30
- # Emoji mapping
31
- emoji_map = {
32
- "FAVC": "๐Ÿ•", "CH2O": "๐Ÿงƒ", "Gender": "๐Ÿšป", "Age": "๐ŸŽ‚", "Height": "๐Ÿ“", "Weight": "โš–๏ธ",
33
- "family_history": "๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ", "FCVC": "๐Ÿฅ—", "NCP": "๐Ÿฝ๏ธ", "CAEC": "๐Ÿฉ", "SMOKE": "๐Ÿšฌ",
34
- "SCC": "๐Ÿ“Š", "FAF": "๐Ÿƒ", "TUE": "๐Ÿ’ป", "CALC": "๐Ÿท", "MTRANS": "๐ŸšŒ"
35
- }
36
-
37
- # Navigation Sidebar
38
- st.sidebar.title("๐Ÿงญ Navigation")
39
- page = st.sidebar.radio("Go to", ["๐Ÿ“˜ Introduction", "๐Ÿ“Š EDA", "๐Ÿ”ฎ Predict", "๐Ÿ“Œ Feature Importance"])
40
-
41
- # Introduction Page
42
- if page == "๐Ÿ“˜ Introduction":
43
- st.title("๐Ÿฅ Obesity Prediction Analysis")
44
- st.markdown("""
45
- ## ๐Ÿ“Œ Project Overview
46
- This project analyzes factors contributing to obesity and builds machine learning models to predict obesity levels.
47
-
48
- ๐ŸŽฏ **Objective:** Identify key lifestyle and physical attributes that influence obesity and use them for accurate predictions.
49
-
50
- ### ๐Ÿ“‚ Dataset Information:
51
- """)
52
-
53
- col1, col2, col3 = st.columns(3)
54
- with col1:
55
- st.metric("๐Ÿ“„ Total Records", df.shape[0])
56
- with col2:
57
- st.metric("๐Ÿงพ Total Features", df.shape[1])
58
- with col3:
59
- st.metric("โš–๏ธ Obesity Classes", df['Obesity'].nunique())
60
-
61
- st.subheader("๐Ÿ” Sample Data")
62
- # for col in df.columns:
63
- # if df[col].dtype == 'object':
64
- # df[col] = df[col].apply(lambda x: str(x) if not isinstance(x, (int, float, bool, str)) else x)
65
-
66
- tempdf= df.astype(str)
67
- st.dataframe(tempdf.head())
68
-
69
- st.subheader("๐Ÿ“Š Dataset Features")
70
- st.markdown("""
71
- - **๐ŸŽฏ Target Variable:** `Obesity` โ€” Represents different levels of obesity.
72
- - **๐Ÿงฌ Input Features:** Includes both physical and behavioral attributes such as:
73
- - ๐Ÿ‘ค **Demographics:** `Age`, `Gender`
74
- - ๐Ÿ“ **Physical Metrics:** `Height`, `Weight`
75
- - ๐Ÿ” **Dietary Habits:** Frequent consumption of high-calorie food (FAVC), number of main meals, vegetable intake (FCVC), etc.
76
- - ๐Ÿƒ **Activity Level:** Physical activity frequency (FAF), use of technology (TUE), transportation type, etc.
77
- - ๐Ÿšฌ **Other Habits:** Smoking, alcohol intake, daily water intake (CH2O), etc.
78
- """)
79
-
80
- st.subheader("๐ŸŽฏ Project Goals")
81
- st.markdown("""
82
- - ๐Ÿ“ˆ Understand the correlation between lifestyle factors and obesity.
83
- - ๐Ÿง  Visualize and analyze health-related behavior patterns.
84
- - ๐Ÿค– Build a predictive model to classify individuals into obesity categories.
85
- """)
86
-
87
- st.subheader("โš™๏ธ How the Model Works")
88
- st.markdown("""
89
- - ๐Ÿงน Data is preprocessed using **Label Encoding** and **Feature Scaling**.
90
- - ๐ŸŒฒ A pre-trained **Random Forest Classifier** is used for making predictions.
91
- - ๐Ÿงพ Users enter their information through an interactive input form.
92
- - ๐Ÿ“Š The model processes inputs and returns a predicted **obesity level**.
93
- """)
94
-
95
-
96
- # ๐Ÿ“Š EDA Page
97
- elif page == "๐Ÿ“Š EDA":
98
- st.title("๐Ÿ“Š Exploratory Data Analysis")
99
-
100
- with st.expander("1๏ธโƒฃ ๐Ÿ“‹ Dataset Basic Information", expanded=False):
101
- col1, col2 = st.columns(2)
102
- tempdf1 = df
103
- with col1:
104
- st.write("๐Ÿงฌ **Data Types:**")
105
- st.write(tempdf1.dtypes)
106
- with col2:
107
- st.write("โ“ **Missing Values:**")
108
- st.write(tempdf1.isnull().sum())
109
-
110
- with st.expander("2๏ธโƒฃ ๐Ÿ“ˆ Summary Statistics"):
111
- st.write(tempdf1.describe())
112
-
113
- with st.expander("3๏ธโƒฃ ๐Ÿงฎ Obesity Distribution"):
114
- col1, col2 = st.columns(2)
115
-
116
- with col1:
117
- obesity_counts = df['Obesity'].value_counts().reset_index()
118
- obesity_counts.columns = ['Obesity Level', 'Count']
119
-
120
- fig = px.bar(
121
- obesity_counts,
122
- x='Obesity Level', y='Count',
123
- color='Obesity Level',
124
- color_discrete_sequence=px.colors.qualitative.Set3,
125
- labels={'Obesity Level': 'Obesity Level', 'Count': 'Count'},
126
- title="๐Ÿ“Š Obesity Levels Count",
127
- hover_data=['Count']
128
- )
129
- st.plotly_chart(fig)
130
-
131
- with col2:
132
- obesity_pct = df['Obesity'].value_counts(normalize=True).reset_index()
133
- obesity_pct.columns = ['Obesity Level', 'Proportion']
134
-
135
- fig = px.pie(
136
- obesity_pct,
137
- names='Obesity Level', values='Proportion',
138
- color_discrete_sequence=px.colors.qualitative.Pastel,
139
- title="๐Ÿ“Œ Obesity Distribution (%)",
140
- hole=0.3
141
- )
142
- st.plotly_chart(fig)
143
-
144
- with st.expander("4๏ธโƒฃ ๐Ÿ”ข Numerical Features Distribution"):
145
- numerical_cols = df.select_dtypes(include=[np.number]).columns[:4]
146
- for col in numerical_cols:
147
- fig = px.histogram(
148
- df, x=col,
149
- nbins=20,
150
- title=f"๐Ÿ“‰ Distribution of {col}",
151
- color_discrete_sequence=['#636EFA']
152
- )
153
- st.plotly_chart(fig)
154
-
155
- with st.expander("5๏ธโƒฃ ๐Ÿ”— Correlation Matrix"):
156
- corr = df.corr(numeric_only=True)
157
- fig = px.imshow(
158
- corr,
159
- text_auto=True,
160
- color_continuous_scale='RdBu_r',
161
- title="๐Ÿ“Š Feature Correlations",
162
- width=400,
163
- height=700
164
- )
165
- st.plotly_chart(fig)
166
-
167
- with st.expander("6๏ธโƒฃ ๐Ÿ“ฆ Outlier Detection (Box Plots)"):
168
- for col in df.select_dtypes(include=[np.number]).columns[:6]:
169
- fig = px.box(df, y=col, title=f"๐Ÿ“ฆ Box Plot for {col}")
170
- st.plotly_chart(fig)
171
-
172
- with st.expander("7๏ธโƒฃ ๐Ÿ˜ป Gender vs Obesity Analysis"):
173
- fig = px.histogram(
174
- df, x='Obesity', color='Gender',
175
- barmode='group',
176
- title="๐Ÿ˜ป Obesity Distribution by Gender",
177
- color_discrete_sequence=px.colors.qualitative.Vivid
178
- )
179
- st.plotly_chart(fig)
180
-
181
- with st.expander("8๏ธโƒฃ ๐Ÿ‘ถ Age vs Obesity Analysis"):
182
- fig = px.box(
183
- df, x='Obesity', y='Age',
184
- color='Obesity',
185
- title="๐Ÿ‘ถ Age Distribution by Obesity Level"
186
- )
187
- st.plotly_chart(fig)
188
-
189
- with st.expander("9๏ธโƒฃ ๐Ÿ‘จโ€๐Ÿ‘ง Family History vs Obesity"):
190
- fig = px.histogram(
191
- df, x='Obesity', color='family_history',
192
- barmode='group',
193
- title="๐Ÿ‘จโ€๐Ÿ‘ง Obesity Distribution by Family History"
194
- )
195
- st.plotly_chart(fig)
196
-
197
- with st.expander("๐Ÿ”น ๐Ÿƒโ€โ™‚๏ธ Physical Activity Frequency (FAF) Analysis"):
198
- fig = px.box(
199
- df, x='Obesity', y='FAF',
200
- color='Obesity',
201
- title="๐Ÿƒโ€โ™‚๏ธ Physical Activity Frequency by Obesity Level"
202
- )
203
- st.plotly_chart(fig)
204
-
205
- with st.expander("1๏ธโƒฃ 1๏ธโƒฃ ๐Ÿ’ง Water Consumption (CH2O) Analysis"):
206
- fig = px.box(
207
- df, x='Obesity', y='CH2O',
208
- color='Obesity',
209
- title="๐Ÿ’ง Daily Water Consumption by Obesity Level"
210
- )
211
- st.plotly_chart(fig)
212
-
213
- with st.expander("1๏ธโƒฃ 2๏ธโƒฃ ๐Ÿ• High Caloric Food Consumption (FAVC) Analysis"):
214
- fig = px.histogram(
215
- df, x='Obesity', color='FAVC',
216
- barmode='group',
217
- title="๐Ÿ• Obesity Distribution by High Caloric Food Consumption"
218
- )
219
- st.plotly_chart(fig)
220
-
221
- with st.expander("1๏ธโƒฃ 3๏ธโƒฃ ๐Ÿ’ป Technology Usage Time (TUE) Analysis"):
222
- fig = px.box(
223
- df, x='Obesity', y='TUE',
224
- color='Obesity',
225
- title="๐Ÿ’ป Technology Usage Time by Obesity Level"
226
- )
227
- st.plotly_chart(fig)
228
-
229
- with st.expander("1๏ธโƒฃ 4๏ธโƒฃ ๐Ÿท Alcohol Consumption (CALC) Analysis"):
230
- fig = px.histogram(
231
- df, x='Obesity', color='CALC',
232
- barmode='group',
233
- title="๐Ÿท Obesity Distribution by Alcohol Consumption"
234
- )
235
- st.plotly_chart(fig)
236
-
237
- with st.expander("1๏ธโƒฃ 5๏ธโƒฃ ๐Ÿš— Transportation Mode (MTRANS) vs Obesity"):
238
- fig = px.histogram(
239
- df, x='MTRANS', color='Obesity',
240
- barmode='group',
241
- title="๐Ÿš— Transportation Mode vs Obesity Levels"
242
- )
243
- st.plotly_chart(fig)
244
-
245
-
246
- # ๐Ÿ”ฎ Predict Page
247
- elif page == "๐Ÿ”ฎ Predict":
248
- st.title("๐Ÿ”ฎ Obesity Prediction")
249
- st.markdown("Fill in the details below to predict your obesity level:")
250
-
251
- col1, col2, col3 = st.columns(3)
252
- with col1:
253
- gender = st.selectbox("๐Ÿšป Gender", ["Male", "Female"])
254
- age = st.number_input("๐Ÿ“… Age", 10, 100, 25)
255
- height = st.number_input("๐Ÿ“ Height (m)", 1.0, 2.5, 1.70)
256
- weight = st.number_input("โš–๏ธ Weight (kg)", 30, 200, 70)
257
- family_history = st.selectbox("๐Ÿงฌ Family History of Obesity", ["yes", "no"])
258
-
259
- with col2:
260
- favc = st.selectbox("๐Ÿ” Frequent High-Calorie Food (FAVC)", ["yes", "no"])
261
- fcvc = st.slider("๐Ÿฅฆ Veggie Intake Frequency (FCVC)", 1.0, 3.0, 2.0)
262
- ncp = st.number_input("๐Ÿฝ๏ธ Number of Main Meals (NCP)", 1.0, 4.0, 3.0)
263
- caec = st.selectbox("๐ŸŸ Snacking Between Meals (CAEC)", ["no", "Sometimes", "Frequently", "Always"])
264
- smoke = st.selectbox("๐Ÿšฌ Do you Smoke?", ["yes", "no"])
265
-
266
- with col3:
267
- ch2o = st.slider("๐Ÿ’ง Water Intake (CH2O)", 0.0, 3.0, 1.0)
268
- scc = st.selectbox("๐Ÿ“‰ Calorie Monitoring (SCC)", ["yes", "no"])
269
- faf = st.slider("๐Ÿƒ Physical Activity (FAF)", 0.0, 3.0, 1.0)
270
- tue = st.slider("๐Ÿ“ฑ Tech Usage Time (TUE)", 0.0, 3.0, 1.0)
271
- calc = st.selectbox("๐Ÿท Alcohol (CALC)", ["no", "Sometimes", "Frequently", "Always"])
272
- mtrans = st.selectbox("๐Ÿš— Transport Mode (MTRANS)", ["Walking", "Public_Transportation", "Automobile", "Bike", "Motorbike"])
273
-
274
- input_data = {
275
- "Gender": gender, "Age": age, "Height": height, "Weight": weight,
276
- "family_history": family_history, "FAVC": favc, "FCVC": fcvc, "NCP": ncp,
277
- "CAEC": caec, "SMOKE": smoke, "CH2O": ch2o, "SCC": scc,
278
- "FAF": faf, "TUE": tue, "CALC": calc, "MTRANS": mtrans
279
- }
280
- if st.button("๐Ÿ” Predict"):
281
- input_df = pd.DataFrame([input_data])
282
-
283
- # Apply Label Encoding
284
- for col in input_df.columns:
285
- if col in label_encoders:
286
- input_df[col] = label_encoders[col].transform(input_df[col])
287
-
288
- # Scale features
289
- input_scaled = scaler.transform(input_df)
290
-
291
- # Predict
292
- prediction = model.predict(input_scaled)
293
-
294
- # โœ… Decode the numeric prediction
295
- decoded_prediction = label_encoders["Obesity"].inverse_transform([prediction[0]])[0]
296
-
297
- # Save decoded prediction in session_state for PDF/report use
298
- st.session_state["prediction"] = decoded_prediction
299
- st.session_state["input_data"] = input_data
300
-
301
- # Display result
302
- st.success(f"๐ŸŽฏ **Predicted Obesity Level**: `{decoded_prediction}`")
303
-
304
-
305
- # ๐Ÿ“Œ Feature Importance Page (Interactive with Plotly)
306
- elif page == "๐Ÿ“Œ Feature Importance":
307
- st.title("๐Ÿ“Œ Feature Importance")
308
-
309
- importances = model.feature_importances_
310
- sorted_idx = np.argsort(importances)[::-1]
311
- sorted_features = [feature_names[i] for i in sorted_idx]
312
- sorted_importances = importances[sorted_idx]
313
-
314
- # ๐Ÿ† Top 5 Features
315
- top_features = sorted_features[:5]
316
- top_importances = sorted_importances[:5]
317
- top_labels = [f"{emoji_map.get(f, '')} {f}" for f in top_features]
318
-
319
- # ๐Ÿ“Š Create Plotly Bar Chart
320
- fig = px.bar(
321
- x=top_importances[::-1],
322
- y=top_labels[::-1],
323
- orientation='h',
324
- labels={'x': 'Importance', 'y': 'Feature'},
325
- color=top_importances[::-1],
326
- color_continuous_scale='Turbo',
327
- title="๐ŸŽฏ Top 5 Influential Features",
328
- text=[f"{val:.2f}" for val in top_importances[::-1]]
329
- )
330
-
331
- fig.update_layout(
332
- xaxis_title="Importance Score",
333
- yaxis_title="",
334
- plot_bgcolor="rgba(0,0,0,0)",
335
- paper_bgcolor="rgba(0,0,0,0)",
336
- font=dict(size=14),
337
- coloraxis_showscale=False
338
- )
339
- fig.update_traces(textposition='outside', marker_line_width=1.2)
340
-
341
- st.plotly_chart(fig, use_container_width=True)
342
- st.markdown("โœจ These features contribute the most to your predicted obesity level.")
343
-
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import seaborn as sns
4
+ import matplotlib.pyplot as plt
5
+ import joblib
6
+ import numpy as np
7
+ import tempfile
8
+ import plotly.express as px
9
+
10
+ # Load the dataset
11
+ @st.cache_data
12
+ def load_data():
13
+ return pd.read_csv("Obesity prediction.csv")
14
+
15
+ df = load_data()
16
+
17
+ # Load the model and supporting objects
18
+ @st.cache_resource
19
+ def load_model():
20
+ model_bundle = joblib.load("obesity_model.pkl")
21
+ model = model_bundle['model']
22
+ label_encoders = model_bundle['encoders']
23
+ scaler = model_bundle['scaler']
24
+ feature_names = model_bundle['feature_names']
25
+ return model, scaler, label_encoders, feature_names
26
+
27
+ model, scaler, label_encoders, feature_names = load_model()
28
+
29
+ # Emoji mapping
30
+ emoji_map = {
31
+ "FAVC": "๐Ÿ•", "CH2O": "๐Ÿงƒ", "Gender": "๐Ÿšป", "Age": "๐ŸŽ‚", "Height": "๐Ÿ“", "Weight": "โš–๏ธ",
32
+ "family_history": "๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ", "FCVC": "๐Ÿฅ—", "NCP": "๐Ÿฝ๏ธ", "CAEC": "๐Ÿฉ", "SMOKE": "๐Ÿšฌ",
33
+ "SCC": "๐Ÿ“Š", "FAF": "๐Ÿƒ", "TUE": "๐Ÿ’ป", "CALC": "๐Ÿท", "MTRANS": "๐ŸšŒ"
34
+ }
35
+
36
+ # Navigation Sidebar
37
+ st.sidebar.title("๐Ÿงญ Navigation")
38
+ page = st.sidebar.radio("Go to", ["๐Ÿ“˜ Introduction", "๐Ÿ“Š EDA", "๐Ÿ”ฎ Predict", "๐Ÿ“Œ Feature Importance"])
39
+
40
+ # Introduction Page
41
+ if page == "๐Ÿ“˜ Introduction":
42
+ st.title("๐Ÿฅ Obesity Prediction App")
43
+ st.markdown("""
44
+ ## ๐Ÿ“Œ App Overview
45
+ This app allows users to analyze lifestyle factors and predict obesity levels using a machine learning model.
46
+
47
+ ๐ŸŽฏ **Objective:**
48
+ Provide users with personalized predictions about obesity levels based on their lifestyle and physical attributes.
49
+
50
+ ### ๐Ÿ“‚ Dataset Foundation:
51
+ """)
52
+
53
+ col1, col2, col3 = st.columns(3)
54
+ with col1:
55
+ st.metric("๐Ÿ“„ Total Records", df.shape[0])
56
+ with col2:
57
+ st.metric("๐Ÿงพ Total Features", df.shape[1])
58
+ with col3:
59
+ st.metric("โš–๏ธ Obesity Classes", df['Obesity'].nunique())
60
+
61
+ st.subheader("๐Ÿ” Sample Data")
62
+ tempdf = df.astype(str)
63
+ st.dataframe(tempdf.head())
64
+
65
+ st.subheader("๐Ÿ“Š Dataset Features")
66
+ st.markdown("""
67
+ - **๐ŸŽฏ Target Variable:** `Obesity` โ€” Represents different levels of obesity.
68
+ - **๐Ÿงฌ Input Features:** The app takes both physical and behavioral attributes such as:
69
+ - ๐Ÿ‘ค **Demographics:** `Age`, `Gender`
70
+ - ๐Ÿ“ **Physical Metrics:** `Height`, `Weight`
71
+ - ๐Ÿ” **Dietary Habits:** Frequency of high-calorie food (FAVC), number of main meals, vegetable intake (FCVC), etc.
72
+ - ๐Ÿƒ **Activity Level:** Physical activity frequency (FAF), use of technology (TUE), transportation type, etc.
73
+ - ๐Ÿšฌ **Other Habits:** Smoking, alcohol intake, daily water intake (CH2O), etc.
74
+ """)
75
+
76
+ st.subheader("๐ŸŽฏ App Goals")
77
+ st.markdown("""
78
+ - ๐Ÿ“ˆ Help users understand how lifestyle factors relate to obesity.
79
+ - ๐Ÿง  Provide interactive visualizations to explore health behavior patterns.
80
+ - ๐Ÿค– Offer personalized obesity level predictions.
81
+ """)
82
+
83
+ st.subheader("โš™๏ธ How the App Works")
84
+ st.markdown("""
85
+ - ๐Ÿงน User inputs are preprocessed using **Label Encoding**.
86
+ - ๐ŸŒฒ The app uses a trained **Random Forest Classifier** to predict obesity levels.
87
+ - ๐Ÿงพ Users enter their details via a friendly input form.
88
+ - ๐Ÿ“Š The app displays the predicted **obesity level** along with helpful visual feedback.
89
+ """)
90
+
91
+ # ๐Ÿ“Š EDA Page
92
+ elif page == "๐Ÿ“Š EDA":
93
+ st.title("๐Ÿ“Š Exploratory Data Analysis")
94
+
95
+ with st.expander("1๏ธโƒฃ ๐Ÿ“‹ Dataset Basic Information", expanded=False):
96
+ col1, col2 = st.columns(2)
97
+ tempdf1 = df
98
+ with col1:
99
+ st.write("๐Ÿงฌ **Data Types:**")
100
+ st.write(tempdf1.dtypes)
101
+ with col2:
102
+ st.write("โ“ **Missing Values:**")
103
+ st.write(tempdf1.isnull().sum())
104
+
105
+ with st.expander("2๏ธโƒฃ ๐Ÿ“ˆ Summary Statistics"):
106
+ st.write(tempdf1.describe())
107
+
108
+ with st.expander("3๏ธโƒฃ ๐Ÿงฎ Obesity Distribution"):
109
+ col1, col2 = st.columns(2)
110
+
111
+ with col1:
112
+ obesity_counts = df['Obesity'].value_counts().reset_index()
113
+ obesity_counts.columns = ['Obesity Level', 'Count']
114
+
115
+ fig = px.bar(
116
+ obesity_counts,
117
+ x='Obesity Level', y='Count',
118
+ color='Obesity Level',
119
+ color_discrete_sequence=px.colors.qualitative.Set3,
120
+ labels={'Obesity Level': 'Obesity Level', 'Count': 'Count'},
121
+ title="๐Ÿ“Š Obesity Levels Count",
122
+ hover_data=['Count']
123
+ )
124
+ st.plotly_chart(fig)
125
+
126
+ with col2:
127
+ obesity_pct = df['Obesity'].value_counts(normalize=True).reset_index()
128
+ obesity_pct.columns = ['Obesity Level', 'Proportion']
129
+
130
+ fig = px.pie(
131
+ obesity_pct,
132
+ names='Obesity Level', values='Proportion',
133
+ color_discrete_sequence=px.colors.qualitative.Pastel,
134
+ title="๐Ÿ“Œ Obesity Distribution (%)",
135
+ hole=0.3
136
+ )
137
+ st.plotly_chart(fig)
138
+
139
+ with st.expander("4๏ธโƒฃ ๐Ÿ”ข Numerical Features Distribution"):
140
+ numerical_cols = df.select_dtypes(include=[np.number]).columns[:4]
141
+ for col in numerical_cols:
142
+ fig = px.histogram(
143
+ df, x=col,
144
+ nbins=20,
145
+ title=f"๐Ÿ“‰ Distribution of {col}",
146
+ color_discrete_sequence=['#636EFA']
147
+ )
148
+ st.plotly_chart(fig)
149
+
150
+ with st.expander("5๏ธโƒฃ ๐Ÿ”— Correlation Matrix"):
151
+ corr = df.corr(numeric_only=True)
152
+ fig = px.imshow(
153
+ corr,
154
+ text_auto=True,
155
+ color_continuous_scale='RdBu_r',
156
+ title="๐Ÿ“Š Feature Correlations",
157
+ width=400,
158
+ height=700
159
+ )
160
+ st.plotly_chart(fig)
161
+
162
+ with st.expander("6๏ธโƒฃ ๐Ÿ“ฆ Outlier Detection (Box Plots)"):
163
+ for col in df.select_dtypes(include=[np.number]).columns[:6]:
164
+ fig = px.box(df, y=col, title=f"๐Ÿ“ฆ Box Plot for {col}")
165
+ st.plotly_chart(fig)
166
+
167
+ with st.expander("7๏ธโƒฃ ๐Ÿ˜ป Gender vs Obesity Analysis"):
168
+ fig = px.histogram(
169
+ df, x='Obesity', color='Gender',
170
+ barmode='group',
171
+ title="๐Ÿ˜ป Obesity Distribution by Gender",
172
+ color_discrete_sequence=px.colors.qualitative.Vivid
173
+ )
174
+ st.plotly_chart(fig)
175
+
176
+ with st.expander("8๏ธโƒฃ ๐Ÿ‘ถ Age vs Obesity Analysis"):
177
+ fig = px.box(
178
+ df, x='Obesity', y='Age',
179
+ color='Obesity',
180
+ title="๐Ÿ‘ถ Age Distribution by Obesity Level"
181
+ )
182
+ st.plotly_chart(fig)
183
+
184
+ with st.expander("9๏ธโƒฃ ๐Ÿ‘จโ€๐Ÿ‘ง Family History vs Obesity"):
185
+ fig = px.histogram(
186
+ df, x='Obesity', color='family_history',
187
+ barmode='group',
188
+ title="๐Ÿ‘จโ€๐Ÿ‘ง Obesity Distribution by Family History"
189
+ )
190
+ st.plotly_chart(fig)
191
+
192
+ with st.expander("๐Ÿ”น ๐Ÿƒโ€โ™‚๏ธ Physical Activity Frequency (FAF) Analysis"):
193
+ fig = px.box(
194
+ df, x='Obesity', y='FAF',
195
+ color='Obesity',
196
+ title="๐Ÿƒโ€โ™‚๏ธ Physical Activity Frequency by Obesity Level"
197
+ )
198
+ st.plotly_chart(fig)
199
+
200
+ with st.expander("1๏ธโƒฃ 1๏ธโƒฃ ๐Ÿ’ง Water Consumption (CH2O) Analysis"):
201
+ fig = px.box(
202
+ df, x='Obesity', y='CH2O',
203
+ color='Obesity',
204
+ title="๐Ÿ’ง Daily Water Consumption by Obesity Level"
205
+ )
206
+ st.plotly_chart(fig)
207
+
208
+ with st.expander("1๏ธโƒฃ 2๏ธโƒฃ ๐Ÿ• High Caloric Food Consumption (FAVC) Analysis"):
209
+ fig = px.histogram(
210
+ df, x='Obesity', color='FAVC',
211
+ barmode='group',
212
+ title="๐Ÿ• Obesity Distribution by High Caloric Food Consumption"
213
+ )
214
+ st.plotly_chart(fig)
215
+
216
+ with st.expander("1๏ธโƒฃ 3๏ธโƒฃ ๐Ÿ’ป Technology Usage Time (TUE) Analysis"):
217
+ fig = px.box(
218
+ df, x='Obesity', y='TUE',
219
+ color='Obesity',
220
+ title="๐Ÿ’ป Technology Usage Time by Obesity Level"
221
+ )
222
+ st.plotly_chart(fig)
223
+
224
+ with st.expander("1๏ธโƒฃ 4๏ธโƒฃ ๐Ÿท Alcohol Consumption (CALC) Analysis"):
225
+ fig = px.histogram(
226
+ df, x='Obesity', color='CALC',
227
+ barmode='group',
228
+ title="๐Ÿท Obesity Distribution by Alcohol Consumption"
229
+ )
230
+ st.plotly_chart(fig)
231
+
232
+ with st.expander("1๏ธโƒฃ 5๏ธโƒฃ ๐Ÿš— Transportation Mode (MTRANS) vs Obesity"):
233
+ fig = px.histogram(
234
+ df, x='MTRANS', color='Obesity',
235
+ barmode='group',
236
+ title="๐Ÿš— Transportation Mode vs Obesity Levels"
237
+ )
238
+ st.plotly_chart(fig)
239
+
240
+
241
+ # ๐Ÿ”ฎ Predict Page
242
+ elif page == "๐Ÿ”ฎ Predict":
243
+ st.title("๐Ÿ”ฎ Obesity Prediction")
244
+ st.markdown("Fill in the details below to predict your obesity level:")
245
+
246
+ col1, col2, col3 = st.columns(3)
247
+ with col1:
248
+ gender = st.selectbox("๐Ÿšป Gender", ["Male", "Female"])
249
+ age = st.number_input("๐Ÿ“… Age", 10, 100, 25)
250
+ height = st.number_input("๐Ÿ“ Height (m)", 1.0, 2.5, 1.70)
251
+ weight = st.number_input("โš–๏ธ Weight (kg)", 30, 200, 70)
252
+ family_history = st.selectbox("๐Ÿงฌ Family History of Obesity", ["yes", "no"])
253
+
254
+ with col2:
255
+ favc = st.selectbox("๐Ÿ” Frequent High-Calorie Food (FAVC)", ["yes", "no"])
256
+ fcvc = st.slider("๐Ÿฅฆ Veggie Intake Frequency (FCVC)", 1.0, 3.0, 2.0)
257
+ ncp = st.number_input("๐Ÿฝ๏ธ Number of Main Meals (NCP)", 1.0, 4.0, 3.0)
258
+ caec = st.selectbox("๐ŸŸ Snacking Between Meals (CAEC)", ["no", "Sometimes", "Frequently", "Always"])
259
+ smoke = st.selectbox("๐Ÿšฌ Do you Smoke?", ["yes", "no"])
260
+
261
+ with col3:
262
+ ch2o = st.slider("๐Ÿ’ง Water Intake (CH2O)", 0.0, 3.0, 1.0)
263
+ scc = st.selectbox("๐Ÿ“‰ Calorie Monitoring (SCC)", ["yes", "no"])
264
+ faf = st.slider("๐Ÿƒ Physical Activity (FAF)", 0.0, 3.0, 1.0)
265
+ tue = st.slider("๐Ÿ“ฑ Tech Usage Time (TUE)", 0.0, 3.0, 1.0)
266
+ calc = st.selectbox("๐Ÿท Alcohol (CALC)", ["no", "Sometimes", "Frequently", "Always"])
267
+ mtrans = st.selectbox("๐Ÿš— Transport Mode (MTRANS)", ["Walking", "Public_Transportation", "Automobile", "Bike", "Motorbike"])
268
+
269
+ input_data = {
270
+ "Gender": gender, "Age": age, "Height": height, "Weight": weight,
271
+ "family_history": family_history, "FAVC": favc, "FCVC": fcvc, "NCP": ncp,
272
+ "CAEC": caec, "SMOKE": smoke, "CH2O": ch2o, "SCC": scc,
273
+ "FAF": faf, "TUE": tue, "CALC": calc, "MTRANS": mtrans
274
+ }
275
+ if st.button("๐Ÿ” Predict"):
276
+ input_df = pd.DataFrame([input_data])
277
+
278
+ # Apply Label Encoding
279
+ for col in input_df.columns:
280
+ if col in label_encoders:
281
+ input_df[col] = label_encoders[col].transform(input_df[col])
282
+
283
+ # Scale features
284
+ input_scaled = scaler.transform(input_df)
285
+
286
+ # Predict
287
+ prediction = model.predict(input_scaled)
288
+
289
+ # โœ… Decode the numeric prediction
290
+ decoded_prediction = label_encoders["Obesity"].inverse_transform([prediction[0]])[0]
291
+
292
+ # Save decoded prediction in session_state for PDF/report use
293
+ st.session_state["prediction"] = decoded_prediction
294
+ st.session_state["input_data"] = input_data
295
+
296
+ # Display result
297
+ st.success(f"๐ŸŽฏ **Predicted Obesity Level**: `{decoded_prediction}`")
298
+
299
+
300
+ # ๐Ÿ“Œ Feature Importance Page (Interactive with Plotly)
301
+ elif page == "๐Ÿ“Œ Feature Importance":
302
+ st.title("๐Ÿ“Œ Feature Importance")
303
+
304
+ importances = model.feature_importances_
305
+ sorted_idx = np.argsort(importances)[::-1]
306
+ sorted_features = [feature_names[i] for i in sorted_idx]
307
+ sorted_importances = importances[sorted_idx]
308
+
309
+ # ๐Ÿ† Top 5 Features
310
+ top_features = sorted_features[:5]
311
+ top_importances = sorted_importances[:5]
312
+ top_labels = [f"{emoji_map.get(f, '')} {f}" for f in top_features]
313
+
314
+ # ๐Ÿ“Š Create Plotly Bar Chart
315
+ fig = px.bar(
316
+ x=top_importances[::-1],
317
+ y=top_labels[::-1],
318
+ orientation='h',
319
+ labels={'x': 'Importance', 'y': 'Feature'},
320
+ color=top_importances[::-1],
321
+ color_continuous_scale='Turbo',
322
+ title="๐ŸŽฏ Top 5 Influential Features",
323
+ text=[f"{val:.2f}" for val in top_importances[::-1]]
324
+ )
325
+
326
+ fig.update_layout(
327
+ xaxis_title="Importance Score",
328
+ yaxis_title="",
329
+ plot_bgcolor="rgba(0,0,0,0)",
330
+ paper_bgcolor="rgba(0,0,0,0)",
331
+ font=dict(size=14),
332
+ coloraxis_showscale=False
333
+ )
334
+ fig.update_traces(textposition='outside', marker_line_width=1.2)
335
+
336
+ st.plotly_chart(fig, use_container_width=True)
337
+ st.markdown("โœจ These features contribute the most to your predicted obesity level.")
338
+