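# Page header text that appears to have been captured along with the file (not program code):
# HugMeBytes's picture
# Update app.py
# a771ffb verified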
import streamlit as st
import pandas as pd
import joblib
import numpy as np
import tempfile
import plotly.express as px
# Dataset backing the overview metrics and all EDA charts.
@st.cache_data
def load_data() -> pd.DataFrame:
    """Read ``Obesity prediction.csv`` from the working directory.

    Wrapped in ``st.cache_data`` so Streamlit can reuse the parsed frame
    instead of re-reading the CSV on every script rerun.

    Returns:
        The CSV contents as a DataFrame, exactly as ``pd.read_csv`` parses them.
    """
    dataset = pd.read_csv("Obesity prediction.csv")
    return dataset


df = load_data()
# Trained model plus the preprocessing objects it was saved with.
@st.cache_resource
def load_model():
    """Load the bundle stored in ``obesity_model.pkl`` and unpack it.

    The bundle is indexed with the keys ``'model'``, ``'encoders'``,
    ``'scaler'`` and ``'feature_names'``; a missing key raises ``KeyError``.

    NOTE: ``joblib.load`` unpickles the file, so the bundle must come from a
    trusted source.

    Returns:
        A 4-tuple ``(model, scaler, label_encoders, feature_names)`` -- note
        that the scaler comes before the encoders in the returned order.
    """
    bundle = joblib.load("obesity_model.pkl")
    # Look the keys up in the same order as before so a broken bundle fails
    # on the same key.
    parts = {key: bundle[key] for key in ("model", "encoders", "scaler", "feature_names")}
    return parts["model"], parts["scaler"], parts["encoders"], parts["feature_names"]


model, scaler, label_encoders, feature_names = load_model()
# Decorative prefix for each feature name; used when labelling the bars on
# the Feature Importance page. Features without an entry get no prefix.
emoji_map = {
    "FAVC": "๐Ÿ•",
    "CH2O": "๐Ÿงƒ",
    "Gender": "๐Ÿšป",
    "Age": "๐ŸŽ‚",
    "Height": "๐Ÿ“",
    "Weight": "โš–๏ธ",
    "family_history": "๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ",
    "FCVC": "๐Ÿฅ—",
    "NCP": "๐Ÿฝ๏ธ",
    "CAEC": "๐Ÿฉ",
    "SMOKE": "๐Ÿšฌ",
    "SCC": "๐Ÿ“Š",
    "FAF": "๐Ÿƒ",
    "TUE": "๐Ÿ’ป",
    "CALC": "๐Ÿท",
    "MTRANS": "๐ŸšŒ",
}
# Sidebar navigation: the chosen label decides which page branch renders below,
# so these labels must stay identical to the strings compared against `page`.
st.sidebar.title("๐Ÿงญ Navigation")
page_labels = ["๐Ÿ“˜ Introduction", "๐Ÿ“Š EDA", "๐Ÿ”ฎ Predict", "๐Ÿ“Œ Feature Importance"]
page = st.sidebar.radio("Go to", page_labels)
# Introduction page: overview text, headline dataset metrics and a data preview.
# The markdown literals are kept flush-left on purpose: indenting their content
# would change the text that is sent to st.markdown.
if page == "๐Ÿ“˜ Introduction":
    st.title("๐Ÿฅ Obesity Prediction App")
    st.markdown("""
## ๐Ÿ“Œ App Overview
This app allows users to analyze lifestyle factors and predict obesity levels using a machine learning model.
๐ŸŽฏ **Objective:**
Provide users with personalized predictions about obesity levels based on their lifestyle and physical attributes.
### ๐Ÿ“‚ Dataset Foundation:
""")

    # One metric per column: row count, column count, distinct target classes.
    overview_metrics = [
        ("๐Ÿ“„ Total Records", df.shape[0]),
        ("๐Ÿงพ Total Features", df.shape[1]),
        ("โš–๏ธ Obesity Classes", df['Obesity'].nunique()),
    ]
    for metric_column, (metric_label, metric_value) in zip(st.columns(3), overview_metrics):
        with metric_column:
            st.metric(metric_label, metric_value)

    st.subheader("๐Ÿ” Sample Data")
    # Every value is converted to str before the first rows are displayed.
    st.dataframe(df.astype(str).head())

    st.subheader("๐Ÿ“Š Dataset Features")
    st.markdown("""
- **๐ŸŽฏ Target Variable:** `Obesity` โ€” Represents different levels of obesity.
- **๐Ÿงฌ Input Features:** The app takes both physical and behavioral attributes such as:
- ๐Ÿ‘ค **Demographics:** `Age`, `Gender`
- ๐Ÿ“ **Physical Metrics:** `Height`, `Weight`
- ๐Ÿ” **Dietary Habits:** Frequency of high-calorie food (FAVC), number of main meals, vegetable intake (FCVC), etc.
- ๐Ÿƒ **Activity Level:** Physical activity frequency (FAF), use of technology (TUE), transportation type, etc.
- ๐Ÿšฌ **Other Habits:** Smoking, alcohol intake, daily water intake (CH2O), etc.
""")

    st.subheader("๐ŸŽฏ App Goals")
    st.markdown("""
- ๐Ÿ“ˆ Help users understand how lifestyle factors relate to obesity.
- ๐Ÿง  Provide interactive visualizations to explore health behavior patterns.
- ๐Ÿค– Offer personalized obesity level predictions.
""")

    st.subheader("โš™๏ธ How the App Works")
    st.markdown("""
- ๐Ÿงน User inputs are preprocessed using **Label Encoding**.
- ๐ŸŒฒ The app uses a trained **Random Forest Classifier** to predict obesity levels.
- ๐Ÿงพ Users enter their details via a friendly input form.
- ๐Ÿ“Š The app displays the predicted **obesity level** along with helpful visual feedback.
""")
# ๐Ÿ“Š EDA Page
elif page == "๐Ÿ“Š EDA":
st.title("๐Ÿ“Š Exploratory Data Analysis")
with st.expander("1๏ธโƒฃ ๐Ÿ“‹ Dataset Basic Information", expanded=False):
col1, col2 = st.columns(2)
tempdf1 = df
with col1:
st.write("๐Ÿงฌ **Data Types:**")
st.write(tempdf1.dtypes)
with col2:
st.write("โ“ **Missing Values:**")
st.write(tempdf1.isnull().sum())
with st.expander("2๏ธโƒฃ ๐Ÿ“ˆ Summary Statistics"):
st.write(tempdf1.describe())
with st.expander("3๏ธโƒฃ ๐Ÿงฎ Obesity Distribution"):
col1, col2 = st.columns(2)
with col1:
obesity_counts = df['Obesity'].value_counts().reset_index()
obesity_counts.columns = ['Obesity Level', 'Count']
fig = px.bar(
obesity_counts,
x='Obesity Level', y='Count',
color='Obesity Level',
color_discrete_sequence=px.colors.qualitative.Set3,
labels={'Obesity Level': 'Obesity Level', 'Count': 'Count'},
title="๐Ÿ“Š Obesity Levels Count",
hover_data=['Count']
)
st.plotly_chart(fig)
with col2:
obesity_pct = df['Obesity'].value_counts(normalize=True).reset_index()
obesity_pct.columns = ['Obesity Level', 'Proportion']
fig = px.pie(
obesity_pct,
names='Obesity Level', values='Proportion',
color_discrete_sequence=px.colors.qualitative.Pastel,
title="๐Ÿ“Œ Obesity Distribution (%)",
hole=0.3
)
st.plotly_chart(fig)
with st.expander("4๏ธโƒฃ ๐Ÿ”ข Numerical Features Distribution"):
numerical_cols = df.select_dtypes(include=[np.number]).columns[:4]
for col in numerical_cols:
fig = px.histogram(
df, x=col,
nbins=20,
title=f"๐Ÿ“‰ Distribution of {col}",
color_discrete_sequence=['#636EFA']
)
st.plotly_chart(fig)
with st.expander("5๏ธโƒฃ ๐Ÿ”— Correlation Matrix"):
corr = df.corr(numeric_only=True)
fig = px.imshow(
corr,
text_auto=True,
color_continuous_scale='RdBu_r',
title="๐Ÿ“Š Feature Correlations",
width=400,
height=700
)
st.plotly_chart(fig)
with st.expander("6๏ธโƒฃ ๐Ÿ“ฆ Outlier Detection (Box Plots)"):
for col in df.select_dtypes(include=[np.number]).columns[:6]:
fig = px.box(df, y=col, title=f"๐Ÿ“ฆ Box Plot for {col}")
st.plotly_chart(fig)
with st.expander("7๏ธโƒฃ ๐Ÿ˜ป Gender vs Obesity Analysis"):
fig = px.histogram(
df, x='Obesity', color='Gender',
barmode='group',
title="๐Ÿ˜ป Obesity Distribution by Gender",
color_discrete_sequence=px.colors.qualitative.Vivid
)
st.plotly_chart(fig)
with st.expander("8๏ธโƒฃ ๐Ÿ‘ถ Age vs Obesity Analysis"):
fig = px.box(
df, x='Obesity', y='Age',
color='Obesity',
title="๐Ÿ‘ถ Age Distribution by Obesity Level"
)
st.plotly_chart(fig)
with st.expander("9๏ธโƒฃ ๐Ÿ‘จโ€๐Ÿ‘ง Family History vs Obesity"):
fig = px.histogram(
df, x='Obesity', color='family_history',
barmode='group',
title="๐Ÿ‘จโ€๐Ÿ‘ง Obesity Distribution by Family History"
)
st.plotly_chart(fig)
with st.expander("๐Ÿ”น ๐Ÿƒโ€โ™‚๏ธ Physical Activity Frequency (FAF) Analysis"):
fig = px.box(
df, x='Obesity', y='FAF',
color='Obesity',
title="๐Ÿƒโ€โ™‚๏ธ Physical Activity Frequency by Obesity Level"
)
st.plotly_chart(fig)
with st.expander("1๏ธโƒฃ 1๏ธโƒฃ ๐Ÿ’ง Water Consumption (CH2O) Analysis"):
fig = px.box(
df, x='Obesity', y='CH2O',
color='Obesity',
title="๐Ÿ’ง Daily Water Consumption by Obesity Level"
)
st.plotly_chart(fig)
with st.expander("1๏ธโƒฃ 2๏ธโƒฃ ๐Ÿ• High Caloric Food Consumption (FAVC) Analysis"):
fig = px.histogram(
df, x='Obesity', color='FAVC',
barmode='group',
title="๐Ÿ• Obesity Distribution by High Caloric Food Consumption"
)
st.plotly_chart(fig)
with st.expander("1๏ธโƒฃ 3๏ธโƒฃ ๐Ÿ’ป Technology Usage Time (TUE) Analysis"):
fig = px.box(
df, x='Obesity', y='TUE',
color='Obesity',
title="๐Ÿ’ป Technology Usage Time by Obesity Level"
)
st.plotly_chart(fig)
with st.expander("1๏ธโƒฃ 4๏ธโƒฃ ๐Ÿท Alcohol Consumption (CALC) Analysis"):
fig = px.histogram(
df, x='Obesity', color='CALC',
barmode='group',
title="๐Ÿท Obesity Distribution by Alcohol Consumption"
)
st.plotly_chart(fig)
with st.expander("1๏ธโƒฃ 5๏ธโƒฃ ๐Ÿš— Transportation Mode (MTRANS) vs Obesity"):
fig = px.histogram(
df, x='MTRANS', color='Obesity',
barmode='group',
title="๐Ÿš— Transportation Mode vs Obesity Levels"
)
st.plotly_chart(fig)
# ๐Ÿ”ฎ Predict Page
elif page == "๐Ÿ”ฎ Predict":
st.title("๐Ÿ”ฎ Obesity Prediction")
st.markdown("Fill in the details below to predict your obesity level:")
col1, col2, col3 = st.columns(3)
with col1:
gender = st.selectbox("๐Ÿšป Gender", ["Male", "Female"])
age = st.number_input("๐Ÿ“… Age", 10, 100, 25)
height = st.number_input("๐Ÿ“ Height (m)", 1.0, 2.5, 1.70)
weight = st.number_input("โš–๏ธ Weight (kg)", 30, 200, 70)
family_history = st.selectbox("๐Ÿงฌ Family History of Obesity", ["yes", "no"])
with col2:
favc = st.selectbox("๐Ÿ” Frequent High-Calorie Food (FAVC)", ["yes", "no"])
fcvc = st.slider("๐Ÿฅฆ Veggie Intake Frequency (FCVC)", 1.0, 3.0, 2.0)
ncp = st.number_input("๐Ÿฝ๏ธ Number of Main Meals (NCP)", 1.0, 4.0, 3.0)
caec = st.selectbox("๐ŸŸ Snacking Between Meals (CAEC)", ["no", "Sometimes", "Frequently", "Always"])
smoke = st.selectbox("๐Ÿšฌ Do you Smoke?", ["yes", "no"])
with col3:
ch2o = st.slider("๐Ÿ’ง Water Intake (CH2O)", 0.0, 3.0, 1.0)
scc = st.selectbox("๐Ÿ“‰ Calorie Monitoring (SCC)", ["yes", "no"])
faf = st.slider("๐Ÿƒ Physical Activity (FAF)", 0.0, 3.0, 1.0)
tue = st.slider("๐Ÿ“ฑ Tech Usage Time (TUE)", 0.0, 3.0, 1.0)
calc = st.selectbox("๐Ÿท Alcohol (CALC)", ["no", "Sometimes", "Frequently", "Always"])
mtrans = st.selectbox("๐Ÿš— Transport Mode (MTRANS)", ["Walking", "Public_Transportation", "Automobile", "Bike", "Motorbike"])
input_data = {
"Gender": gender, "Age": age, "Height": height, "Weight": weight,
"family_history": family_history, "FAVC": favc, "FCVC": fcvc, "NCP": ncp,
"CAEC": caec, "SMOKE": smoke, "CH2O": ch2o, "SCC": scc,
"FAF": faf, "TUE": tue, "CALC": calc, "MTRANS": mtrans
}
if st.button("๐Ÿ” Predict"):
input_df = pd.DataFrame([input_data])
# Apply Label Encoding
for col in input_df.columns:
if col in label_encoders:
input_df[col] = label_encoders[col].transform(input_df[col])
# Scale features
input_scaled = scaler.transform(input_df)
# Predict
prediction = model.predict(input_scaled)
# โœ… Decode the numeric prediction
decoded_prediction = label_encoders["Obesity"].inverse_transform([prediction[0]])[0]
# Save decoded prediction in session_state for PDF/report use
st.session_state["prediction"] = decoded_prediction
st.session_state["input_data"] = input_data
# Display result
st.success(f"๐ŸŽฏ **Predicted Obesity Level**: `{decoded_prediction}`")
# ๐Ÿ“Œ Feature Importance Page (Interactive with Plotly)
elif page == "๐Ÿ“Œ Feature Importance":
st.title("๐Ÿ“Œ Feature Importance")
importances = model.feature_importances_
sorted_idx = np.argsort(importances)[::-1]
sorted_features = [feature_names[i] for i in sorted_idx]
sorted_importances = importances[sorted_idx]
# ๐Ÿ† Top 5 Features
top_features = sorted_features[:5]
top_importances = sorted_importances[:5]
top_labels = [f"{emoji_map.get(f, '')} {f}" for f in top_features]
# ๐Ÿ“Š Create Plotly Bar Chart
fig = px.bar(
x=top_importances[::-1],
y=top_labels[::-1],
orientation='h',
labels={'x': 'Importance', 'y': 'Feature'},
color=top_importances[::-1],
color_continuous_scale='Turbo',
title="๐ŸŽฏ Top 5 Influential Features",
text=[f"{val:.2f}" for val in top_importances[::-1]]
)
fig.update_layout(
xaxis_title="Importance Score",
yaxis_title="",
plot_bgcolor="rgba(0,0,0,0)",
paper_bgcolor="rgba(0,0,0,0)",
font=dict(size=14),
coloraxis_showscale=False
)
fig.update_traces(textposition='outside', marker_line_width=1.2)
st.plotly_chart(fig, use_container_width=True)
st.markdown("โœจ These features contribute the most to your predicted obesity level.")