# Hugging Face Space: HugMeBytes/Obesity_Predictor (status when captured: Sleeping)
# Revision 7a9862d, original file size: 12,686 Bytes

import streamlit as st
import pandas as pd
import joblib
import numpy as np
import tempfile
import plotly.express as px

# Load the dataset
@st.cache_data
def load_data():
    """Read ``Obesity prediction.csv`` (relative path) into a DataFrame.

    Decorated with ``st.cache_data`` so Streamlit can reuse the parsed
    result instead of re-reading the CSV on every script rerun.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    dataset = pd.read_csv("Obesity prediction.csv")
    return dataset


# Module-level dataset used by the Introduction and EDA pages.
df = load_data()

# Load the model and supporting objects
@st.cache_resource
def load_model():
    """Load the bundle stored in ``obesity_model.pkl`` and unpack its parts.

    Returns:
        A ``(model, scaler, label_encoders, feature_names)`` tuple built from
        the bundle's ``'model'``, ``'scaler'``, ``'encoders'`` and
        ``'feature_names'`` entries.

    Raises:
        KeyError: If the bundle lacks one of those four entries.
    """
    # NOTE: joblib.load unpickles the file, so it must only ever be pointed
    # at a trusted artifact.
    bundle = joblib.load("obesity_model.pkl")
    parts = {
        key: bundle[key]
        for key in ('model', 'encoders', 'scaler', 'feature_names')
    }
    return parts['model'], parts['scaler'], parts['encoders'], parts['feature_names']


# Module-level model artifacts used by the Predict and Feature Importance pages.
model, scaler, label_encoders, feature_names = load_model()

# Decorative emoji keyed by dataset column name. The Feature Importance page
# looks names up with emoji_map.get(name, ''), so a missing key just yields
# no emoji.
emoji_map = {
    "FAVC": "🍕",
    "CH2O": "🧃",
    "Gender": "🚻",
    "Age": "🎂",
    "Height": "📏",
    "Weight": "⚖️",
    "family_history": "👨‍👩‍👧‍👦",
    "FCVC": "🥗",
    "NCP": "🍽️",
    "CAEC": "🍩",
    "SMOKE": "🚬",
    "SCC": "📊",
    "FAF": "🏃",
    "TUE": "💻",
    "CALC": "🍷",
    "MTRANS": "🚌",
}

# Sidebar navigation: the label picked here decides which page branch of the
# if/elif chain below is rendered on this run.
st.sidebar.title("🧭 Navigation")
page_labels = [
    "📘 Introduction",
    "📊 EDA",
    "🔮 Predict",
    "📌 Feature Importance",
]
page = st.sidebar.radio("Go to", page_labels)

# Introduction Page
if page == "📘 Introduction":
    # Landing page: overview text, headline dataset numbers, a small data
    # preview, and static descriptions of the features and workflow.
    st.title("🏥 Obesity Prediction App")
    st.markdown("""
    ## 📌 App Overview  
    This app allows users to analyze lifestyle factors and predict obesity levels using a machine learning model.
    
    🎯 **Objective:**  
    Provide users with personalized predictions about obesity levels based on their lifestyle and physical attributes.
    
    ### 📂 Dataset Foundation:
    """)

    # Headline numbers. NOTE: df.shape[1] counts every column of the CSV, so
    # the `Obesity` target column is included in "Total Features".
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("📄 Total Records", df.shape[0])
    with col2:
        st.metric("🧾 Total Features", df.shape[1])
    with col3:
        st.metric("⚖️ Obesity Classes", df['Obesity'].nunique())

    st.subheader("🔍 Sample Data")
    # Cast only the previewed rows to strings. Converting the entire dataset
    # first and then taking head() shows the same rows but redoes the
    # full-table conversion on every rerun.
    st.dataframe(df.head().astype(str))

    st.subheader("📊 Dataset Features")
    st.markdown("""
    - **🎯 Target Variable:** `Obesity` — Represents different levels of obesity.
    - **🧬 Input Features:** The app takes both physical and behavioral attributes such as:
        - 👤 **Demographics:** `Age`, `Gender`
        - 📏 **Physical Metrics:** `Height`, `Weight`
        - 🍔 **Dietary Habits:** Frequency of high-calorie food (FAVC), number of main meals, vegetable intake (FCVC), etc.
        - 🏃 **Activity Level:** Physical activity frequency (FAF), use of technology (TUE), transportation type, etc.
        - 🚬 **Other Habits:** Smoking, alcohol intake, daily water intake (CH2O), etc.
    """)

    st.subheader("🎯 App Goals")
    st.markdown("""
    - 📈 Help users understand how lifestyle factors relate to obesity.
    - 🧠 Provide interactive visualizations to explore health behavior patterns.
    - 🤖 Offer personalized obesity level predictions.
    """)

    st.subheader("⚙️ How the App Works")
    st.markdown("""
    - 🧹 User inputs are preprocessed using **Label Encoding**.
    - 🌲 The app uses a trained **Random Forest Classifier** to predict obesity levels.
    - 🧾 Users enter their details via a friendly input form.
    - 📊 The app displays the predicted **obesity level** along with helpful visual feedback.
    """)

# 📊 EDA Page
elif page == "📊 EDA":
    st.title("📊 Exploratory Data Analysis")

    with st.expander("1️⃣ 📋 Dataset Basic Information", expanded=False):
        col1, col2 = st.columns(2)
        tempdf1 = df
        with col1:
            st.write("🧬 **Data Types:**")
            st.write(tempdf1.dtypes)
        with col2:
            st.write("❓ **Missing Values:**")
            st.write(tempdf1.isnull().sum())

    with st.expander("2️⃣ 📈 Summary Statistics"):
        st.write(tempdf1.describe())

    with st.expander("3️⃣ 🧮 Obesity Distribution"):
        col1, col2 = st.columns(2)

        with col1:
            obesity_counts = df['Obesity'].value_counts().reset_index()
            obesity_counts.columns = ['Obesity Level', 'Count']

            fig = px.bar(
                obesity_counts,
                x='Obesity Level', y='Count',
                color='Obesity Level',
                color_discrete_sequence=px.colors.qualitative.Set3,
                labels={'Obesity Level': 'Obesity Level', 'Count': 'Count'},
                title="📊 Obesity Levels Count",
                hover_data=['Count']
            )
            st.plotly_chart(fig)

        with col2:
            obesity_pct = df['Obesity'].value_counts(normalize=True).reset_index()
            obesity_pct.columns = ['Obesity Level', 'Proportion']

            fig = px.pie(
                obesity_pct,
                names='Obesity Level', values='Proportion',
                color_discrete_sequence=px.colors.qualitative.Pastel,
                title="📌 Obesity Distribution (%)",
                hole=0.3
            )
            st.plotly_chart(fig)

    with st.expander("4️⃣ 🔢 Numerical Features Distribution"):
        numerical_cols = df.select_dtypes(include=[np.number]).columns[:4]
        for col in numerical_cols:
            fig = px.histogram(
                df, x=col,
                nbins=20,
                title=f"📉 Distribution of {col}",
                color_discrete_sequence=['#636EFA']
            )
            st.plotly_chart(fig)

    with st.expander("5️⃣ 🔗 Correlation Matrix"):
        corr = df.corr(numeric_only=True)
        fig = px.imshow(
            corr,
            text_auto=True,
            color_continuous_scale='RdBu_r',
            title="📊 Feature Correlations",
            width=400,
            height=700
        )
        st.plotly_chart(fig)

    with st.expander("6️⃣ 📦 Outlier Detection (Box Plots)"):
        for col in df.select_dtypes(include=[np.number]).columns[:6]:
            fig = px.box(df, y=col, title=f"📦 Box Plot for {col}")
            st.plotly_chart(fig)

    with st.expander("7️⃣ 😻 Gender vs Obesity Analysis"):
        fig = px.histogram(
            df, x='Obesity', color='Gender',
            barmode='group',
            title="😻 Obesity Distribution by Gender",
            color_discrete_sequence=px.colors.qualitative.Vivid
        )
        st.plotly_chart(fig)

    with st.expander("8️⃣ 👶 Age vs Obesity Analysis"):
        fig = px.box(
            df, x='Obesity', y='Age',
            color='Obesity',
            title="👶 Age Distribution by Obesity Level"
        )
        st.plotly_chart(fig)

    with st.expander("9️⃣ 👨‍👧 Family History vs Obesity"):
        fig = px.histogram(
            df, x='Obesity', color='family_history',
            barmode='group',
            title="👨‍👧 Obesity Distribution by Family History"
        )
        st.plotly_chart(fig)

    with st.expander("🔹 🏃‍♂️ Physical Activity Frequency (FAF) Analysis"):
        fig = px.box(
            df, x='Obesity', y='FAF',
            color='Obesity',
            title="🏃‍♂️ Physical Activity Frequency by Obesity Level"
        )
        st.plotly_chart(fig)

    with st.expander("1️⃣ 1️⃣ 💧 Water Consumption (CH2O) Analysis"):
        fig = px.box(
            df, x='Obesity', y='CH2O',
            color='Obesity',
            title="💧 Daily Water Consumption by Obesity Level"
        )
        st.plotly_chart(fig)

    with st.expander("1️⃣ 2️⃣ 🍕 High Caloric Food Consumption (FAVC) Analysis"):
        fig = px.histogram(
            df, x='Obesity', color='FAVC',
            barmode='group',
            title="🍕 Obesity Distribution by High Caloric Food Consumption"
        )
        st.plotly_chart(fig)

    with st.expander("1️⃣ 3️⃣ 💻 Technology Usage Time (TUE) Analysis"):
        fig = px.box(
            df, x='Obesity', y='TUE',
            color='Obesity',
            title="💻 Technology Usage Time by Obesity Level"
        )
        st.plotly_chart(fig)

    with st.expander("1️⃣ 4️⃣ 🍷 Alcohol Consumption (CALC) Analysis"):
        fig = px.histogram(
            df, x='Obesity', color='CALC',
            barmode='group',
            title="🍷 Obesity Distribution by Alcohol Consumption"
        )
        st.plotly_chart(fig)

    with st.expander("1️⃣ 5️⃣ 🚗 Transportation Mode (MTRANS) vs Obesity"):
        fig = px.histogram(
            df, x='MTRANS', color='Obesity',
            barmode='group',
            title="🚗 Transportation Mode vs Obesity Levels"
        )
        st.plotly_chart(fig)


# 🔮 Predict Page
elif page == "🔮 Predict":
    st.title("🔮 Obesity Prediction")
    st.markdown("Fill in the details below to predict your obesity level:")

    col1, col2, col3 = st.columns(3)
    with col1:
        gender = st.selectbox("🚻 Gender", ["Male", "Female"])
        age = st.number_input("📅 Age", 10, 100, 25)
        height = st.number_input("📏 Height (m)", 1.0, 2.5, 1.70)
        weight = st.number_input("⚖️ Weight (kg)", 30, 200, 70)
        family_history = st.selectbox("🧬 Family History of Obesity", ["yes", "no"])

    with col2:
        favc = st.selectbox("🍔 Frequent High-Calorie Food (FAVC)", ["yes", "no"])
        fcvc = st.slider("🥦 Veggie Intake Frequency (FCVC)", 1.0, 3.0, 2.0)
        ncp = st.number_input("🍽️ Number of Main Meals (NCP)", 1.0, 4.0, 3.0)
        caec = st.selectbox("🍟 Snacking Between Meals (CAEC)", ["no", "Sometimes", "Frequently", "Always"])
        smoke = st.selectbox("🚬 Do you Smoke?", ["yes", "no"])

    with col3:
        ch2o = st.slider("💧 Water Intake (CH2O)", 0.0, 3.0, 1.0)
        scc = st.selectbox("📉 Calorie Monitoring (SCC)", ["yes", "no"])
        faf = st.slider("🏃 Physical Activity (FAF)", 0.0, 3.0, 1.0)
        tue = st.slider("📱 Tech Usage Time (TUE)", 0.0, 3.0, 1.0)
        calc = st.selectbox("🍷 Alcohol (CALC)", ["no", "Sometimes", "Frequently", "Always"])
        mtrans = st.selectbox("🚗 Transport Mode (MTRANS)", ["Walking", "Public_Transportation", "Automobile", "Bike", "Motorbike"])

    input_data = {
        "Gender": gender, "Age": age, "Height": height, "Weight": weight,
        "family_history": family_history, "FAVC": favc, "FCVC": fcvc, "NCP": ncp,
        "CAEC": caec, "SMOKE": smoke, "CH2O": ch2o, "SCC": scc,
        "FAF": faf, "TUE": tue, "CALC": calc, "MTRANS": mtrans
    }
    if st.button("🔍 Predict"):
        input_df = pd.DataFrame([input_data])

        # Apply Label Encoding
        for col in input_df.columns:
            if col in label_encoders:
                input_df[col] = label_encoders[col].transform(input_df[col])

        # Scale features
        input_scaled = scaler.transform(input_df)

        # Predict
        prediction = model.predict(input_scaled)

        # ✅ Decode the numeric prediction
        decoded_prediction = label_encoders["Obesity"].inverse_transform([prediction[0]])[0]

        # Save decoded prediction in session_state for PDF/report use
        st.session_state["prediction"] = decoded_prediction
        st.session_state["input_data"] = input_data

        # Display result
        st.success(f"🎯 **Predicted Obesity Level**: `{decoded_prediction}`")


# 📌 Feature Importance Page (Interactive with Plotly)
elif page == "📌 Feature Importance":
    st.title("📌 Feature Importance")

    importances = model.feature_importances_
    sorted_idx = np.argsort(importances)[::-1]
    sorted_features = [feature_names[i] for i in sorted_idx]
    sorted_importances = importances[sorted_idx]

    # 🏆 Top 5 Features
    top_features = sorted_features[:5]
    top_importances = sorted_importances[:5]
    top_labels = [f"{emoji_map.get(f, '')} {f}" for f in top_features]

    # 📊 Create Plotly Bar Chart
    fig = px.bar(
        x=top_importances[::-1],
        y=top_labels[::-1],
        orientation='h',
        labels={'x': 'Importance', 'y': 'Feature'},
        color=top_importances[::-1],
        color_continuous_scale='Turbo',
        title="🎯 Top 5 Influential Features",
        text=[f"{val:.2f}" for val in top_importances[::-1]]
    )

    fig.update_layout(
        xaxis_title="Importance Score",
        yaxis_title="",
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        font=dict(size=14),
        coloraxis_showscale=False
    )
    fig.update_traces(textposition='outside', marker_line_width=1.2)

    st.plotly_chart(fig, use_container_width=True)
    st.markdown("✨ These features contribute the most to your predicted obesity level.")