Spaces:

HugMeBytes
/

Obesity_Predictor

Sleeping

App Files Files Community

HugMeBytes committed on Jun 11, 2025

Commit

82ca89b

verified ·

1 Parent(s): 1495aff

Initial commit

Browse files

Files changed (5) hide show

Dockerfile +12 -14
Obesity prediction.csv +0 -0
app.py +343 -0
obesity_model.pkl +3 -0
requirements.txt +9 -3

Dockerfile CHANGED Viewed

@@ -1,21 +1,19 @@
-FROM python:3.9-slim
-WORKDIR /app
-RUN apt-get update && apt-get install -y \
-    build-essential \
-    curl \
-    software-properties-common \
-    git \
-    && rm -rf /var/lib/apt/lists/*
-COPY requirements.txt ./
-COPY src/ ./src/
-RUN pip3 install -r requirements.txt
 EXPOSE 8501
-HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
-ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]

+# Use a lightweight Python base image
+FROM python:3.10-slim
+# Set working directory
+WORKDIR /app
+# Copy only the dependency manifest first, so the install layer below stays
+# cached until requirements.txt itself changes
+COPY requirements.txt .
+# Install dependencies; --no-cache-dir keeps pip's download cache out of the image
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the project (app code, dataset, model) into the container
+COPY . .
+# Expose Streamlit's default port
 EXPOSE 8501
+# Run your Streamlit app
+CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]

Obesity prediction.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

app.py ADDED Viewed

	@@ -0,0 +1,343 @@

+import streamlit as st
+import pandas as pd
+import seaborn as sns
+import matplotlib.pyplot as plt
+import joblib
+import numpy as np
+from fpdf import FPDF
+import tempfile
+import plotly.express as px
+# Load the dataset
+@st.cache_data
+def load_data():
+    return pd.read_csv("Obesity prediction.csv")
+df = load_data()
+# Load the model and supporting objects
+@st.cache_resource
+def load_model():
+    model_bundle = joblib.load("obesity_model.pkl")
+    model = model_bundle['model']
+    label_encoders = model_bundle['encoders']
+    scaler = model_bundle['scaler']
+    feature_names = model_bundle['feature_names']
+    return model, scaler, label_encoders, feature_names
+model, scaler, label_encoders, feature_names = load_model()
+# Emoji mapping
+emoji_map = {
+    "FAVC": "🍕", "CH2O": "🧃", "Gender": "🚻", "Age": "🎂", "Height": "📏", "Weight": "⚖️",
+    "family_history": "👨‍👩‍👧‍👦", "FCVC": "🥗", "NCP": "🍽️", "CAEC": "🍩", "SMOKE": "🚬",
+    "SCC": "📊", "FAF": "🏃", "TUE": "💻", "CALC": "🍷", "MTRANS": "🚌"
+}
+# Navigation Sidebar
+st.sidebar.title("🧭 Navigation")
+page = st.sidebar.radio("Go to", ["📘 Introduction", "📊 EDA", "🔮 Predict", "📌 Feature Importance"])
+# Introduction Page
+if page == "📘 Introduction":
+    st.title("🏥 Obesity Prediction Analysis")
+    st.markdown("""
+    ## 📌 Project Overview
+    This project analyzes factors contributing to obesity and builds machine learning models to predict obesity levels.
+    🎯 **Objective:** Identify key lifestyle and physical attributes that influence obesity and use them for accurate predictions.
+    ### 📂 Dataset Information:
+    """)
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        st.metric("📄 Total Records", df.shape[0])
+    with col2:
+        st.metric("🧾 Total Features", df.shape[1])
+    with col3:
+        st.metric("⚖️ Obesity Classes", df['Obesity'].nunique())
+    st.subheader("🔍 Sample Data")
+    # for col in df.columns:
+    #     if df[col].dtype == 'object':
+    #         df[col] = df[col].apply(lambda x: str(x) if not isinstance(x, (int, float, bool, str)) else x)
+    tempdf= df.astype(str)
+    st.dataframe(tempdf.head())
+    st.subheader("📊 Dataset Features")
+    st.markdown("""
+    - **🎯 Target Variable:** `Obesity` — Represents different levels of obesity.
+    - **🧬 Input Features:** Includes both physical and behavioral attributes such as:
+        - 👤 **Demographics:** `Age`, `Gender`
+        - 📏 **Physical Metrics:** `Height`, `Weight`
+        - 🍔 **Dietary Habits:** Frequent consumption of high-calorie food (FAVC), number of main meals, vegetable intake (FCVC), etc.
+        - 🏃 **Activity Level:** Physical activity frequency (FAF), use of technology (TUE), transportation type, etc.
+        - 🚬 **Other Habits:** Smoking, alcohol intake, daily water intake (CH2O), etc.
+    """)
+    st.subheader("🎯 Project Goals")
+    st.markdown("""
+    - 📈 Understand the correlation between lifestyle factors and obesity.
+    - 🧠 Visualize and analyze health-related behavior patterns.
+    - 🤖 Build a predictive model to classify individuals into obesity categories.
+    """)
+    st.subheader("⚙️ How the Model Works")
+    st.markdown("""
+    - 🧹 Data is preprocessed using **Label Encoding** and **Feature Scaling**.
+    - 🌲 A pre-trained **Random Forest Classifier** is used for making predictions.
+    - 🧾 Users enter their information through an interactive input form.
+    - 📊 The model processes inputs and returns a predicted **obesity level**.
+    """)
+# 📊 EDA Page
+elif page == "📊 EDA":
+    st.title("📊 Exploratory Data Analysis")
+    with st.expander("1️⃣ 📋 Dataset Basic Information", expanded=False):
+        col1, col2 = st.columns(2)
+        tempdf1 = df
+        with col1:
+            st.write("🧬 **Data Types:**")
+            st.write(tempdf1.dtypes)
+        with col2:
+            st.write("❓ **Missing Values:**")
+            st.write(tempdf1.isnull().sum())
+    with st.expander("2️⃣ 📈 Summary Statistics"):
+        st.write(tempdf1.describe())
+    with st.expander("3️⃣ 🧮 Obesity Distribution"):
+        col1, col2 = st.columns(2)
+        with col1:
+            obesity_counts = df['Obesity'].value_counts().reset_index()
+            obesity_counts.columns = ['Obesity Level', 'Count']
+            fig = px.bar(
+                obesity_counts,
+                x='Obesity Level', y='Count',
+                color='Obesity Level',
+                color_discrete_sequence=px.colors.qualitative.Set3,
+                labels={'Obesity Level': 'Obesity Level', 'Count': 'Count'},
+                title="📊 Obesity Levels Count",
+                hover_data=['Count']
+            )
+            st.plotly_chart(fig)
+        with col2:
+            obesity_pct = df['Obesity'].value_counts(normalize=True).reset_index()
+            obesity_pct.columns = ['Obesity Level', 'Proportion']
+            fig = px.pie(
+                obesity_pct,
+                names='Obesity Level', values='Proportion',
+                color_discrete_sequence=px.colors.qualitative.Pastel,
+                title="📌 Obesity Distribution (%)",
+                hole=0.3
+            )
+            st.plotly_chart(fig)
+    with st.expander("4️⃣ 🔢 Numerical Features Distribution"):
+        numerical_cols = df.select_dtypes(include=[np.number]).columns[:4]
+        for col in numerical_cols:
+            fig = px.histogram(
+                df, x=col,
+                nbins=20,
+                title=f"📉 Distribution of {col}",
+                color_discrete_sequence=['#636EFA']
+            )
+            st.plotly_chart(fig)
+    with st.expander("5️⃣ 🔗 Correlation Matrix"):
+        corr = df.corr(numeric_only=True)
+        fig = px.imshow(
+            corr,
+            text_auto=True,
+            color_continuous_scale='RdBu_r',
+            title="📊 Feature Correlations",
+            width=400,
+            height=700
+        )
+        st.plotly_chart(fig)
+    with st.expander("6️⃣ 📦 Outlier Detection (Box Plots)"):
+        for col in df.select_dtypes(include=[np.number]).columns[:6]:
+            fig = px.box(df, y=col, title=f"📦 Box Plot for {col}")
+            st.plotly_chart(fig)
+    with st.expander("7️⃣ 😻 Gender vs Obesity Analysis"):
+        fig = px.histogram(
+            df, x='Obesity', color='Gender',
+            barmode='group',
+            title="😻 Obesity Distribution by Gender",
+            color_discrete_sequence=px.colors.qualitative.Vivid
+        )
+        st.plotly_chart(fig)
+    with st.expander("8️⃣ 👶 Age vs Obesity Analysis"):
+        fig = px.box(
+            df, x='Obesity', y='Age',
+            color='Obesity',
+            title="👶 Age Distribution by Obesity Level"
+        )
+        st.plotly_chart(fig)
+    with st.expander("9️⃣ 👨‍👧 Family History vs Obesity"):
+        fig = px.histogram(
+            df, x='Obesity', color='family_history',
+            barmode='group',
+            title="👨‍👧 Obesity Distribution by Family History"
+        )
+        st.plotly_chart(fig)
+    with st.expander("🔹 🏃‍♂️ Physical Activity Frequency (FAF) Analysis"):
+        fig = px.box(
+            df, x='Obesity', y='FAF',
+            color='Obesity',
+            title="🏃‍♂️ Physical Activity Frequency by Obesity Level"
+        )
+        st.plotly_chart(fig)
+    with st.expander("1️⃣ 1️⃣ 💧 Water Consumption (CH2O) Analysis"):
+        fig = px.box(
+            df, x='Obesity', y='CH2O',
+            color='Obesity',
+            title="💧 Daily Water Consumption by Obesity Level"
+        )
+        st.plotly_chart(fig)
+    with st.expander("1️⃣ 2️⃣ 🍕 High Caloric Food Consumption (FAVC) Analysis"):
+        fig = px.histogram(
+            df, x='Obesity', color='FAVC',
+            barmode='group',
+            title="🍕 Obesity Distribution by High Caloric Food Consumption"
+        )
+        st.plotly_chart(fig)
+    with st.expander("1️⃣ 3️⃣ 💻 Technology Usage Time (TUE) Analysis"):
+        fig = px.box(
+            df, x='Obesity', y='TUE',
+            color='Obesity',
+            title="💻 Technology Usage Time by Obesity Level"
+        )
+        st.plotly_chart(fig)
+    with st.expander("1️⃣ 4️⃣ 🍷 Alcohol Consumption (CALC) Analysis"):
+        fig = px.histogram(
+            df, x='Obesity', color='CALC',
+            barmode='group',
+            title="🍷 Obesity Distribution by Alcohol Consumption"
+        )
+        st.plotly_chart(fig)
+    with st.expander("1️⃣ 5️⃣ 🚗 Transportation Mode (MTRANS) vs Obesity"):
+        fig = px.histogram(
+            df, x='MTRANS', color='Obesity',
+            barmode='group',
+            title="🚗 Transportation Mode vs Obesity Levels"
+        )
+        st.plotly_chart(fig)
+# 🔮 Predict Page
+elif page == "🔮 Predict":
+    st.title("🔮 Obesity Prediction")
+    st.markdown("Fill in the details below to predict your obesity level:")
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        gender = st.selectbox("🚻 Gender", ["Male", "Female"])
+        age = st.number_input("📅 Age", 10, 100, 25)
+        height = st.number_input("📏 Height (m)", 1.0, 2.5, 1.70)
+        weight = st.number_input("⚖️ Weight (kg)", 30, 200, 70)
+        family_history = st.selectbox("🧬 Family History of Obesity", ["yes", "no"])
+    with col2:
+        favc = st.selectbox("🍔 Frequent High-Calorie Food (FAVC)", ["yes", "no"])
+        fcvc = st.slider("🥦 Veggie Intake Frequency (FCVC)", 1.0, 3.0, 2.0)
+        ncp = st.number_input("🍽️ Number of Main Meals (NCP)", 1.0, 4.0, 3.0)
+        caec = st.selectbox("🍟 Snacking Between Meals (CAEC)", ["no", "Sometimes", "Frequently", "Always"])
+        smoke = st.selectbox("🚬 Do you Smoke?", ["yes", "no"])
+    with col3:
+        ch2o = st.slider("💧 Water Intake (CH2O)", 0.0, 3.0, 1.0)
+        scc = st.selectbox("📉 Calorie Monitoring (SCC)", ["yes", "no"])
+        faf = st.slider("🏃 Physical Activity (FAF)", 0.0, 3.0, 1.0)
+        tue = st.slider("📱 Tech Usage Time (TUE)", 0.0, 3.0, 1.0)
+        calc = st.selectbox("🍷 Alcohol (CALC)", ["no", "Sometimes", "Frequently", "Always"])
+        mtrans = st.selectbox("🚗 Transport Mode (MTRANS)", ["Walking", "Public_Transportation", "Automobile", "Bike", "Motorbike"])
+    input_data = {
+        "Gender": gender, "Age": age, "Height": height, "Weight": weight,
+        "family_history": family_history, "FAVC": favc, "FCVC": fcvc, "NCP": ncp,
+        "CAEC": caec, "SMOKE": smoke, "CH2O": ch2o, "SCC": scc,
+        "FAF": faf, "TUE": tue, "CALC": calc, "MTRANS": mtrans
+    }
+    if st.button("🔍 Predict"):
+        input_df = pd.DataFrame([input_data])
+        # Apply Label Encoding
+        for col in input_df.columns:
+            if col in label_encoders:
+                input_df[col] = label_encoders[col].transform(input_df[col])
+        # Scale features
+        input_scaled = scaler.transform(input_df)
+        # Predict
+        prediction = model.predict(input_scaled)
+        # ✅ Decode the numeric prediction
+        decoded_prediction = label_encoders["Obesity"].inverse_transform([prediction[0]])[0]
+        # Save decoded prediction in session_state for PDF/report use
+        st.session_state["prediction"] = decoded_prediction
+        st.session_state["input_data"] = input_data
+        # Display result
+        st.success(f"🎯 **Predicted Obesity Level**: `{decoded_prediction}`")
+# 📌 Feature Importance Page (Interactive with Plotly)
+elif page == "📌 Feature Importance":
+    st.title("📌 Feature Importance")
+    importances = model.feature_importances_
+    sorted_idx = np.argsort(importances)[::-1]
+    sorted_features = [feature_names[i] for i in sorted_idx]
+    sorted_importances = importances[sorted_idx]
+    # 🏆 Top 5 Features
+    top_features = sorted_features[:5]
+    top_importances = sorted_importances[:5]
+    top_labels = [f"{emoji_map.get(f, '')} {f}" for f in top_features]
+    # 📊 Create Plotly Bar Chart
+    fig = px.bar(
+        x=top_importances[::-1],
+        y=top_labels[::-1],
+        orientation='h',
+        labels={'x': 'Importance', 'y': 'Feature'},
+        color=top_importances[::-1],
+        color_continuous_scale='Turbo',
+        title="🎯 Top 5 Influential Features",
+        text=[f"{val:.2f}" for val in top_importances[::-1]]
+    )
+    fig.update_layout(
+        xaxis_title="Importance Score",
+        yaxis_title="",
+        plot_bgcolor="rgba(0,0,0,0)",
+        paper_bgcolor="rgba(0,0,0,0)",
+        font=dict(size=14),
+        coloraxis_showscale=False
+    )
+    fig.update_traces(textposition='outside', marker_line_width=1.2)
+    st.plotly_chart(fig, use_container_width=True)
+    st.markdown("✨ These features contribute the most to your predicted obesity level.")

obesity_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca9e412767abbb71f5d6ccb5dba63b128516ecaa868f5de49eb66f34ca5dbe7e
+size 5211497

requirements.txt CHANGED Viewed

@@ -1,3 +1,9 @@
-altair
-pandas
-streamlit

+streamlit==1.35.0
+pandas==2.2.2
+seaborn==0.13.2
+matplotlib==3.8.4
+joblib==1.4.2
+numpy==1.26.4
+fpdf==1.7.2
+plotly==5.22.0
+scikit-learn==1.4.2