Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import joblib | |
| import numpy as np | |
| import tempfile | |
| import plotly.express as px | |
# Load the dataset
@st.cache_data
def load_data():
    """Read the obesity dataset from ``Obesity prediction.csv``.

    Streamlit re-executes this script from the top on every widget
    interaction, so the read is wrapped in ``st.cache_data``: the CSV is
    parsed once and later reruns receive a copy of the cached frame.

    Returns:
        pandas.DataFrame: the parsed contents of the CSV file. The path is
        relative, so it is resolved against the process working directory.
    """
    return pd.read_csv("Obesity prediction.csv")


df = load_data()
# Load the model and supporting objects
@st.cache_resource
def load_model():
    """Load the model bundle from ``obesity_model.pkl`` and unpack it.

    The bundle is a mapping with the keys ``'model'``, ``'encoders'``,
    ``'scaler'`` and ``'feature_names'``. Streamlit re-executes this script
    on every widget interaction, so the load is wrapped in
    ``st.cache_resource`` and the file is deserialized only once per server
    process; every rerun receives the same objects.

    Returns:
        tuple: ``(model, scaler, label_encoders, feature_names)`` in that
        order (note that it differs from the order of the keys above).
    """
    # SECURITY: joblib.load unpickles the file and can therefore execute
    # arbitrary code. Only ever point this at a trusted artifact that ships
    # with the app.
    model_bundle = joblib.load("obesity_model.pkl")
    return (
        model_bundle['model'],
        model_bundle['scaler'],
        model_bundle['encoders'],
        model_bundle['feature_names'],
    )


model, scaler, label_encoders, feature_names = load_model()
# Emoji mapping: feature/column name -> decorative glyph used as a label
# prefix. Names without an entry are expected to be looked up with a default.
# NOTE(review): the glyphs below look mis-encoded (mojibake) in this copy of
# the file - confirm the source is stored as UTF-8. They are kept unchanged.
emoji_map = {
    "FAVC": "๐",
    "CH2O": "๐ง",
    "Gender": "๐ป",
    "Age": "๐",
    "Height": "๐",
    "Weight": "โ๏ธ",
    "family_history": "๐จโ๐ฉโ๐งโ๐ฆ",
    "FCVC": "๐ฅ",
    "NCP": "๐ฝ๏ธ",
    "CAEC": "๐ฉ",
    "SMOKE": "๐ฌ",
    "SCC": "๐",
    "FAF": "๐",
    "TUE": "๐ป",
    "CALC": "๐ท",
    "MTRANS": "๐",
}
# Navigation Sidebar
# NOTE(review): the emoji inside the string literals of this script look
# mis-encoded (mojibake) in this copy - confirm the file is stored as UTF-8.
# They are reproduced unchanged. The page labels below must stay identical to
# the literals compared against `page` in the if/elif chain.
st.sidebar.title("๐งญ Navigation")
page = st.sidebar.radio(
    "Go to",
    ["๐ Introduction", "๐ EDA", "๐ฎ Predict", "๐ Feature Importance"],
)

# Introduction Page: static description plus a few headline dataset numbers.
if page == "๐ Introduction":
    st.title("๐ฅ Obesity Prediction App")
    st.markdown("""
## ๐ App Overview
This app allows users to analyze lifestyle factors and predict obesity levels using a machine learning model.
๐ฏ **Objective:**
Provide users with personalized predictions about obesity levels based on their lifestyle and physical attributes.
### ๐ Dataset Foundation:
""")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("๐ Total Records", df.shape[0])
    with col2:
        st.metric("๐งพ Total Features", df.shape[1])
    with col3:
        st.metric("โ๏ธ Obesity Classes", df['Obesity'].nunique())
    st.subheader("๐ Sample Data")
    # Only the previewed rows are stringified; converting the whole frame
    # first yields the same five rows at a higher cost.
    st.dataframe(df.head().astype(str))
    st.subheader("๐ Dataset Features")
    st.markdown("""
- **๐ฏ Target Variable:** `Obesity` โ Represents different levels of obesity.
- **๐งฌ Input Features:** The app takes both physical and behavioral attributes such as:
- ๐ค **Demographics:** `Age`, `Gender`
- ๐ **Physical Metrics:** `Height`, `Weight`
- ๐ **Dietary Habits:** Frequency of high-calorie food (FAVC), number of main meals, vegetable intake (FCVC), etc.
- ๐ **Activity Level:** Physical activity frequency (FAF), use of technology (TUE), transportation type, etc.
- ๐ฌ **Other Habits:** Smoking, alcohol intake, daily water intake (CH2O), etc.
""")
    st.subheader("๐ฏ App Goals")
    st.markdown("""
- ๐ Help users understand how lifestyle factors relate to obesity.
- ๐ง Provide interactive visualizations to explore health behavior patterns.
- ๐ค Offer personalized obesity level predictions.
""")
    st.subheader("โ๏ธ How the App Works")
    st.markdown("""
- ๐งน User inputs are preprocessed using **Label Encoding**.
- ๐ฒ The app uses a trained **Random Forest Classifier** to predict obesity levels.
- ๐งพ Users enter their details via a friendly input form.
- ๐ The app displays the predicted **obesity level** along with helpful visual feedback.
""")

# ๐ EDA Page: one collapsible section per analysis, all driven by `df`.
elif page == "๐ EDA":
    st.title("๐ Exploratory Data Analysis")

    with st.expander("1๏ธโฃ ๐ Dataset Basic Information", expanded=False):
        col1, col2 = st.columns(2)
        with col1:
            st.write("๐งฌ **Data Types:**")
            st.write(df.dtypes)
        with col2:
            st.write("โ **Missing Values:**")
            st.write(df.isnull().sum())

    with st.expander("2๏ธโฃ ๐ Summary Statistics"):
        st.write(df.describe())

    with st.expander("3๏ธโฃ ๐งฎ Obesity Distribution"):
        col1, col2 = st.columns(2)
        with col1:
            # Absolute class counts as a bar chart.
            obesity_counts = df['Obesity'].value_counts().reset_index()
            obesity_counts.columns = ['Obesity Level', 'Count']
            fig = px.bar(
                obesity_counts,
                x='Obesity Level', y='Count',
                color='Obesity Level',
                color_discrete_sequence=px.colors.qualitative.Set3,
                labels={'Obesity Level': 'Obesity Level', 'Count': 'Count'},
                title="๐ Obesity Levels Count",
                hover_data=['Count']
            )
            st.plotly_chart(fig)
        with col2:
            # Relative class shares as a donut chart.
            obesity_pct = df['Obesity'].value_counts(normalize=True).reset_index()
            obesity_pct.columns = ['Obesity Level', 'Proportion']
            fig = px.pie(
                obesity_pct,
                names='Obesity Level', values='Proportion',
                color_discrete_sequence=px.colors.qualitative.Pastel,
                title="๐ Obesity Distribution (%)",
                hole=0.3
            )
            st.plotly_chart(fig)

    with st.expander("4๏ธโฃ ๐ข Numerical Features Distribution"):
        # Histograms for the first four numeric columns only.
        numerical_cols = df.select_dtypes(include=[np.number]).columns[:4]
        for col in numerical_cols:
            fig = px.histogram(
                df, x=col,
                nbins=20,
                title=f"๐ Distribution of {col}",
                color_discrete_sequence=['#636EFA']
            )
            st.plotly_chart(fig)

    with st.expander("5๏ธโฃ ๐ Correlation Matrix"):
        corr = df.corr(numeric_only=True)
        fig = px.imshow(
            corr,
            text_auto=True,
            color_continuous_scale='RdBu_r',
            title="๐ Feature Correlations",
            width=400,
            height=700
        )
        st.plotly_chart(fig)

    with st.expander("6๏ธโฃ ๐ฆ Outlier Detection (Box Plots)"):
        # Box plots for the first six numeric columns only.
        for col in df.select_dtypes(include=[np.number]).columns[:6]:
            fig = px.box(df, y=col, title=f"๐ฆ Box Plot for {col}")
            st.plotly_chart(fig)

    with st.expander("7๏ธโฃ ๐ป Gender vs Obesity Analysis"):
        fig = px.histogram(
            df, x='Obesity', color='Gender',
            barmode='group',
            title="๐ป Obesity Distribution by Gender",
            color_discrete_sequence=px.colors.qualitative.Vivid
        )
        st.plotly_chart(fig)

    with st.expander("8๏ธโฃ ๐ถ Age vs Obesity Analysis"):
        fig = px.box(
            df, x='Obesity', y='Age',
            color='Obesity',
            title="๐ถ Age Distribution by Obesity Level"
        )
        st.plotly_chart(fig)

    with st.expander("9๏ธโฃ ๐จโ๐ง Family History vs Obesity"):
        fig = px.histogram(
            df, x='Obesity', color='family_history',
            barmode='group',
            title="๐จโ๐ง Obesity Distribution by Family History"
        )
        st.plotly_chart(fig)

    with st.expander("๐น ๐โโ๏ธ Physical Activity Frequency (FAF) Analysis"):
        fig = px.box(
            df, x='Obesity', y='FAF',
            color='Obesity',
            title="๐โโ๏ธ Physical Activity Frequency by Obesity Level"
        )
        st.plotly_chart(fig)

    with st.expander("1๏ธโฃ 1๏ธโฃ ๐ง Water Consumption (CH2O) Analysis"):
        fig = px.box(
            df, x='Obesity', y='CH2O',
            color='Obesity',
            title="๐ง Daily Water Consumption by Obesity Level"
        )
        st.plotly_chart(fig)

    with st.expander("1๏ธโฃ 2๏ธโฃ ๐ High Caloric Food Consumption (FAVC) Analysis"):
        fig = px.histogram(
            df, x='Obesity', color='FAVC',
            barmode='group',
            title="๐ Obesity Distribution by High Caloric Food Consumption"
        )
        st.plotly_chart(fig)

    with st.expander("1๏ธโฃ 3๏ธโฃ ๐ป Technology Usage Time (TUE) Analysis"):
        fig = px.box(
            df, x='Obesity', y='TUE',
            color='Obesity',
            title="๐ป Technology Usage Time by Obesity Level"
        )
        st.plotly_chart(fig)

    with st.expander("1๏ธโฃ 4๏ธโฃ ๐ท Alcohol Consumption (CALC) Analysis"):
        fig = px.histogram(
            df, x='Obesity', color='CALC',
            barmode='group',
            title="๐ท Obesity Distribution by Alcohol Consumption"
        )
        st.plotly_chart(fig)

    with st.expander("1๏ธโฃ 5๏ธโฃ ๐ Transportation Mode (MTRANS) vs Obesity"):
        fig = px.histogram(
            df, x='MTRANS', color='Obesity',
            barmode='group',
            title="๐ Transportation Mode vs Obesity Levels"
        )
        st.plotly_chart(fig)

# ๐ฎ Predict Page: collect the inputs, encode/scale them, run the model.
elif page == "๐ฎ Predict":
    st.title("๐ฎ Obesity Prediction")
    st.markdown("Fill in the details below to predict your obesity level:")
    col1, col2, col3 = st.columns(3)
    with col1:
        gender = st.selectbox("๐ป Gender", ["Male", "Female"])
        age = st.number_input("๐ Age", 10, 100, 25)
        height = st.number_input("๐ Height (m)", 1.0, 2.5, 1.70)
        weight = st.number_input("โ๏ธ Weight (kg)", 30, 200, 70)
        family_history = st.selectbox("๐งฌ Family History of Obesity", ["yes", "no"])
    with col2:
        favc = st.selectbox("๐ Frequent High-Calorie Food (FAVC)", ["yes", "no"])
        fcvc = st.slider("๐ฅฆ Veggie Intake Frequency (FCVC)", 1.0, 3.0, 2.0)
        ncp = st.number_input("๐ฝ๏ธ Number of Main Meals (NCP)", 1.0, 4.0, 3.0)
        caec = st.selectbox("๐ Snacking Between Meals (CAEC)", ["no", "Sometimes", "Frequently", "Always"])
        smoke = st.selectbox("๐ฌ Do you Smoke?", ["yes", "no"])
    with col3:
        ch2o = st.slider("๐ง Water Intake (CH2O)", 0.0, 3.0, 1.0)
        scc = st.selectbox("๐ Calorie Monitoring (SCC)", ["yes", "no"])
        faf = st.slider("๐ Physical Activity (FAF)", 0.0, 3.0, 1.0)
        tue = st.slider("๐ฑ Tech Usage Time (TUE)", 0.0, 3.0, 1.0)
        calc = st.selectbox("๐ท Alcohol (CALC)", ["no", "Sometimes", "Frequently", "Always"])
        mtrans = st.selectbox("๐ Transport Mode (MTRANS)", ["Walking", "Public_Transportation", "Automobile", "Bike", "Motorbike"])

    input_data = {
        "Gender": gender, "Age": age, "Height": height, "Weight": weight,
        "family_history": family_history, "FAVC": favc, "FCVC": fcvc, "NCP": ncp,
        "CAEC": caec, "SMOKE": smoke, "CH2O": ch2o, "SCC": scc,
        "FAF": faf, "TUE": tue, "CALC": calc, "MTRANS": mtrans
    }

    if st.button("๐ Predict"):
        input_df = pd.DataFrame([input_data])
        # Put the columns in the order stored with the model instead of
        # relying on the dict above happening to be written in that order.
        # Assumes `feature_names` lists the model's input columns and that
        # they are exactly the keys of `input_data` - TODO confirm against
        # the training code.
        input_df = input_df[list(feature_names)]
        # Apply Label Encoding to every column that has a stored encoder.
        for col in input_df.columns:
            if col in label_encoders:
                input_df[col] = label_encoders[col].transform(input_df[col])
        # Scale features
        input_scaled = scaler.transform(input_df)
        # Predict
        prediction = model.predict(input_scaled)
        # Decode the numeric prediction back to its class label.
        decoded_prediction = label_encoders["Obesity"].inverse_transform([prediction[0]])[0]
        # Keep the result and the raw inputs in session state. The original
        # comment says this is for PDF/report use; no such code is visible
        # in this file.
        st.session_state["prediction"] = decoded_prediction
        st.session_state["input_data"] = input_data
        # Display result
        st.success(f"๐ฏ **Predicted Obesity Level**: `{decoded_prediction}`")

# ๐ Feature Importance Page (Interactive with Plotly)
elif page == "๐ Feature Importance":
    st.title("๐ Feature Importance")
    importances = model.feature_importances_
    # Indices of the features from most to least important.
    sorted_idx = np.argsort(importances)[::-1]
    sorted_features = [feature_names[i] for i in sorted_idx]
    sorted_importances = importances[sorted_idx]
    # ๐ Top 5 Features
    top_features = sorted_features[:5]
    top_importances = sorted_importances[:5]
    top_labels = [f"{emoji_map.get(f, '')} {f}" for f in top_features]
    # ๐ Create Plotly Bar Chart. The sequences are reversed so that the
    # most important feature ends up at the top of the horizontal chart.
    fig = px.bar(
        x=top_importances[::-1],
        y=top_labels[::-1],
        orientation='h',
        labels={'x': 'Importance', 'y': 'Feature'},
        color=top_importances[::-1],
        color_continuous_scale='Turbo',
        title="๐ฏ Top 5 Influential Features",
        text=[f"{val:.2f}" for val in top_importances[::-1]]
    )
    fig.update_layout(
        xaxis_title="Importance Score",
        yaxis_title="",
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        font=dict(size=14),
        coloraxis_showscale=False
    )
    fig.update_traces(textposition='outside', marker_line_width=1.2)
    st.plotly_chart(fig, use_container_width=True)
    # The importances are read from the fitted model and do not depend on
    # any user input, so the caption must not present them as an explanation
    # of an individual prediction.
    st.markdown("โจ These features contribute the most to the model's predictions overall.")