"""Streamlit dashboard: obesity dataset exploration and obesity-level prediction.

The script renders four sidebar-selectable pages: an introduction, a set of
exploratory charts, a prediction form, and a feature-importance chart.

NOTE(review): the emoji inside the string literals below look mis-encoded
(UTF-8 bytes that were decoded through a legacy code page). They are kept
exactly as found; restore them from a correctly encoded copy of this file.
"""

import tempfile

import joblib
import numpy as np
import pandas as pd
import plotly.express as px
import streamlit as st


@st.cache_data
def load_data():
    """Read the obesity dataset CSV from the working directory."""
    return pd.read_csv("Obesity prediction.csv")


df = load_data()


@st.cache_resource
def load_model():
    """Load the persisted model bundle and unpack its parts.

    Returns:
        A ``(model, scaler, label_encoders, feature_names)`` tuple taken from
        the ``'model'``, ``'scaler'``, ``'encoders'`` and ``'feature_names'``
        keys of the bundle.
    """
    # joblib.load unpickles the file, so the bundle must come from a trusted source.
    model_bundle = joblib.load("obesity_model.pkl")
    return (
        model_bundle["model"],
        model_bundle["scaler"],
        model_bundle["encoders"],
        model_bundle["feature_names"],
    )


model, scaler, label_encoders, feature_names = load_model()


def _encode_inputs(raw_inputs, encoders, ordered_features):
    """Turn one form submission into a single-row, model-ready DataFrame.

    Args:
        raw_inputs: Mapping of column name to the value entered in the form.
        encoders: Mapping of column name to a fitted encoder exposing
            ``transform``; columns without an entry are left untouched.
        ordered_features: Column names in the order the model bundle lists them.

    Returns:
        A one-row DataFrame with encoded categorical columns.

    Raises:
        ValueError: Propagated from an encoder that rejects a value.
    """
    frame = pd.DataFrame([raw_inputs])
    for column in frame.columns:
        if column in encoders:
            frame[column] = encoders[column].transform(frame[column])
    # Reorder only when both sides name exactly the same columns, so a bundle
    # with different names behaves as it did before this check existed.
    if set(ordered_features) == set(frame.columns):
        frame = frame[list(ordered_features)]
    return frame


# Emoji shown next to feature names on the feature-importance page.
emoji_map = {
    "FAVC": "๐Ÿ•",
    "CH2O": "๐Ÿงƒ",
    "Gender": "๐Ÿšป",
    "Age": "๐ŸŽ‚",
    "Height": "๐Ÿ“",
    "Weight": "โš–๏ธ",
    "family_history": "๐Ÿ‘จโ€๐Ÿ‘ฉโ€๐Ÿ‘งโ€๐Ÿ‘ฆ",
    "FCVC": "๐Ÿฅ—",
    "NCP": "๐Ÿฝ๏ธ",
    "CAEC": "๐Ÿฉ",
    "SMOKE": "๐Ÿšฌ",
    "SCC": "๐Ÿ“Š",
    "FAF": "๐Ÿƒ",
    "TUE": "๐Ÿ’ป",
    "CALC": "๐Ÿท",
    "MTRANS": "๐ŸšŒ",
}

# Page labels are defined once so the radio options and the branch
# comparisons below can never drift apart.
PAGE_INTRO = "๐Ÿ“˜ Introduction"
PAGE_EDA = "๐Ÿ“Š EDA"
PAGE_PREDICT = "๐Ÿ”ฎ Predict"
PAGE_IMPORTANCE = "๐Ÿ“Œ Feature Importance"

# Navigation sidebar
st.sidebar.title("๐Ÿงญ Navigation")
page = st.sidebar.radio(
    "Go to", [PAGE_INTRO, PAGE_EDA, PAGE_PREDICT, PAGE_IMPORTANCE]
)

# ---------------------------------------------------------------- Introduction
if page == PAGE_INTRO:
    st.title("๐Ÿฅ Obesity Prediction App")
    st.markdown("""
    ## ๐Ÿ“Œ App Overview
    This app allows users to analyze lifestyle factors and predict obesity levels using a machine learning model.

    ๐ŸŽฏ **Objective:** Provide users with personalized predictions about obesity levels based on their lifestyle and physical attributes.

    ### ๐Ÿ“‚ Dataset Foundation:
    """)

    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("๐Ÿ“„ Total Records", df.shape[0])
    with col2:
        st.metric("๐Ÿงพ Total Features", df.shape[1])
    with col3:
        st.metric("โš–๏ธ Obesity Classes", df["Obesity"].nunique())

    st.subheader("๐Ÿ” Sample Data")
    # Only the displayed rows are converted to strings.
    st.dataframe(df.head().astype(str))

    st.subheader("๐Ÿ“Š Dataset Features")
    st.markdown("""
    - **๐ŸŽฏ Target Variable:** `Obesity` โ€” Represents different levels of obesity.
    - **๐Ÿงฌ Input Features:** The app takes both physical and behavioral attributes such as:
        - ๐Ÿ‘ค **Demographics:** `Age`, `Gender`
        - ๐Ÿ“ **Physical Metrics:** `Height`, `Weight`
        - ๐Ÿ” **Dietary Habits:** Frequency of high-calorie food (FAVC), number of main meals, vegetable intake (FCVC), etc.
        - ๐Ÿƒ **Activity Level:** Physical activity frequency (FAF), use of technology (TUE), transportation type, etc.
        - ๐Ÿšฌ **Other Habits:** Smoking, alcohol intake, daily water intake (CH2O), etc.
    """)

    st.subheader("๐ŸŽฏ App Goals")
    st.markdown("""
    - ๐Ÿ“ˆ Help users understand how lifestyle factors relate to obesity.
    - ๐Ÿง  Provide interactive visualizations to explore health behavior patterns.
    - ๐Ÿค– Offer personalized obesity level predictions.
    """)

    st.subheader("โš™๏ธ How the App Works")
    st.markdown("""
    - ๐Ÿงน User inputs are preprocessed using **Label Encoding**.
    - ๐ŸŒฒ The app uses a trained **Random Forest Classifier** to predict obesity levels.
    - ๐Ÿงพ Users enter their details via a friendly input form.
    - ๐Ÿ“Š The app displays the predicted **obesity level** along with helpful visual feedback.
    """)

# ------------------------------------------------------------------------- EDA
elif page == PAGE_EDA:
    st.title("๐Ÿ“Š Exploratory Data Analysis")

    with st.expander("1๏ธโƒฃ ๐Ÿ“‹ Dataset Basic Information", expanded=False):
        col1, col2 = st.columns(2)
        with col1:
            st.write("๐Ÿงฌ **Data Types:**")
            st.write(df.dtypes)
        with col2:
            st.write("โ“ **Missing Values:**")
            st.write(df.isnull().sum())

    with st.expander("2๏ธโƒฃ ๐Ÿ“ˆ Summary Statistics"):
        st.write(df.describe())

    with st.expander("3๏ธโƒฃ ๐Ÿงฎ Obesity Distribution"):
        col1, col2 = st.columns(2)
        with col1:
            obesity_counts = df["Obesity"].value_counts().reset_index()
            obesity_counts.columns = ["Obesity Level", "Count"]
            fig = px.bar(
                obesity_counts,
                x="Obesity Level",
                y="Count",
                color="Obesity Level",
                color_discrete_sequence=px.colors.qualitative.Set3,
                labels={"Obesity Level": "Obesity Level", "Count": "Count"},
                title="๐Ÿ“Š Obesity Levels Count",
                hover_data=["Count"],
            )
            st.plotly_chart(fig)
        with col2:
            obesity_pct = df["Obesity"].value_counts(normalize=True).reset_index()
            obesity_pct.columns = ["Obesity Level", "Proportion"]
            fig = px.pie(
                obesity_pct,
                names="Obesity Level",
                values="Proportion",
                color_discrete_sequence=px.colors.qualitative.Pastel,
                title="๐Ÿ“Œ Obesity Distribution (%)",
                hole=0.3,
            )
            st.plotly_chart(fig)

    numeric_columns = df.select_dtypes(include=[np.number]).columns

    with st.expander("4๏ธโƒฃ ๐Ÿ”ข Numerical Features Distribution"):
        # Histograms for the first four numeric columns only.
        for col in numeric_columns[:4]:
            fig = px.histogram(
                df,
                x=col,
                nbins=20,
                title=f"๐Ÿ“‰ Distribution of {col}",
                color_discrete_sequence=["#636EFA"],
            )
            st.plotly_chart(fig)

    with st.expander("5๏ธโƒฃ ๐Ÿ”— Correlation Matrix"):
        corr = df.corr(numeric_only=True)
        fig = px.imshow(
            corr,
            text_auto=True,
            color_continuous_scale="RdBu_r",
            title="๐Ÿ“Š Feature Correlations",
            width=400,
            height=700,
        )
        st.plotly_chart(fig)

    with st.expander("6๏ธโƒฃ ๐Ÿ“ฆ Outlier Detection (Box Plots)"):
        # Box plots for the first six numeric columns only.
        for col in numeric_columns[:6]:
            fig = px.box(df, y=col, title=f"๐Ÿ“ฆ Box Plot for {col}")
            st.plotly_chart(fig)

    # Remaining sections share one shape: an expander holding a single chart.
    # Each entry is (expander label, plotly express builder, keyword arguments).
    comparison_charts = [
        (
            "7๏ธโƒฃ ๐Ÿ˜ป Gender vs Obesity Analysis",
            px.histogram,
            {
                "x": "Obesity",
                "color": "Gender",
                "barmode": "group",
                "title": "๐Ÿ˜ป Obesity Distribution by Gender",
                "color_discrete_sequence": px.colors.qualitative.Vivid,
            },
        ),
        (
            "8๏ธโƒฃ ๐Ÿ‘ถ Age vs Obesity Analysis",
            px.box,
            {
                "x": "Obesity",
                "y": "Age",
                "color": "Obesity",
                "title": "๐Ÿ‘ถ Age Distribution by Obesity Level",
            },
        ),
        (
            "9๏ธโƒฃ ๐Ÿ‘จโ€๐Ÿ‘ง Family History vs Obesity",
            px.histogram,
            {
                "x": "Obesity",
                "color": "family_history",
                "barmode": "group",
                "title": "๐Ÿ‘จโ€๐Ÿ‘ง Obesity Distribution by Family History",
            },
        ),
        (
            "๐Ÿ”น ๐Ÿƒโ€โ™‚๏ธ Physical Activity Frequency (FAF) Analysis",
            px.box,
            {
                "x": "Obesity",
                "y": "FAF",
                "color": "Obesity",
                "title": "๐Ÿƒโ€โ™‚๏ธ Physical Activity Frequency by Obesity Level",
            },
        ),
        (
            "1๏ธโƒฃ 1๏ธโƒฃ ๐Ÿ’ง Water Consumption (CH2O) Analysis",
            px.box,
            {
                "x": "Obesity",
                "y": "CH2O",
                "color": "Obesity",
                "title": "๐Ÿ’ง Daily Water Consumption by Obesity Level",
            },
        ),
        (
            "1๏ธโƒฃ 2๏ธโƒฃ ๐Ÿ• High Caloric Food Consumption (FAVC) Analysis",
            px.histogram,
            {
                "x": "Obesity",
                "color": "FAVC",
                "barmode": "group",
                "title": "๐Ÿ• Obesity Distribution by High Caloric Food Consumption",
            },
        ),
        (
            "1๏ธโƒฃ 3๏ธโƒฃ ๐Ÿ’ป Technology Usage Time (TUE) Analysis",
            px.box,
            {
                "x": "Obesity",
                "y": "TUE",
                "color": "Obesity",
                "title": "๐Ÿ’ป Technology Usage Time by Obesity Level",
            },
        ),
        (
            "1๏ธโƒฃ 4๏ธโƒฃ ๐Ÿท Alcohol Consumption (CALC) Analysis",
            px.histogram,
            {
                "x": "Obesity",
                "color": "CALC",
                "barmode": "group",
                "title": "๐Ÿท Obesity Distribution by Alcohol Consumption",
            },
        ),
        (
            "1๏ธโƒฃ 5๏ธโƒฃ ๐Ÿš— Transportation Mode (MTRANS) vs Obesity",
            px.histogram,
            {
                "x": "MTRANS",
                "color": "Obesity",
                "barmode": "group",
                "title": "๐Ÿš— Transportation Mode vs Obesity Levels",
            },
        ),
    ]
    for expander_label, build_chart, chart_kwargs in comparison_charts:
        with st.expander(expander_label):
            st.plotly_chart(build_chart(df, **chart_kwargs))

# --------------------------------------------------------------------- Predict
elif page == PAGE_PREDICT:
    st.title("๐Ÿ”ฎ Obesity Prediction")
    st.markdown("Fill in the details below to predict your obesity level:")

    col1, col2, col3 = st.columns(3)
    with col1:
        gender = st.selectbox("๐Ÿšป Gender", ["Male", "Female"])
        age = st.number_input("๐Ÿ“… Age", 10, 100, 25)
        height = st.number_input("๐Ÿ“ Height (m)", 1.0, 2.5, 1.70)
        weight = st.number_input("โš–๏ธ Weight (kg)", 30, 200, 70)
        family_history = st.selectbox("๐Ÿงฌ Family History of Obesity", ["yes", "no"])
    with col2:
        favc = st.selectbox("๐Ÿ” Frequent High-Calorie Food (FAVC)", ["yes", "no"])
        fcvc = st.slider("๐Ÿฅฆ Veggie Intake Frequency (FCVC)", 1.0, 3.0, 2.0)
        ncp = st.number_input("๐Ÿฝ๏ธ Number of Main Meals (NCP)", 1.0, 4.0, 3.0)
        caec = st.selectbox(
            "๐ŸŸ Snacking Between Meals (CAEC)",
            ["no", "Sometimes", "Frequently", "Always"],
        )
        smoke = st.selectbox("๐Ÿšฌ Do you Smoke?", ["yes", "no"])
    with col3:
        ch2o = st.slider("๐Ÿ’ง Water Intake (CH2O)", 0.0, 3.0, 1.0)
        scc = st.selectbox("๐Ÿ“‰ Calorie Monitoring (SCC)", ["yes", "no"])
        faf = st.slider("๐Ÿƒ Physical Activity (FAF)", 0.0, 3.0, 1.0)
        tue = st.slider("๐Ÿ“ฑ Tech Usage Time (TUE)", 0.0, 3.0, 1.0)
        calc = st.selectbox(
            "๐Ÿท Alcohol (CALC)", ["no", "Sometimes", "Frequently", "Always"]
        )
        mtrans = st.selectbox(
            "๐Ÿš— Transport Mode (MTRANS)",
            ["Walking", "Public_Transportation", "Automobile", "Bike", "Motorbike"],
        )

    input_data = {
        "Gender": gender,
        "Age": age,
        "Height": height,
        "Weight": weight,
        "family_history": family_history,
        "FAVC": favc,
        "FCVC": fcvc,
        "NCP": ncp,
        "CAEC": caec,
        "SMOKE": smoke,
        "CH2O": ch2o,
        "SCC": scc,
        "FAF": faf,
        "TUE": tue,
        "CALC": calc,
        "MTRANS": mtrans,
    }

    if st.button("๐Ÿ” Predict"):
        try:
            input_df = _encode_inputs(input_data, label_encoders, feature_names)
        except ValueError as err:
            # An option the encoder was not fitted on would otherwise surface
            # as a raw traceback; report it and end this script run instead.
            st.error(f"Could not encode the selected inputs: {err}")
            st.stop()

        input_scaled = scaler.transform(input_df)
        prediction = model.predict(input_scaled)

        # Map the numeric class back to its label via the target's encoder.
        decoded_prediction = label_encoders["Obesity"].inverse_transform(
            [prediction[0]]
        )[0]

        # Kept in session state so the result outlives this script run.
        st.session_state["prediction"] = decoded_prediction
        st.session_state["input_data"] = input_data

        st.success(f"๐ŸŽฏ **Predicted Obesity Level**: `{decoded_prediction}`")

# ---------------------------------------------------------- Feature Importance
elif page == PAGE_IMPORTANCE:
    st.title("๐Ÿ“Œ Feature Importance")

    importances = model.feature_importances_
    sorted_idx = np.argsort(importances)[::-1]  # most important first
    top_idx = sorted_idx[:5]
    top_features = [feature_names[i] for i in top_idx]
    top_importances = importances[top_idx]
    top_labels = [f"{emoji_map.get(f, '')} {f}" for f in top_features]

    # The series are reversed before being passed to the horizontal bar chart.
    bar_values = top_importances[::-1]
    fig = px.bar(
        x=bar_values,
        y=top_labels[::-1],
        orientation="h",
        labels={"x": "Importance", "y": "Feature"},
        color=bar_values,
        color_continuous_scale="Turbo",
        title="๐ŸŽฏ Top 5 Influential Features",
        text=[f"{val:.2f}" for val in bar_values],
    )
    fig.update_layout(
        xaxis_title="Importance Score",
        yaxis_title="",
        plot_bgcolor="rgba(0,0,0,0)",
        paper_bgcolor="rgba(0,0,0,0)",
        font=dict(size=14),
        coloraxis_showscale=False,
    )
    fig.update_traces(textposition="outside", marker_line_width=1.2)
    st.plotly_chart(fig, use_container_width=True)

    st.markdown("โœจ These features contribute the most to your predicted obesity level.")