Spaces:

HugMeBytes
/

Obesity_Predictor

Sleeping

App Files Files Community

Obesity_Predictor / app.py

HugMeBytes

Update app.py

a771ffb verified 9 months ago

raw

history blame contribute delete

12.7 kB

	import streamlit as st
	import pandas as pd
	import joblib
	import numpy as np
	import tempfile
	import plotly.express as px

	# Load the dataset
	@st.cache_data
	def load_data():
	    """Read the obesity dataset from ``"Obesity prediction.csv"``.

	    The path is relative, so the CSV is resolved against the current
	    working directory. The result of ``pd.read_csv`` is returned as is.
	    """
	    dataset = pd.read_csv("Obesity prediction.csv")
	    return dataset


	# Module-level DataFrame read by the page branches further down.
	df = load_data()

	# Load the model and supporting objects
	@st.cache_resource
	def load_model():
	    """Load ``obesity_model.pkl`` and unpack the four objects it bundles.

	    Returns:
	        A ``(model, scaler, label_encoders, feature_names)`` tuple built from
	        the bundle's ``'model'``, ``'scaler'``, ``'encoders'`` and
	        ``'feature_names'`` entries.
	    """
	    bundle = joblib.load("obesity_model.pkl")
	    # NOTE: lookup order and return order differ (encoders <-> scaler).
	    classifier, encoders, fitted_scaler, names = (
	        bundle[key] for key in ('model', 'encoders', 'scaler', 'feature_names')
	    )
	    return classifier, fitted_scaler, encoders, names


	# Unpack once at import time; the page branches use these module-level names.
	model, scaler, label_encoders, feature_names = load_model()

	# Emoji mapping
	# Feature name -> decorative emoji used when labelling features in charts.
	emoji_map = {
	    "FAVC": "🍕",
	    "CH2O": "🧃",
	    "Gender": "🚻",
	    "Age": "🎂",
	    "Height": "📏",
	    "Weight": "⚖️",
	    "family_history": "👨‍👩‍👧‍👦",
	    "FCVC": "🥗",
	    "NCP": "🍽️",
	    "CAEC": "🍩",
	    "SMOKE": "🚬",
	    "SCC": "📊",
	    "FAF": "🏃",
	    "TUE": "💻",
	    "CALC": "🍷",
	    "MTRANS": "🚌",
	}

	# Navigation Sidebar
	st.sidebar.title("🧭 Navigation")
	# The selected label is compared verbatim by the if/elif page dispatch below.
	page = st.sidebar.radio(
	    "Go to",
	    options=["📘 Introduction", "📊 EDA", "🔮 Predict", "📌 Feature Importance"],
	)

	# Introduction Page
	if page == "📘 Introduction":
	    # Landing page: overview text, headline dataset metrics, a short data
	    # preview and static descriptions of features, goals and workflow.
	    st.title("🏥 Obesity Prediction App")
	    st.markdown("""
	## 📌 App Overview
	This app allows users to analyze lifestyle factors and predict obesity levels using a machine learning model.

	🎯 Objective:
	Provide users with personalized predictions about obesity levels based on their lifestyle and physical attributes.

	### 📂 Dataset Foundation:
	""")

	    # Headline numbers taken straight from the loaded DataFrame.
	    col1, col2, col3 = st.columns(3)
	    with col1:
	        st.metric("📄 Total Records", df.shape[0])
	    with col2:
	        st.metric("🧾 Total Features", df.shape[1])
	    with col3:
	        st.metric("⚖️ Obesity Classes", df['Obesity'].nunique())

	    st.subheader("🔍 Sample Data")
	    # Cast only the previewed rows to str: the displayed table is identical
	    # to stringifying the whole dataset first, without the full-frame copy
	    # on every rerun.
	    st.dataframe(df.head().astype(str))

	    st.subheader("📊 Dataset Features")
	    st.markdown("""
	- 🎯 Target Variable: `Obesity` — Represents different levels of obesity.
	- 🧬 Input Features: The app takes both physical and behavioral attributes such as:
	- 👤 Demographics: `Age`, `Gender`
	- 📏 Physical Metrics: `Height`, `Weight`
	- 🍔 Dietary Habits: Frequency of high-calorie food (FAVC), number of main meals, vegetable intake (FCVC), etc.
	- 🏃 Activity Level: Physical activity frequency (FAF), use of technology (TUE), transportation type, etc.
	- 🚬 Other Habits: Smoking, alcohol intake, daily water intake (CH2O), etc.
	""")

	    st.subheader("🎯 App Goals")
	    st.markdown("""
	- 📈 Help users understand how lifestyle factors relate to obesity.
	- 🧠 Provide interactive visualizations to explore health behavior patterns.
	- 🤖 Offer personalized obesity level predictions.
	""")

	    st.subheader("⚙️ How the App Works")
	    st.markdown("""
	- 🧹 User inputs are preprocessed using Label Encoding.
	- 🌲 The app uses a trained Random Forest Classifier to predict obesity levels.
	- 🧾 Users enter their details via a friendly input form.
	- 📊 The app displays the predicted obesity level along with helpful visual feedback.
	""")

	# 📊 EDA Page
	elif page == "📊 EDA":
	st.title("📊 Exploratory Data Analysis")

	with st.expander("1️⃣ 📋 Dataset Basic Information", expanded=False):
	col1, col2 = st.columns(2)
	tempdf1 = df
	with col1:
	st.write("🧬 Data Types:")
	st.write(tempdf1.dtypes)
	with col2:
	st.write("❓ Missing Values:")
	st.write(tempdf1.isnull().sum())

	with st.expander("2️⃣ 📈 Summary Statistics"):
	st.write(tempdf1.describe())

	with st.expander("3️⃣ 🧮 Obesity Distribution"):
	col1, col2 = st.columns(2)

	with col1:
	obesity_counts = df['Obesity'].value_counts().reset_index()
	obesity_counts.columns = ['Obesity Level', 'Count']

	fig = px.bar(
	obesity_counts,
	x='Obesity Level', y='Count',
	color='Obesity Level',
	color_discrete_sequence=px.colors.qualitative.Set3,
	labels={'Obesity Level': 'Obesity Level', 'Count': 'Count'},
	title="📊 Obesity Levels Count",
	hover_data=['Count']
	)
	st.plotly_chart(fig)

	with col2:
	obesity_pct = df['Obesity'].value_counts(normalize=True).reset_index()
	obesity_pct.columns = ['Obesity Level', 'Proportion']

	fig = px.pie(
	obesity_pct,
	names='Obesity Level', values='Proportion',
	color_discrete_sequence=px.colors.qualitative.Pastel,
	title="📌 Obesity Distribution (%)",
	hole=0.3
	)
	st.plotly_chart(fig)

	with st.expander("4️⃣ 🔢 Numerical Features Distribution"):
	numerical_cols = df.select_dtypes(include=[np.number]).columns[:4]
	for col in numerical_cols:
	fig = px.histogram(
	df, x=col,
	nbins=20,
	title=f"📉 Distribution of {col}",
	color_discrete_sequence=['#636EFA']
	)
	st.plotly_chart(fig)

	with st.expander("5️⃣ 🔗 Correlation Matrix"):
	corr = df.corr(numeric_only=True)
	fig = px.imshow(
	corr,
	text_auto=True,
	color_continuous_scale='RdBu_r',
	title="📊 Feature Correlations",
	width=400,
	height=700
	)
	st.plotly_chart(fig)

	with st.expander("6️⃣ 📦 Outlier Detection (Box Plots)"):
	for col in df.select_dtypes(include=[np.number]).columns[:6]:
	fig = px.box(df, y=col, title=f"📦 Box Plot for {col}")
	st.plotly_chart(fig)

	with st.expander("7️⃣ 😻 Gender vs Obesity Analysis"):
	fig = px.histogram(
	df, x='Obesity', color='Gender',
	barmode='group',
	title="😻 Obesity Distribution by Gender",
	color_discrete_sequence=px.colors.qualitative.Vivid
	)
	st.plotly_chart(fig)

	with st.expander("8️⃣ 👶 Age vs Obesity Analysis"):
	fig = px.box(
	df, x='Obesity', y='Age',
	color='Obesity',
	title="👶 Age Distribution by Obesity Level"
	)
	st.plotly_chart(fig)

	with st.expander("9️⃣ 👨‍👧 Family History vs Obesity"):
	fig = px.histogram(
	df, x='Obesity', color='family_history',
	barmode='group',
	title="👨‍👧 Obesity Distribution by Family History"
	)
	st.plotly_chart(fig)

	with st.expander("🔹 🏃‍♂️ Physical Activity Frequency (FAF) Analysis"):
	fig = px.box(
	df, x='Obesity', y='FAF',
	color='Obesity',
	title="🏃‍♂️ Physical Activity Frequency by Obesity Level"
	)
	st.plotly_chart(fig)

	with st.expander("1️⃣ 1️⃣ 💧 Water Consumption (CH2O) Analysis"):
	fig = px.box(
	df, x='Obesity', y='CH2O',
	color='Obesity',
	title="💧 Daily Water Consumption by Obesity Level"
	)
	st.plotly_chart(fig)

	with st.expander("1️⃣ 2️⃣ 🍕 High Caloric Food Consumption (FAVC) Analysis"):
	fig = px.histogram(
	df, x='Obesity', color='FAVC',
	barmode='group',
	title="🍕 Obesity Distribution by High Caloric Food Consumption"
	)
	st.plotly_chart(fig)

	with st.expander("1️⃣ 3️⃣ 💻 Technology Usage Time (TUE) Analysis"):
	fig = px.box(
	df, x='Obesity', y='TUE',
	color='Obesity',
	title="💻 Technology Usage Time by Obesity Level"
	)
	st.plotly_chart(fig)

	with st.expander("1️⃣ 4️⃣ 🍷 Alcohol Consumption (CALC) Analysis"):
	fig = px.histogram(
	df, x='Obesity', color='CALC',
	barmode='group',
	title="🍷 Obesity Distribution by Alcohol Consumption"
	)
	st.plotly_chart(fig)

	with st.expander("1️⃣ 5️⃣ 🚗 Transportation Mode (MTRANS) vs Obesity"):
	fig = px.histogram(
	df, x='MTRANS', color='Obesity',
	barmode='group',
	title="🚗 Transportation Mode vs Obesity Levels"
	)
	st.plotly_chart(fig)


	# 🔮 Predict Page
	elif page == "🔮 Predict":
	st.title("🔮 Obesity Prediction")
	st.markdown("Fill in the details below to predict your obesity level:")

	col1, col2, col3 = st.columns(3)
	with col1:
	gender = st.selectbox("🚻 Gender", ["Male", "Female"])
	age = st.number_input("📅 Age", 10, 100, 25)
	height = st.number_input("📏 Height (m)", 1.0, 2.5, 1.70)
	weight = st.number_input("⚖️ Weight (kg)", 30, 200, 70)
	family_history = st.selectbox("🧬 Family History of Obesity", ["yes", "no"])

	with col2:
	favc = st.selectbox("🍔 Frequent High-Calorie Food (FAVC)", ["yes", "no"])
	fcvc = st.slider("🥦 Veggie Intake Frequency (FCVC)", 1.0, 3.0, 2.0)
	ncp = st.number_input("🍽️ Number of Main Meals (NCP)", 1.0, 4.0, 3.0)
	caec = st.selectbox("🍟 Snacking Between Meals (CAEC)", ["no", "Sometimes", "Frequently", "Always"])
	smoke = st.selectbox("🚬 Do you Smoke?", ["yes", "no"])

	with col3:
	ch2o = st.slider("💧 Water Intake (CH2O)", 0.0, 3.0, 1.0)
	scc = st.selectbox("📉 Calorie Monitoring (SCC)", ["yes", "no"])
	faf = st.slider("🏃 Physical Activity (FAF)", 0.0, 3.0, 1.0)
	tue = st.slider("📱 Tech Usage Time (TUE)", 0.0, 3.0, 1.0)
	calc = st.selectbox("🍷 Alcohol (CALC)", ["no", "Sometimes", "Frequently", "Always"])
	mtrans = st.selectbox("🚗 Transport Mode (MTRANS)", ["Walking", "Public_Transportation", "Automobile", "Bike", "Motorbike"])

	input_data = {
	"Gender": gender, "Age": age, "Height": height, "Weight": weight,
	"family_history": family_history, "FAVC": favc, "FCVC": fcvc, "NCP": ncp,
	"CAEC": caec, "SMOKE": smoke, "CH2O": ch2o, "SCC": scc,
	"FAF": faf, "TUE": tue, "CALC": calc, "MTRANS": mtrans
	}
	if st.button("🔍 Predict"):
	input_df = pd.DataFrame([input_data])

	# Apply Label Encoding
	for col in input_df.columns:
	if col in label_encoders:
	input_df[col] = label_encoders[col].transform(input_df[col])

	# Scale features
	input_scaled = scaler.transform(input_df)

	# Predict
	prediction = model.predict(input_scaled)

	# ✅ Decode the numeric prediction
	decoded_prediction = label_encoders["Obesity"].inverse_transform([prediction[0]])[0]

	# Save decoded prediction in session_state for PDF/report use
	st.session_state["prediction"] = decoded_prediction
	st.session_state["input_data"] = input_data

	# Display result
	st.success(f"🎯 Predicted Obesity Level: `{decoded_prediction}`")


	# 📌 Feature Importance Page (Interactive with Plotly)
	elif page == "📌 Feature Importance":
	st.title("📌 Feature Importance")

	importances = model.feature_importances_
	sorted_idx = np.argsort(importances)[::-1]
	sorted_features = [feature_names[i] for i in sorted_idx]
	sorted_importances = importances[sorted_idx]

	# 🏆 Top 5 Features
	top_features = sorted_features[:5]
	top_importances = sorted_importances[:5]
	top_labels = [f"{emoji_map.get(f, '')} {f}" for f in top_features]

	# 📊 Create Plotly Bar Chart
	fig = px.bar(
	x=top_importances[::-1],
	y=top_labels[::-1],
	orientation='h',
	labels={'x': 'Importance', 'y': 'Feature'},
	color=top_importances[::-1],
	color_continuous_scale='Turbo',
	title="🎯 Top 5 Influential Features",
	text=[f"{val:.2f}" for val in top_importances[::-1]]
	)

	fig.update_layout(
	xaxis_title="Importance Score",
	yaxis_title="",
	plot_bgcolor="rgba(0,0,0,0)",
	paper_bgcolor="rgba(0,0,0,0)",
	font=dict(size=14),
	coloraxis_showscale=False
	)
	fig.update_traces(textposition='outside', marker_line_width=1.2)

	st.plotly_chart(fig, use_container_width=True)
	st.markdown("✨ These features contribute the most to your predicted obesity level.")