Spaces:

AbdramaneB
/

Pima_reporting

Sleeping

App Files Files Community

Pima_reporting / src /streamlit_app.py

AbdramaneB

Upload folder using huggingface_hub

b7a0a90 verified 3 months ago

raw

history blame contribute delete

6.24 kB

	import streamlit as st
	import seaborn as sns
	import matplotlib.pyplot as plt
	import matplotlib.dates as mdates
	import pandas as pd

	# Load data
	def load_data(path: str = "./processed_data.csv") -> pd.DataFrame:
	    """Read the dashboard dataset from a CSV file.

	    The dashboard expects the file to provide the columns
	    preg, plas, pres, skin, test, mass, pedi, age and class.

	    Args:
	        path: Location of the CSV file. The default, ``./processed_data.csv``,
	            is relative and therefore resolved against the current working
	            directory of the running process.

	    Returns:
	        The parsed file contents as a DataFrame.

	    Raises:
	        OSError: If the file cannot be opened (``FileNotFoundError`` included).
	        pandas.errors.EmptyDataError: If the file has no content to parse.
	        pandas.errors.ParserError: If the file is not valid CSV.
	    """
	    return pd.read_csv(path)

	# Create Streamlit app
	# Columns the dashboard reads; app() validates them once before rendering.
	_EXPECTED_COLUMNS = frozenset(
	    {"preg", "plas", "pres", "skin", "test", "mass", "pedi", "age", "class"}
	)

	# Shared x-axis label for every chart that is grouped by the outcome column.
	_OUTCOME_AXIS_LABEL = "Outcome (0 = No diabetes, 1 = Diabetes)"

	# column name -> human-readable label, one stacked histogram per entry.
	_HISTOGRAM_FEATURES = {
	    "preg": "Pregnancies",
	    "plas": "Plasma glucose concentration",
	    "pres": "Diastolic blood pressure (mm Hg)",
	    "mass": "Body mass index (BMI)",
	    "age": "Age (years)",
	}

	# column name -> human-readable label, one boxplot per entry.
	_BOXPLOT_FEATURES = {
	    "plas": "Plasma glucose concentration",
	    "mass": "Body mass index (BMI)",
	    "pedi": "Diabetes pedigree function",
	}


	def _show_figure(fig) -> None:
	    """Render *fig* on the page, then close it so pyplot stops tracking it."""
	    st.pyplot(fig)
	    plt.close(fig)


	def _render_sidebar_metrics(data: pd.DataFrame) -> None:
	    """Compute summary statistics from *data* and show them in the sidebar."""
	    total_obs = len(data)
	    # Summing the column counts positives only if it is coded 1 = diabetes,
	    # 0 = no diabetes (assumption carried over from the data preparation step).
	    n_diabetes = int(data["class"].sum())
	    diabetes_rate = (n_diabetes / total_obs * 100) if total_obs > 0 else 0

	    sidebar = st.sidebar
	    sidebar.header("Key Metrics")
	    sidebar.metric("Total patients", total_obs)
	    sidebar.metric("Patients with diabetes", n_diabetes)
	    sidebar.metric("Diabetes prevalence (%)", f"{diabetes_rate:.1f}")

	    sidebar.markdown("---")
	    sidebar.metric("Avg age (years)", round(data["age"].mean(), 1))
	    sidebar.metric("Median age (years)", round(data["age"].median(), 1))

	    sidebar.markdown("---")
	    sidebar.metric("Avg pregnancies", round(data["preg"].mean(), 1))
	    sidebar.metric("Avg BMI", round(data["mass"].mean(), 1))
	    sidebar.metric("Avg plasma glucose", round(data["plas"].mean(), 1))
	    sidebar.metric("Avg blood pressure (mm Hg)", round(data["pres"].mean(), 1))
	    sidebar.metric("Avg diabetes pedigree", round(data["pedi"].mean(), 3))


	def _plot_outcome_distribution(data: pd.DataFrame) -> None:
	    """Draw a bar chart with the number of rows per outcome value."""
	    st.header("Diabetes outcome distribution")
	    fig, ax = plt.subplots()
	    outcome_counts = data["class"].value_counts().sort_index()
	    sns.barplot(x=outcome_counts.index, y=outcome_counts.values, ax=ax)
	    ax.set_xlabel(_OUTCOME_AXIS_LABEL)
	    ax.set_ylabel("Count")
	    ax.set_title("Diabetes outcome count")

	    # Lift each count label 1% of the tallest bar above its own bar; the
	    # offset is the same for every bar, so compute it once.
	    label_offset = outcome_counts.max() * 0.01
	    for position, count in enumerate(outcome_counts.values):
	        ax.text(
	            position,
	            count + label_offset,
	            str(count),
	            ha="center",
	            va="bottom",
	            fontsize=9,
	        )
	    _show_figure(fig)


	def _plot_feature_histograms(data: pd.DataFrame) -> None:
	    """Draw one stacked histogram per feature, coloured by outcome."""
	    st.header("Key feature distributions by outcome")
	    for col, label in _HISTOGRAM_FEATURES.items():
	        st.subheader(f"{label} by outcome")
	        fig, ax = plt.subplots()
	        sns.histplot(
	            data=data,
	            x=col,
	            hue="class",
	            multiple="stack",
	            bins=30,
	            ax=ax,
	        )
	        ax.set_xlabel(label)
	        ax.set_title(f"{label} distribution (Diabetes vs No diabetes)")
	        _show_figure(fig)


	def _plot_feature_boxplots(data: pd.DataFrame) -> None:
	    """Draw one boxplot per feature, split by outcome."""
	    st.header("Feature boxplots by outcome")
	    for col, label in _BOXPLOT_FEATURES.items():
	        st.subheader(f"{label} vs outcome")
	        fig, ax = plt.subplots()
	        sns.boxplot(data=data, x="class", y=col, ax=ax)
	        ax.set_xlabel(_OUTCOME_AXIS_LABEL)
	        ax.set_ylabel(label)
	        ax.set_title(f"{label} vs diabetes outcome")
	        _show_figure(fig)


	def _plot_correlation_heatmap(data: pd.DataFrame) -> None:
	    """Draw an annotated correlation heatmap over the numeric columns."""
	    st.header("Correlation heatmap (numeric features)")
	    numeric_cols = data.select_dtypes(include=["number"]).columns
	    if len(numeric_cols) <= 1:
	        st.write("Not enough numeric columns to compute correlations.")
	        return

	    fig, ax = plt.subplots(figsize=(8, 6))
	    corr = data[numeric_cols].corr()
	    sns.heatmap(corr, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
	    ax.set_title("Correlation heatmap")
	    _show_figure(fig)


	def app() -> None:
	    """Build the Diabetes Outcomes Dashboard page.

	    Configures the page, loads the dataset through ``load_data()``, checks
	    that the expected columns are present, and then renders the sidebar
	    metrics, a data preview and the charts. If the dataset cannot be
	    loaded, lacks expected columns, or has no rows, a message is shown and
	    rendering stops instead of raising.
	    """
	    huggingface_page_title = "Diabetes Outcomes Dashboard"
	    # Page configuration is issued before any other Streamlit call.
	    st.set_page_config(page_title=huggingface_page_title, layout="wide")

	    # --- Custom CSS to adjust sidebar width ---
	    st.markdown(
	        """
	        <style>
	        /* Sidebar width */
	        [data-testid="stSidebar"] {
	            width: 600px;
	            min-width: 600px;
	        }
	        </style>
	        """,
	        unsafe_allow_html=True,
	    )

	    # --- Page content ---
	    st.title(huggingface_page_title)

	    # An unreadable, empty or malformed CSV is reported on the page rather
	    # than surfacing as an unhandled exception.
	    try:
	        data = load_data()
	    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
	        st.error(f"Could not load the dataset: {exc}")
	        return

	    # Every helper below indexes these columns directly, so validate once.
	    if not _EXPECTED_COLUMNS.issubset(data.columns):
	        st.error(f"Dataset is missing some expected columns. Found: {list(data.columns)}")
	        return

	    # With zero rows the averages are NaN and the charts have nothing to draw.
	    if data.empty:
	        st.warning("Dataset contains no rows; nothing to display.")
	        return

	    _render_sidebar_metrics(data)

	    # --- Data preview ---
	    st.markdown("### Data preview")
	    st.dataframe(data.head())

	    # Styling for seaborn plots
	    sns.set_style("whitegrid", {'grid.color': 'lightgrey', 'grid.linestyle': '--'})

	    _plot_outcome_distribution(data)
	    _plot_feature_histograms(data)
	    _plot_feature_boxplots(data)
	    _plot_correlation_heatmap(data)

	# Entry point: build the dashboard only when this file is executed as the
	# main script, not when it is imported as a module.
	if __name__ == "__main__":
	    app()