Spaces:

Shyanil
/

Advanced_Insurance_Data_Analysis_Dashboard

Sleeping

App Files Files Community

Advanced_Insurance_Data_Analysis_Dashboard / app.py

Shyanil

Update app.py

36891cf verified over 1 year ago

raw

history blame contribute delete

6.68 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	import plotly.express as px
	import plotly.graph_objects as go
	from plotly.subplots import make_subplots

	def load_data():
	"""Load and enhance the insurance dataset."""
	df = pd.read_csv('insurance.csv')

	# Add derived features for better analysis
	df['age_group'] = pd.cut(df['age'], bins=[0, 25, 35, 50, 100],
	labels=['Young Adult', 'Adult', 'Middle Age', 'Senior'])
	df['bmi_category'] = pd.cut(df['bmi'], bins=[0, 18.5, 24.9, 29.9, 100],
	labels=['Underweight', 'Normal', 'Overweight', 'Obese'])

	return df

	def main():
	st.set_page_config(layout="wide")

	# Custom styling
	st.markdown("""
	<style>
	.main {
	background-color: #f5f5f5;
	}
	.stButton>button {
	background-color: #4CAF50;
	color: white;
	border-radius: 5px;
	}
	</style>
	""", unsafe_allow_html=True)

	st.title("🏥 Advanced Insurance Data Analysis Dashboard")

	# Load data
	df = load_data()

	# Sidebar for analysis controls
	st.sidebar.header("Analysis Controls")

	# Variable selection for analysis
	primary_var = st.sidebar.selectbox(
	"Select Primary Variable",
	['age', 'bmi', 'children', 'charges', 'region', 'smoker', 'sex']
	)

	secondary_var = st.sidebar.selectbox(
	"Select Secondary Variable",
	['charges' if primary_var != 'charges' else 'age'] +
	[var for var in ['age', 'bmi', 'children', 'region', 'smoker', 'sex'] if var != primary_var]
	)

	# Advanced Analysis Section
	st.header("📊 Advanced Data Analysis")

	tab1, tab2, tab3 = st.tabs(["Distribution Analysis", "Relationship Analysis", "Categorical Insights"])

	with tab1:
	col1, col2 = st.columns(2)

	with col1:
	# Enhanced distribution plot
	fig = plt.figure(figsize=(10, 6))
	if df[primary_var].dtype in ['int64', 'float64']:
	sns.histplot(data=df, x=primary_var, hue='smoker')
	else:
	sns.countplot(data=df, x=primary_var, hue='smoker')
	plt.title(f'Distribution of {primary_var} by Smoking Status')
	plt.xticks(rotation=45)
	st.pyplot(fig)

	with col2:
	# Box plot with individual points
	fig = plt.figure(figsize=(10, 6))
	if df[primary_var].dtype in ['int64', 'float64']:
	sns.boxplot(data=df, y=primary_var, x='region', hue='smoker')
	plt.title(f'{primary_var} Distribution by Region')
	else:
	sns.boxplot(data=df, y='charges', x=primary_var, hue='smoker')
	plt.title(f'Charges Distribution by {primary_var}')
	plt.xticks(rotation=45)
	st.pyplot(fig)

	with tab2:
	col1, col2 = st.columns(2)

	with col1:
	# Scatter plot without trend line
	if df[primary_var].dtype in ['int64', 'float64'] and df[secondary_var].dtype in ['int64', 'float64']:
	fig = px.scatter(df, x=primary_var, y=secondary_var,
	color='smoker',
	title=f'Relationship between {primary_var} and {secondary_var}')
	st.plotly_chart(fig, use_container_width=True)
	else:
	st.write("Cannot create scatter plot for categorical variables")

	with col2:
	# Advanced violin plot
	fig = plt.figure(figsize=(10, 6))
	if df[primary_var].dtype in ['int64', 'float64']:
	sns.violinplot(data=df, y=primary_var, x='region', hue='smoker', split=True)
	plt.title(f'{primary_var} Distribution by Region and Smoking Status')
	else:
	sns.violinplot(data=df, y='charges', x=primary_var, hue='smoker', split=True)
	plt.title(f'Charges Distribution by {primary_var} and Smoking Status')
	plt.xticks(rotation=45)
	st.pyplot(fig)

	with tab3:
	col1, col2 = st.columns(2)

	with col1:
	# Heatmap for numerical variables
	numerical_cols = df.select_dtypes(include=['int64', 'float64']).columns
	correlation = df[numerical_cols].corr()
	fig = px.imshow(correlation,
	title="Correlation Heatmap",
	labels=dict(color="Correlation"))
	st.plotly_chart(fig, use_container_width=True)

	with col2:
	# Advanced categorical analysis
	if df[primary_var].dtype not in ['int64', 'float64']:
	# Create joint plot for categorical variables
	fig = plt.figure(figsize=(10, 6))
	sns.barplot(data=df, x=primary_var, y='charges', hue='smoker')
	plt.title(f'Average Charges by {primary_var} and Smoking Status')
	plt.xticks(rotation=45)
	st.pyplot(fig)
	else:
	# Alternative visualization for numerical primary variable
	fig = px.box(df, x='region', y=primary_var, color='smoker',
	title=f'{primary_var} Distribution by Region and Smoking Status')
	st.plotly_chart(fig, use_container_width=True)

	# Detailed Insights Section
	st.header("🔍 Detailed Insights")

	# Summary statistics
	if st.checkbox("Show Summary Statistics"):
	if df[primary_var].dtype in ['int64', 'float64']:
	summary = df.groupby('region')[primary_var].describe()
	st.write(f"Summary Statistics for {primary_var} by Region:")
	st.dataframe(summary)
	else:
	summary = df.groupby(primary_var)['charges'].describe()
	st.write(f"Charges Summary Statistics by {primary_var}:")
	st.dataframe(summary)

	# Cross-analysis
	if st.checkbox("Show Cross Analysis"):
	if df[primary_var].dtype not in ['int64', 'float64']:
	cross_analysis = pd.crosstab(df[primary_var], df['region'], margins=True)
	st.write(f"Cross Analysis of {primary_var} by Region:")
	st.dataframe(cross_analysis)
	else:
	grouped_analysis = df.groupby('region')[primary_var].agg(['mean', 'median', 'std', 'count'])
	st.write(f"Grouped Analysis of {primary_var} by Region:")
	st.dataframe(grouped_analysis)

	if __name__ == "__main__":
	main()