Spaces:

tbqguy
/

eda-gapminder

Sleeping

App Files Files Community

eda-gapminder / app.py

tbqguy

Initial deployment: Full-featured EDA Gapminder dashboard

da952bf 2 months ago

raw

history blame contribute delete

30.6 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import plotly.express as px
	import plotly.graph_objects as go
	from plotly.subplots import make_subplots

	# Page-level configuration: browser tab title and wide (full-width) layout
	st.set_page_config(page_title="Gapminder EDA v2", layout="wide")

	# Custom CSS to set Helvetica for body text while keeping headers in Source Sans Pro.
	# The stylesheet is injected through st.markdown; unsafe_allow_html=True is what
	# lets the raw <style> tag through instead of being rendered as escaped text.
	st.markdown("""
	<style>
	/* Body text, paragraphs, and general content */
	.stMarkdown p, .stMarkdown li, .stMarkdown span {
	font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif !important;
	}

	/* Dataframe text */
	.stDataFrame, .stDataFrame * {
	font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif !important;
	}

	/* Metric values */
	.stMetric {
	font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif !important;
	}

	/* Select boxes and inputs */
	.stSelectbox, .stMultiSelect {
	font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif !important;
	}

	/* Tab labels */
	.stTabs [data-baseweb="tab"] {
	font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif !important;
	}

	/* Info boxes */
	.stAlert {
	font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif !important;
	}

	/* Caption text */
	.stCaption {
	font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif !important;
	}

	/* Keep headers in Source Sans Pro (default Streamlit font) */
	h1, h2, h3, h4, h5, h6 {
	font-family: 'Source Sans Pro', sans-serif !important;
	}
	</style>
	""", unsafe_allow_html=True)

	# Main page title
	st.title("🌍 Gapminder Dataset - Exploratory Data Analysis v2")

	# Load data
	@st.cache_data
	def load_data():
	    """Return the Gapminder table, preferring a local copy over a download.

	    Sources are tried in order: ``gapminder.tsv`` next to the app (the
	    HuggingFace Spaces layout), ``data/gapminder.tsv`` (local development),
	    and finally the raw file in the jennybc/gapminder GitHub repository.
	    Only a missing file moves on to the next source; any other read error
	    propagates to the caller.
	    """
	    remote_url = "https://raw.githubusercontent.com/jennybc/gapminder/master/inst/extdata/gapminder.tsv"

	    for candidate in ('gapminder.tsv', 'data/gapminder.tsv'):
	        try:
	            return pd.read_csv(candidate, sep='\t')
	        except FileNotFoundError:
	            # Not present at this location; fall through to the next source.
	            continue

	    # No local copy anywhere: download from GitHub.
	    return pd.read_csv(remote_url, sep='\t')

	df = load_data()

	# Dataset overview: three headline counts followed by a preview of the table
	st.header("Dataset Overview")
	row_count, column_count = df.shape
	overview_figures = (
	    ("Total Rows", row_count),
	    ("Total Columns", column_count),
	    ("Countries", df['country'].nunique()),
	)
	for slot, (caption, figure) in zip(st.columns(3), overview_figures):
	    with slot:
	        st.metric(caption, figure)

	st.subheader("Sample Data")
	st.dataframe(df.head(10))

	# Descriptive Statistics Section
	st.header("📊 Descriptive Statistics")

	# Numeric columns that the statistics sections of this script iterate over
	numerical_cols = ['lifeExp', 'pop', 'gdpPercap']

	# One tab per statistic; each tab is populated by its own `with tabN:` block
	tab1, tab2, tab3, tab4, tab5, tab6, tab7 = st.tabs([
	"Mean", "Median", "Mode", "Standard Deviation",
	"Range & IQR", "Coefficient of Variation", "Percentiles"
	])

	with tab1:
	    st.subheader("Mean (Average)")
	    st.write("The mean represents the average value of the data.")

	    # One row per numeric column: (column name, arithmetic mean).
	    mean_df = pd.DataFrame(
	        [(name, df[name].mean()) for name in numerical_cols],
	        columns=['Metric', 'Mean Value'],
	    )

	    # Narrow table on the left, wider bar chart on the right.
	    table_slot, chart_slot = st.columns([1, 2])
	    with table_slot:
	        st.dataframe(mean_df, hide_index=True)
	    with chart_slot:
	        mean_fig = px.bar(
	            mean_df,
	            x='Metric',
	            y='Mean Value',
	            title='Mean Values by Metric',
	            color='Metric',
	        )
	        st.plotly_chart(mean_fig, use_container_width=True)

	with tab2:
	    st.subheader("Median")
	    st.write("The median represents the middle value when data is sorted.")

	    # One row per numeric column: (column name, median).
	    median_df = pd.DataFrame(
	        [(name, df[name].median()) for name in numerical_cols],
	        columns=['Metric', 'Median Value'],
	    )

	    # Narrow table on the left, wider bar chart on the right.
	    table_slot, chart_slot = st.columns([1, 2])
	    with table_slot:
	        st.dataframe(median_df, hide_index=True)
	    with chart_slot:
	        median_fig = px.bar(
	            median_df,
	            x='Metric',
	            y='Median Value',
	            title='Median Values by Metric',
	            color='Metric',
	        )
	        st.plotly_chart(median_fig, use_container_width=True)

	with tab3:
	    st.subheader("Mode")
	    st.write("The mode represents the most frequently occurring value(s).")

	    mode_results = []
	    for name in numerical_cols:
	        modes = df[name].mode()
	        if len(modes) == 0:
	            # No mode reported for this column; leave it out of the table.
	            continue
	        # Several values can tie for most frequent; report the first one
	        # together with how many rows carry exactly that value.
	        first_mode = modes[0]
	        occurrences = (df[name] == first_mode).sum()
	        mode_results.append({
	            'Metric': name,
	            'Mode Value': first_mode,
	            'Frequency': occurrences,
	        })

	    mode_df = pd.DataFrame(mode_results)
	    st.dataframe(mode_df, hide_index=True)

	    st.info("Note: For continuous data like life expectancy and GDP, mode may not be very meaningful as values rarely repeat exactly.")

	with tab4:
	    st.subheader("Standard Deviation")
	    st.write("Standard deviation measures the amount of variation or dispersion in the data.")

	    # One row per numeric column: (column name, standard deviation, variance).
	    std_df = pd.DataFrame(
	        [(name, df[name].std(), df[name].var()) for name in numerical_cols],
	        columns=['Metric', 'Standard Deviation', 'Variance'],
	    )

	    # Narrow table on the left, wider bar chart on the right.
	    table_slot, chart_slot = st.columns([1, 2])
	    with table_slot:
	        st.dataframe(std_df, hide_index=True)
	    with chart_slot:
	        std_fig = px.bar(
	            std_df,
	            x='Metric',
	            y='Standard Deviation',
	            title='Standard Deviation by Metric',
	            color='Metric',
	        )
	        st.plotly_chart(std_fig, use_container_width=True)

	with tab5:
	    st.subheader("Range & Interquartile Range (IQR)")
	    st.write("Range shows the total spread (Max - Min), while IQR shows the spread of the middle 50% of data.")

	    # Per-column extremes and quartiles, computed once and reused for the
	    # derived Range and IQR columns.
	    lows = [df[name].min() for name in numerical_cols]
	    highs = [df[name].max() for name in numerical_cols]
	    first_quartiles = [df[name].quantile(0.25) for name in numerical_cols]
	    third_quartiles = [df[name].quantile(0.75) for name in numerical_cols]

	    range_df = pd.DataFrame({
	        'Metric': numerical_cols,
	        'Min': lows,
	        'Q1 (25%)': first_quartiles,
	        'Q3 (75%)': third_quartiles,
	        'Max': highs,
	        'Range': [high - low for low, high in zip(lows, highs)],
	        'IQR': [q3 - q1 for q1, q3 in zip(first_quartiles, third_quartiles)],
	    })

	    st.dataframe(range_df, hide_index=True)

	    # Side-by-side bar charts: total range on the left, IQR on the right.
	    spread_charts = (
	        ('Range', 'Range by Metric'),
	        ('IQR', 'Interquartile Range (IQR) by Metric'),
	    )
	    for slot, (value_column, chart_title) in zip(st.columns(2), spread_charts):
	        with slot:
	            spread_fig = px.bar(
	                range_df,
	                x='Metric',
	                y=value_column,
	                title=chart_title,
	                color='Metric',
	            )
	            spread_fig.update_layout(transition={'duration': 500})
	            st.plotly_chart(spread_fig, use_container_width=True)

	    st.info("💡 IQR is more robust to outliers than Range. A larger IQR indicates more variability in the middle 50% of the data.")

	with tab6:
	    st.subheader("Coefficient of Variation (CV)")
	    st.write("CV = (Standard Deviation / Mean) × 100. It allows comparison of variability across different scales.")

	    # Mean and standard deviation per column; CV is their ratio in percent.
	    averages = [df[name].mean() for name in numerical_cols]
	    deviations = [df[name].std() for name in numerical_cols]
	    cv_df = pd.DataFrame({
	        'Metric': numerical_cols,
	        'Mean': averages,
	        'Std Dev': deviations,
	        'CV (%)': [
	            (deviation / average) * 100
	            for deviation, average in zip(deviations, averages)
	        ],
	    })

	    table_slot, chart_slot = st.columns([1, 2])

	    with table_slot:
	        st.dataframe(cv_df, hide_index=True)

	        # Reading guide shown underneath the table.
	        interpretation_lines = (
	            "Interpretation:",
	            "- CV < 15%: Low variability",
	            "- CV 15-30%: Moderate variability",
	            "- CV > 30%: High variability",
	        )
	        for line in interpretation_lines:
	            st.markdown(line)

	    with chart_slot:
	        cv_fig = px.bar(
	            cv_df,
	            x='Metric',
	            y='CV (%)',
	            title='Coefficient of Variation by Metric',
	            color='Metric',
	        )
	        cv_fig.update_layout(
	            yaxis_title="CV (%)",
	            transition={'duration': 500},
	        )

	        # Dashed guides at the 15% and 30% cut-offs named in the reading guide.
	        threshold_lines = (
	            (15, "green", "Low variability threshold"),
	            (30, "orange", "High variability threshold"),
	        )
	        for level, colour, caption in threshold_lines:
	            cv_fig.add_hline(y=level, line_dash="dash", line_color=colour,
	                             annotation_text=caption)

	        st.plotly_chart(cv_fig, use_container_width=True)

	    st.info("💡 Higher CV indicates greater relative variability. This is especially useful when comparing metrics with different units or scales.")

	with tab7:
	    st.subheader("Percentiles Analysis")
	    st.write("Percentiles divide the data into 100 equal parts. Key percentiles help understand data distribution.")

	    percentiles = [0, 10, 25, 50, 75, 90, 100]
	    # Series.quantile takes fractions in [0, 1], so convert once up front.
	    fractions = [p / 100 for p in percentiles]
	    # The two end points are labelled Min/Max instead of "0th"/"100th".
	    edge_names = {0: 'Min', 100: 'Max'}

	    percentile_data = {
	        'Percentile': [edge_names.get(p, f'{p}th') for p in percentiles],
	    }
	    percentile_data.update(
	        (name, [df[name].quantile(q) for q in fractions])
	        for name in numerical_cols
	    )
	    percentile_df = pd.DataFrame(percentile_data)

	    st.dataframe(percentile_df, hide_index=True)

	    # Visualizations
	    st.subheader("Percentile Visualizations")

	    display_names = {
	        'lifeExp': 'Life Expectancy',
	        'pop': 'Population',
	        'gdpPercap': 'GDP per Capita',
	    }
	    selected_percentile_metric = st.selectbox(
	        "Select metric for percentile visualization",
	        options=numerical_cols,
	        format_func=lambda name: display_names[name],
	        key='percentile_metric',
	    )
	    selected_values = df[selected_percentile_metric]

	    progression_slot, box_slot = st.columns(2)

	    with progression_slot:
	        # Line chart of the metric's value at each percentile.
	        perc_viz_data = pd.DataFrame({
	            'Percentile': percentiles,
	            'Value': [selected_values.quantile(q) for q in fractions],
	        })

	        progression_fig = px.line(
	            perc_viz_data,
	            x='Percentile',
	            y='Value',
	            title=f'Percentile Progression - {selected_percentile_metric}',
	            markers=True,
	        )
	        progression_fig.update_traces(line=dict(width=3), marker=dict(size=10))
	        progression_fig.update_layout(transition={'duration': 600})
	        st.plotly_chart(progression_fig, use_container_width=True)

	    with box_slot:
	        # Box plot overlaid with dotted reference lines at selected percentiles.
	        box_fig = px.box(
	            df,
	            y=selected_percentile_metric,
	            title=f'Box Plot with Percentiles - {selected_percentile_metric}',
	        )

	        for p in (10, 25, 50, 75, 90):
	            box_fig.add_hline(
	                y=selected_values.quantile(p / 100),
	                line_dash="dot",
	                line_color="red",
	                annotation_text=f"{p}th percentile",
	                annotation_position="right",
	            )

	        box_fig.update_layout(transition={'duration': 600})
	        st.plotly_chart(box_fig, use_container_width=True)

	    st.info("💡 The 50th percentile is the median. The difference between 75th and 25th percentiles is the IQR.")

	# Comprehensive statistics table: describe() output plus a mode column
	st.header("📈 Complete Statistical Summary")
	summary_stats = df[numerical_cols].describe().T

	# First mode of each column, or NaN when a column reports no mode at all.
	first_modes = []
	for name in numerical_cols:
	    modes = df[name].mode()
	    first_modes.append(modes[0] if len(modes) > 0 else np.nan)
	summary_stats['mode'] = first_modes

	# Keep a fixed subset of statistics, then give them display headings
	# ('50%' is the median row of describe()).
	kept_statistics = ['count', 'mean', 'mode', 'std', '50%', 'min', 'max']
	summary_stats = summary_stats[kept_statistics]
	summary_stats.columns = ['Count', 'Mean', 'Mode', 'Std Dev', 'Median', 'Min', 'Max']
	st.dataframe(summary_stats)

	# Which country/year holds the smallest and largest value of each metric
	st.subheader("🌟 Extreme Values - Countries")
	st.write("Discover which countries hold the minimum and maximum values for each metric")

	display_names = {
	    'lifeExp': 'Life Expectancy',
	    'pop': 'Population',
	    'gdpPercap': 'GDP per Capita',
	}

	# One column per metric, paired up positionally.
	for slot, metric in zip(st.columns(3), numerical_cols):
	    with slot:
	        values = df[metric]
	        lowest = values.min()
	        highest = values.max()

	        # When several rows share an extreme, the first one in table order is shown.
	        lowest_row = df[values == lowest].iloc[0]
	        highest_row = df[values == highest].iloc[0]

	        st.markdown(f"{display_names[metric]}")

	        # Smallest value and where it occurred
	        st.markdown(f"📉 Minimum: {lowest:,.2f}")
	        st.caption(f"📍 {lowest_row['country']} ({lowest_row['continent']}) in {int(lowest_row['year'])}")

	        # Largest value and where it occurred
	        st.markdown(f"📈 Maximum: {highest:,.2f}")
	        st.caption(f"📍 {highest_row['country']} ({highest_row['continent']}) in {int(highest_row['year'])}")

	        st.markdown("---")

	# Correlation Analysis
	st.header("🔗 Correlation Analysis")
	st.write("Correlation measures the strength and direction of relationships between variables (-1 to +1).")


	def _describe_correlation(r, signed=False):
	    """Classify a correlation coefficient by the size of its magnitude.

	    Args:
	        r: Correlation coefficient.
	        signed: When True, a strong correlation is additionally qualified
	            as "positive" or "negative" according to the sign of ``r``.

	    Returns:
	        "Strong" (or "Strong positive"/"Strong negative") for |r| > 0.7,
	        "Moderate" for |r| > 0.3, otherwise "Weak".
	    """
	    magnitude = abs(r)
	    if magnitude > 0.7:
	        if signed:
	            # The label must follow the sign; testing only the magnitude
	            # would call a strong negative correlation "positive".
	            return "Strong positive" if r > 0 else "Strong negative"
	        return "Strong"
	    if magnitude > 0.3:
	        return "Moderate"
	    return "Weak"


	# Pairwise correlations between the numeric columns (pandas' default method).
	correlation_matrix = df[numerical_cols].corr()

	col1, col2 = st.columns([1, 1])

	with col1:
	    st.subheader("Correlation Matrix")
	    st.dataframe(correlation_matrix.round(3))

	    st.markdown("Interpretation:")
	    st.markdown("- +1: Perfect positive correlation")
	    st.markdown("- 0: No correlation")
	    st.markdown("- -1: Perfect negative correlation")
	    # Raw strings: "\|" is not a valid Python escape sequence, so a plain
	    # literal triggers an invalid-escape warning on recent Python versions.
	    # The text passed to st.markdown is unchanged.
	    st.markdown(r"- \|r\| > 0.7: Strong correlation")
	    st.markdown(r"- \|r\| 0.3-0.7: Moderate correlation")
	    st.markdown(r"- \|r\| < 0.3: Weak correlation")

	with col2:
	    # Heatmap of the same matrix, with the colour scale centred on zero.
	    fig = px.imshow(correlation_matrix,
	                    text_auto='.2f',
	                    aspect='auto',
	                    color_continuous_scale='RdYlGn',
	                    color_continuous_midpoint=0,
	                    title='Correlation Heatmap',
	                    labels=dict(color="Correlation"))

	    fig.update_layout(
	        height=400,
	        xaxis_title="",
	        yaxis_title=""
	    )

	    st.plotly_chart(fig, use_container_width=True)

	# Detailed correlation insights
	st.subheader("🔍 Key Correlation Insights")

	col1, col2, col3 = st.columns(3)

	# The three distinct off-diagonal pairs of the matrix.
	corr_life_gdp = correlation_matrix.loc['lifeExp', 'gdpPercap']
	corr_life_pop = correlation_matrix.loc['lifeExp', 'pop']
	corr_gdp_pop = correlation_matrix.loc['gdpPercap', 'pop']

	with col1:
	    st.metric(
	        "Life Expectancy ↔ GDP per Capita",
	        f"{corr_life_gdp:.3f}",
	        delta=_describe_correlation(corr_life_gdp, signed=True)
	    )
	    st.caption("Higher GDP tends to correlate with longer life expectancy")

	with col2:
	    st.metric(
	        "Life Expectancy ↔ Population",
	        f"{corr_life_pop:.3f}",
	        delta=_describe_correlation(corr_life_pop)
	    )
	    st.caption("Relationship between population size and life expectancy")

	with col3:
	    st.metric(
	        "GDP per Capita ↔ Population",
	        f"{corr_gdp_pop:.3f}",
	        delta=_describe_correlation(corr_gdp_pop)
	    )
	    st.caption("Relationship between wealth and population size")

	st.info("💡 Correlation does not imply causation! High correlation indicates variables move together, but one doesn't necessarily cause the other.")

	# Year and continent pickers that drive the "Filter & Analyze" section
	st.header("🔎 Filter & Analyze")
	year_slot, continent_slot = st.columns(2)

	with year_slot:
	    year_options = sorted(df['year'].unique())
	    selected_year = st.selectbox("Select Year", year_options)
	with continent_slot:
	    # Every continent is offered and all of them start out selected.
	    selected_continent = st.multiselect(
	        "Select Continent(s)",
	        df['continent'].unique(),
	        default=df['continent'].unique(),
	    )

	# Rows for the chosen year whose continent is among the chosen ones.
	year_matches = df['year'] == selected_year
	continent_matches = df['continent'].isin(selected_continent)
	filtered_df = df[year_matches & continent_matches]

	# Display names for the raw Gapminder column identifiers. Shared by the
	# selectboxes and by chart titles/axis labels in this section so both show
	# the same wording (title-casing 'gdpPercap' would give 'Gdppercap').
	metric_labels = {
	    'lifeExp': 'Life Expectancy',
	    'pop': 'Population',
	    'gdpPercap': 'GDP per Capita',
	}

	if not filtered_df.empty:
	    st.subheader(f"Statistics for {selected_year} - {', '.join(selected_continent)}")

	    # Headline statistics of the filtered rows, one column per metric.
	    col1, col2, col3 = st.columns(3)

	    with col1:
	        st.metric("Life Expectancy (Mean)", f"{filtered_df['lifeExp'].mean():.2f}")
	        st.metric("Life Expectancy (Median)", f"{filtered_df['lifeExp'].median():.2f}")
	        st.metric("Life Expectancy (Std Dev)", f"{filtered_df['lifeExp'].std():.2f}")

	    with col2:
	        st.metric("Population (Mean)", f"{filtered_df['pop'].mean():,.0f}")
	        st.metric("Population (Median)", f"{filtered_df['pop'].median():,.0f}")
	        st.metric("Population (Std Dev)", f"{filtered_df['pop'].std():,.0f}")

	    with col3:
	        # All three GDP figures use the same thousands-separated format.
	        st.metric("GDP per Capita (Mean)", f"${filtered_df['gdpPercap'].mean():,.2f}")
	        st.metric("GDP per Capita (Median)", f"${filtered_df['gdpPercap'].median():,.2f}")
	        st.metric("GDP per Capita (Std Dev)", f"${filtered_df['gdpPercap'].std():,.2f}")

	    # Visualizations of the filtered data, one tab per chart family.
	    st.subheader("📊 Visual Analysis")

	    viz_tab1, viz_tab2, viz_tab3, viz_tab4, viz_tab5 = st.tabs([
	        "Comparative Bar Charts",
	        "Scatter Analysis",
	        "Geographic Distribution",
	        "Statistical Distribution",
	        "Animated Timeline"
	    ])

	    with viz_tab1:
	        st.write("Compare Mean, Median, and Standard Deviation across metrics")

	        col1, col2 = st.columns(2)

	        with col1:
	            # Long-form table for a grouped bar chart: every metric appears
	            # once with its mean and once with its median.
	            metric_count = len(numerical_cols)
	            comparison_data = pd.DataFrame({
	                'Metric': numerical_cols * 2,
	                'Statistic': ['Mean'] * metric_count + ['Median'] * metric_count,
	                'Value': (
	                    [filtered_df[name].mean() for name in numerical_cols]
	                    + [filtered_df[name].median() for name in numerical_cols]
	                ),
	            })

	            fig = px.bar(comparison_data, x='Metric', y='Value',
	                         color='Statistic', barmode='group',
	                         title='Mean vs Median Comparison',
	                         labels={'Value': 'Value'})
	            fig.update_layout(transition={'duration': 500})
	            st.plotly_chart(fig, use_container_width=True)

	        with col2:
	            # Standard deviation of each metric within the filtered rows.
	            std_data = pd.DataFrame({
	                'Metric': numerical_cols,
	                'Std Dev': [filtered_df[name].std() for name in numerical_cols],
	            })

	            fig = px.bar(std_data, x='Metric', y='Std Dev',
	                         title='Standard Deviation by Metric',
	                         color='Metric')
	            fig.update_layout(transition={'duration': 500})
	            st.plotly_chart(fig, use_container_width=True)

	    with viz_tab2:
	        st.write("Explore relationships between different metrics")

	        col1, col2 = st.columns(2)

	        with col1:
	            # GDP vs life expectancy; bubble size is population.
	            fig = px.scatter(filtered_df, x='gdpPercap', y='lifeExp',
	                             size='pop', color='continent',
	                             hover_name='country',
	                             title=f'GDP per Capita vs Life Expectancy ({selected_year})',
	                             labels={'gdpPercap': 'GDP per Capita',
	                                     'lifeExp': 'Life Expectancy'},
	                             log_x=True)
	            fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))
	            fig.update_layout(transition={'duration': 800, 'easing': 'cubic-in-out'})
	            st.plotly_chart(fig, use_container_width=True)

	        with col2:
	            # Population vs life expectancy; bubble size is GDP per capita.
	            fig = px.scatter(filtered_df, x='pop', y='lifeExp',
	                             size='gdpPercap', color='continent',
	                             hover_name='country',
	                             title=f'Population vs Life Expectancy ({selected_year})',
	                             labels={'pop': 'Population',
	                                     'lifeExp': 'Life Expectancy'},
	                             log_x=True)
	            fig.update_traces(marker=dict(line=dict(width=1, color='DarkSlateGrey')))
	            fig.update_layout(transition={'duration': 800, 'easing': 'cubic-in-out'})
	            st.plotly_chart(fig, use_container_width=True)

	    with viz_tab3:
	        st.write("Geographic distribution of metrics across countries")

	        metric_choice = st.selectbox(
	            "Select metric to visualize by country",
	            options=['lifeExp', 'pop', 'gdpPercap'],
	            format_func=metric_labels.get
	        )
	        choice_label = metric_labels[metric_choice]

	        # Largest value first, so head() is the top and tail() the bottom.
	        sorted_df = filtered_df.sort_values(metric_choice, ascending=False)

	        fig = px.bar(sorted_df, x='country', y=metric_choice,
	                     color='continent',
	                     title=f'{choice_label} by Country ({selected_year})',
	                     labels={metric_choice: choice_label})
	        fig.update_layout(
	            xaxis_tickangle=-45,
	            height=500,
	            transition={'duration': 500, 'easing': 'cubic-in-out'}
	        )
	        st.plotly_chart(fig, use_container_width=True)

	        # Top 10 and Bottom 10
	        col1, col2 = st.columns(2)

	        with col1:
	            st.write(f"🏆 Top 10 Countries - {choice_label}")
	            top_10 = sorted_df[['country', metric_choice, 'continent']].head(10)
	            st.dataframe(top_10, hide_index=True)

	        with col2:
	            st.write(f"⚠️ Bottom 10 Countries - {choice_label}")
	            bottom_10 = sorted_df[['country', metric_choice, 'continent']].tail(10)
	            st.dataframe(bottom_10, hide_index=True)

	    with viz_tab4:
	        st.write("Distribution patterns and box plots for filtered data")

	        selected_metric_dist = st.selectbox(
	            "Select metric for distribution analysis",
	            options=['lifeExp', 'pop', 'gdpPercap'],
	            format_func=metric_labels.get,
	            key='dist_metric'
	        )
	        dist_label = metric_labels[selected_metric_dist]

	        col1, col2 = st.columns(2)

	        with col1:
	            # Box plot per continent
	            fig = px.box(filtered_df, x='continent', y=selected_metric_dist,
	                         color='continent',
	                         title=f'{dist_label} Distribution by Continent',
	                         labels={selected_metric_dist: dist_label})
	            fig.update_layout(transition={'duration': 600})
	            st.plotly_chart(fig, use_container_width=True)

	        with col2:
	            # Violin plot per continent, with an embedded box
	            fig = px.violin(filtered_df, x='continent', y=selected_metric_dist,
	                            color='continent', box=True,
	                            title=f'{dist_label} Violin Plot',
	                            labels={selected_metric_dist: dist_label})
	            fig.update_layout(transition={'duration': 600})
	            st.plotly_chart(fig, use_container_width=True)

	    with viz_tab5:
	        st.write("🎬 Watch how metrics evolve over time with animated visualizations")

	        anim_type = st.radio(
	            "Select Animation Type",
	            options=["Scatter Plot Timeline", "Bar Chart Race", "Line Chart Evolution"],
	            horizontal=True
	        )

	        # These charts span every year, so they start from the full table
	        # restricted to the selected continents (the year picker is ignored).
	        in_selected_continents = df['continent'].isin(selected_continent)

	        if anim_type == "Scatter Plot Timeline":
	            st.subheader("GDP vs Life Expectancy Over Time")

	            anim_df = df[in_selected_continents]

	            # Fixed axis ranges keep the view steady from frame to frame.
	            fig = px.scatter(anim_df,
	                             x='gdpPercap',
	                             y='lifeExp',
	                             animation_frame='year',
	                             animation_group='country',
	                             size='pop',
	                             color='continent',
	                             hover_name='country',
	                             log_x=True,
	                             size_max=60,
	                             range_x=[100, 120000],
	                             range_y=[20, 90],
	                             title='GDP per Capita vs Life Expectancy (Animated)',
	                             labels={'gdpPercap': 'GDP per Capita',
	                                     'lifeExp': 'Life Expectancy',
	                                     'pop': 'Population'})

	            fig.update_layout(
	                height=600,
	                transition={'duration': 800}
	            )

	            # Slow down the built-in Play button (durations in milliseconds).
	            fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 800
	            fig.layout.updatemenus[0].buttons[0].args[1]['transition']['duration'] = 500

	            st.plotly_chart(fig, use_container_width=True)
	            st.info("Click the Play button to watch the animation! Each frame represents a year from 1952 to 2007.")

	        elif anim_type == "Bar Chart Race":
	            st.subheader("Top Countries Bar Chart Race")

	            metric_race = st.selectbox(
	                "Select metric for bar chart race",
	                options=['lifeExp', 'pop', 'gdpPercap'],
	                format_func=metric_labels.get,
	                key='race_metric'
	            )
	            race_label = metric_labels[metric_race]

	            # Rank the latest year *within* the selected continents. Ranking
	            # all countries first and filtering by continent afterwards can
	            # leave no rows when none of the global top 15 belongs to the
	            # selection, which breaks the animation setup below.
	            latest_year_df = df[in_selected_continents & (df['year'] == df['year'].max())]
	            top_countries = latest_year_df.nlargest(15, metric_race)['country'].tolist()

	            # Full history of those countries.
	            race_df = df[in_selected_continents & df['country'].isin(top_countries)]

	            fig = px.bar(race_df,
	                         x=metric_race,
	                         y='country',
	                         color='continent',
	                         animation_frame='year',
	                         orientation='h',
	                         title=f'Top Countries by {race_label} Over Time',
	                         labels={metric_race: race_label},
	                         range_x=[0, race_df[metric_race].max() * 1.1])

	            fig.update_layout(
	                height=600,
	                yaxis={'categoryorder': 'total ascending'}
	            )

	            # Slow down the built-in Play button (durations in milliseconds).
	            fig.layout.updatemenus[0].buttons[0].args[1]['frame']['duration'] = 1000
	            fig.layout.updatemenus[0].buttons[0].args[1]['transition']['duration'] = 500

	            st.plotly_chart(fig, use_container_width=True)
	            st.info("Watch the ranking change over time! Countries rise and fall based on their performance.")

	        else:  # Line Chart Evolution
	            st.subheader("Metric Evolution by Continent")

	            metric_line = st.selectbox(
	                "Select metric for time evolution",
	                options=['lifeExp', 'pop', 'gdpPercap'],
	                format_func=metric_labels.get,
	                key='line_metric'
	            )
	            line_label = metric_labels[metric_line]

	            # Mean of the metric per (year, continent) pair.
	            line_df = (
	                df[in_selected_continents]
	                .groupby(['year', 'continent'])[metric_line]
	                .mean()
	                .reset_index()
	            )

	            fig = px.line(line_df,
	                          x='year',
	                          y=metric_line,
	                          color='continent',
	                          markers=True,
	                          title=f'Average {line_label} Evolution by Continent',
	                          labels={metric_line: f'Average {line_label}',
	                                  'year': 'Year'})

	            fig.update_traces(
	                mode='lines+markers',
	                line=dict(width=3),
	                marker=dict(size=8)
	            )

	            fig.update_layout(
	                height=500,
	                hovermode='x unified',
	                transition={'duration': 600}
	            )

	            # Light grid lines on both axes
	            fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='LightGray')
	            fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='LightGray')

	            st.plotly_chart(fig, use_container_width=True)
	            st.info("Hover over the chart to see detailed values for each year and continent.")
	else:
	    # Nothing matched the year/continent filter (for example, every
	    # continent was deselected); say so instead of rendering nothing.
	    st.warning("No data matches the current filters. Select at least one continent to see the analysis.")

	# Distribution of one metric over the whole (unfiltered) dataset
	st.header("📉 Overall Data Distributions")

	# Display names for the raw column identifiers; used for the selectbox
	# entries and the axis labels so both read the same (title-casing
	# 'gdpPercap' would give 'Gdppercap').
	distribution_labels = {
	    'lifeExp': 'Life Expectancy',
	    'pop': 'Population',
	    'gdpPercap': 'GDP per Capita',
	}
	selected_metric = st.selectbox("Select metric to visualize", numerical_cols,
	                               format_func=distribution_labels.get)
	axis_label = distribution_labels[selected_metric]

	col1, col2 = st.columns(2)

	with col1:
	    # Histogram with dashed markers at the mean (red) and median (green).
	    fig = px.histogram(df, x=selected_metric, nbins=50,
	                       title=f'Distribution of {selected_metric}',
	                       labels={selected_metric: axis_label})
	    fig.add_vline(x=df[selected_metric].mean(), line_dash="dash",
	                  line_color="red", annotation_text="Mean")
	    fig.add_vline(x=df[selected_metric].median(), line_dash="dash",
	                  line_color="green", annotation_text="Median")
	    fig.update_layout(transition={'duration': 700})
	    st.plotly_chart(fig, use_container_width=True)

	with col2:
	    # Box plot of the same metric split by continent.
	    fig = px.box(df, y=selected_metric, x='continent',
	                 title=f'{selected_metric} by Continent',
	                 labels={selected_metric: axis_label})
	    fig.update_layout(transition={'duration': 700})
	    st.plotly_chart(fig, use_container_width=True)

	# Footer: horizontal rule, data attribution link, and version note
	st.markdown("---")
	st.markdown("Data source: [Gapminder](https://www.gapminder.org/)")
	st.markdown("Version 2 - Enhanced with interactive visualizations in Filter & Analyze section")