Shah-Miloni's picture
Rename pages/app.py to app.py
da69bad verified
import streamlit as st
import pandas as pd
import altair as alt
# Input CSV files, resolved relative to the working directory of the app.
child_mortality_path = "child_mortality_0_5_year_olds_dying_per_1000_born.csv"
population_path = "pop.csv"

# Load both tables in the same order as the paths are listed above.
child_mortality, population = (
    pd.read_csv(csv_path)
    for csv_path in (child_mortality_path, population_path)
)
# Data Cleaning
def convert_population(value):
    """Convert a suffixed population figure such as ``"1.5M"`` to a number.

    Args:
        value: A cell taken from the population table. Strings may end with
            a magnitude suffix: ``k`` (thousand), ``M`` (million) or ``B``
            (billion). The suffix is matched case-insensitively and
            surrounding whitespace is ignored.

    Returns:
        A ``float`` for string input (NaN when the string is blank). Any
        non-string input (for example a number or NaN that pandas already
        parsed) is returned unchanged.

    Raises:
        ValueError: If a non-blank string is not a number with an optional
            supported suffix.
    """
    if not isinstance(value, str):
        return value

    text = value.strip()
    if not text:
        # A blank cell carries no figure; treat it as missing rather than
        # letting float("") raise.
        return float('nan')

    multipliers = {'k': 1_000, 'm': 1_000_000, 'b': 1_000_000_000}
    # Only the trailing character can be a magnitude suffix.
    multiplier = multipliers.get(text[-1].lower())
    if multiplier is None:
        return float(text)
    return float(text[:-1]) * multiplier
# Convert every column after the first one (the first column is left as is)
# from suffixed strings to numbers. Series.map is used per column because
# DataFrame.applymap is deprecated in recent pandas, and the columns are
# replaced (rather than written through .iloc) so that they take on the
# numeric dtype of the converted values.
value_columns = population.columns[1:]
population[value_columns] = population[value_columns].apply(
    lambda column: column.map(convert_population)
)
# Page heading followed by a short introduction to the visualization.
st.title("Child Mortality Rate vs Population")
intro_text = """
This visualization explores the trends in child mortality and population over time for a selected country.
The data combines population estimates and child mortality rates (deaths per 1,000 live births)
into a unified scale by calculating the total child mortality as a proportion of the population.
"""
st.write(intro_text)

# Country selector: options are the distinct values of the 'country' column
# of the child mortality table, sorted, with the first one preselected.
st.subheader("Select a Country")
country_column = child_mortality['country']
countries = sorted(country_column.unique())
selected_country = st.selectbox("Country", countries, index=0)
if not selected_country:
    st.warning("Please select a country to view the trends.")
else:
    # Reshape each table from one column per year to one row per year,
    # keeping only the rows of the selected country.
    melt_options = dict(id_vars='country', var_name='year')
    mortality_rows = child_mortality[child_mortality['country'] == selected_country]
    mortality_country = mortality_rows.melt(value_name='child_mortality', **melt_options)
    population_rows = population[population['country'] == selected_country]
    population_country = population_rows.melt(value_name='population', **melt_options)

    # Keep only the (country, year) pairs present in both tables.
    merged_country_data = pd.merge(
        mortality_country,
        population_country,
        how='inner',
        on=['country', 'year'],
    )
    merged_country_data['year'] = merged_country_data['year'].astype(int)

    # Keep every twentieth year only.
    in_twenty_year_step = merged_country_data['year'] % 20 == 0
    merged_country_data = merged_country_data[in_twenty_year_step]

    # Scale the per-1,000 mortality figure by the population.
    merged_country_data['child_mortality_rate'] = (
        merged_country_data['child_mortality'] * merged_country_data['population'] / 1_000
    )

    # Line chart with one line per metric; the two columns are folded into
    # Metric/Value pairs so that they share a single y-axis.
    x_encoding = alt.X('year:O', title='Year', axis=alt.Axis(labelAngle=0))
    y_encoding = alt.Y(
        'Value:Q',
        title="Child Mortality Rate per Population",
        scale=alt.Scale(type='linear'),
    )
    color_encoding = alt.Color(
        'Metric:N',
        title='Metrics',
        legend=alt.Legend(orient='top'),
    )
    chart = (
        alt.Chart(merged_country_data)
        .transform_fold(
            ['child_mortality_rate', 'population'],
            as_=['Metric', 'Value'],
        )
        .mark_line(point=True)
        .encode(
            x=x_encoding,
            y=y_encoding,
            color=color_encoding,
            tooltip=['year', 'Metric:N', 'Value:Q'],
        )
        .properties(
            width=800,
            height=400,
            title=f"Child Mortality Rate and Population Trends in {selected_country}",
        )
    )
    st.altair_chart(chart, use_container_width=True)

    # Notes on the difficulties met while building the chart.
    st.subheader("Challenges Faced")
    st.write("""While building the visualization I began by trying to scale the data for the y-axis. Having both metrics on the same axis would enable direct comparison, providing a holistic view of population and child mortality trends.
- To do that I calculated the child mortality rate:
- {Child Mortality Rate} = {{Child Mortality(per 1,000 live births)}*{Population}}/{1,000}
- Data is shown in 20-year intervals for better clarity and simplicity.
- Population data was stored with suffixes ('M', 'B', 'k'), which required conversion to numeric formats for proper calculations.
""")