|
|
import streamlit as st |
|
|
import numpy as np |
|
|
import pickle |
|
|
import pandas as pd |
|
|
import matplotlib.pyplot as plt |
|
|
import seaborn as sns |
|
|
|
|
|
|
|
|
# Deserialize the pickled prediction model used by the "Hands-on Model" page.
# The context manager closes the file handle as soon as loading finishes,
# instead of leaving it open until garbage collection.
# NOTE(review): unpickling can execute arbitrary code; this assumes the .pkl
# file is a trusted artifact shipped with the app -- confirm.
with open("life_expectancy_model.pkl", "rb") as model_file:
    model = pickle.load(model_file)
|
|
|
|
|
|
|
|
# Page-level settings: browser tab title and icon, plus a centered layout.
_page_settings = {
    "page_title": "Life Expectancy Prediction",
    "page_icon": "📊",
    "layout": "centered",
}
st.set_page_config(**_page_settings)
|
|
|
|
|
|
|
|
# App-wide stylesheet.  It defines the CSS classes referenced by the HTML
# snippets on the pages (title, subtitle, result-box) and restyles the
# Streamlit app container and buttons.
_custom_css = """
<style>
.stApp {
background-color: #E3F2FD; /* Light Blue Background */
}
.title {
text-align: center;
font-size: 28px;
font-weight: bold;
color: #2C3E50;
}
.subtitle {
text-align: center;
font-size: 30px;
font-weight: bold;
color: #1E88E5;
margin-top: 15px;
}
.stButton > button {
width: 100%;
background-color: #1E88E5;
color: white;
font-size: 16px;
font-weight: bold;
border-radius: 6px;
padding: 8px;
transition: 0.3s;
}
.stButton > button:hover {
background-color: #1565C0; /* Darker Blue on Hover */
}
.result-box {
text-align: center;
font-size: 22px;
font-weight: bold;
color: white;
padding: 15px;
border-radius: 8px;
margin-top: 20px;
background-color: #388E3C;
}
</style>
"""

# unsafe_allow_html is required so the <style> element is emitted as HTML.
st.markdown(_custom_css, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
# Seed the navigation state the first time the key is missing, so the page
# dispatch further down always finds a value to compare against.
if "current_page" not in st.session_state:
    st.session_state["current_page"] = "Model Report"
|
|
|
|
|
|
|
|
def switch_page(page):
    """Make ``page`` the active page and redraw the app right away.

    Buttons rendered inside a page branch call this *after* the script has
    already decided which page to draw for the current run.  Only storing the
    new value would therefore leave the old page on screen until the user
    interacted again (the "click twice" symptom).  Triggering a rerun makes
    the page dispatch see the new value immediately.

    Args:
        page: Name of the page to show; stored in
            ``st.session_state.current_page``.
    """
    # Already on the requested page: nothing to store and no redraw needed.
    if st.session_state.get("current_page") == page:
        return

    st.session_state.current_page = page
    # Abort the current run and start a fresh one with the updated state.
    st.rerun()
|
|
|
|
|
|
|
|
# Sidebar navigation: one button per top-level page.  Each button's label is
# also the page name handed to switch_page().
st.sidebar.title("Navigation")
for sidebar_page in ("Model Report", "Hands-on Model"):
    if st.sidebar.button(sidebar_page):
        switch_page(sidebar_page)
|
|
|
|
|
|
|
|
@st.cache_data
def _load_dataset(csv_path="Life Expectancy Data.csv"):
    """Read the life-expectancy CSV and normalise its column names.

    Streamlit re-executes the whole script on every widget interaction, so an
    uncached ``pd.read_csv`` would re-read and re-parse the file on each click.
    ``st.cache_data`` keeps the parsed frame and returns a copy per call.

    Args:
        csv_path: Location of the CSV file.

    Returns:
        A DataFrame whose column labels have surrounding whitespace stripped.
    """
    frame = pd.read_csv(csv_path)
    # Strip stray spaces from the headers so later lookups by column name
    # (e.g. the boxplot column list) can use the clean labels.
    frame.columns = frame.columns.str.strip()
    return frame


data = _load_dataset()
|
|
|
|
|
|
|
|
if st.session_state.current_page == "Model Report":
    # Landing page: heading, banner image, and one button per report section.
    st.markdown("<h1 class='title'>Model Report</h1>", unsafe_allow_html=True)

    st.image(
        "images/Life_Expectanccy.webp",
        caption="Life Expectancy Prediction Overview",
        use_container_width=True,
    )

    st.markdown(
        "<p class='subtitle'>Explore different stages of the Life Expectancy project</p>",
        unsafe_allow_html=True,
    )

    # (button label, page name passed to switch_page), in display order.
    report_sections = (
        ("▶ **Problem Statement**", "Problem Statement"),
        ("▶ **Data Collection**", "Data Collection"),
        ("▶ **Simple EDA**", "Simple EDA"),
        ("▶ **Data Pre-processing**", "Data Pre-processing"),
        ("▶ **Exploratory Data Analysis**", "EDA"),
        ("▶ **Model Building**", "Model Building"),
        ("▶ **Final Model**", "Final Model"),
    )
    for section_label, section_page in report_sections:
        if st.button(section_label):
            switch_page(section_page)
|
|
|
|
|
|
|
|
|
|
|
elif st.session_state.current_page == "Problem Statement": |
|
|
st.markdown("<h1 class='title'>Problem Statement</h1>", unsafe_allow_html=True) |
|
|
|
|
|
st.markdown(""" |
|
|
<h5 style="text-align: center; margin-top: 20px;"> |
|
|
The Goal of this project is to build a predictive model that estimates the Life Expectancy of a country |
|
|
based on multiple influencing factors such as health indicators, economic conditions, and social parameters. |
|
|
</h5> |
|
|
""", unsafe_allow_html=True) |
|
|
|
|
|
st.markdown("<br>", unsafe_allow_html=True) |
|
|
|
|
|
st.image("images/problem_statement.png", |
|
|
caption="Life Expectancy Prediction Overview", |
|
|
use_container_width=True) |
|
|
|
|
|
|
|
|
if st.button("🔙 Go Back to Model Report"): |
|
|
switch_page("Model Report") |
|
|
|
|
|
elif st.session_state.current_page == "Data Collection": |
|
|
st.markdown("<h1 class='title'>Data Collection</h1>", unsafe_allow_html=True) |
|
|
|
|
|
st.markdown(""" |
|
|
<h5 style="text-align: center; margin-top: 20px;"> |
|
|
The dataset used in this project is sourced from Kaggle, containing information on life expectancy across |
|
|
different countries along with various health, economic, and demographic factors. |
|
|
</h5> |
|
|
""", unsafe_allow_html=True) |
|
|
|
|
|
st.markdown("<br>", unsafe_allow_html=True) |
|
|
|
|
|
st.markdown(""" |
|
|
<h5 style="text-align: center; margin-top: 10px;"> |
|
|
📌 <a href="https://www.kaggle.com/datasets/kumarajarshi/life-expectancy-who" target="_blank" style="font-weight: bold; color: #007BFF; text-decoration: none;"> |
|
|
Click here to access the dataset on Kaggle</a> |
|
|
</h5> |
|
|
""", unsafe_allow_html=True) |
|
|
|
|
|
|
|
|
st.markdown("<h2 class='subtitle' style='text-align: center; margin-top: 20px;'>Dataset Overview</h2>", unsafe_allow_html=True) |
|
|
|
|
|
st.markdown(""" |
|
|
<h5 style="text-align: center; margin-top: 15px; margin-bottom: 20px;"> |
|
|
The dataset consists of <b>2938 rows</b> and <b>22 columns</b>, capturing crucial indicators such as life expectancy, |
|
|
mortality rates, GDP, schooling, immunization rates, and more. Below is a summary of the dataset features: |
|
|
</h5> |
|
|
""", unsafe_allow_html=True) |
|
|
|
|
|
st.markdown("<br>", unsafe_allow_html=True) |
|
|
|
|
|
data_info = """ |
|
|
<div style= "font-size: 16px; background-color: #F5F5F5; padding: 15px; border-radius: 10px;"> |
|
|
• <b>Country:</b> Name of the country (Categorical)<br> |
|
|
• <b>Year:</b> Year of observation (Numerical)<br> |
|
|
• <b>Status:</b> Developing or Developed country (Categorical)<br> |
|
|
• <b>Life Expectancy:</b> Average age a person is expected to live (Numerical)<br> |
|
|
• <b>Adult Mortality:</b> Probability of dying between 15-60 years per 1000 population (Numerical)<br> |
|
|
• <b>Infant Deaths:</b> Number of infant deaths per 1000 live births (Numerical)<br> |
|
|
• <b>Alcohol:</b> Alcohol consumption per capita (Numerical)<br> |
|
|
• <b>Percentage Expenditure:</b> Government expenditure on health as a percentage of GDP (Numerical)<br> |
|
|
• <b>Hepatitis B:</b> Immunization coverage for Hepatitis B (Numerical)<br> |
|
|
• <b>Measles:</b> Number of reported measles cases per year (Numerical)<br> |
|
|
• <b>BMI:</b> Average Body Mass Index of the population (Numerical)<br> |
|
|
• <b>Under-five Deaths:</b> Number of deaths under the age of five per 1000 live births (Numerical)<br> |
|
|
• <b>Polio:</b> Immunization coverage for Polio (Numerical)<br> |
|
|
• <b>Total Expenditure:</b> Total health expenditure as a percentage of GDP (Numerical)<br> |
|
|
• <b>Diphtheria:</b> Immunization coverage for Diphtheria (Numerical)<br> |
|
|
• <b>HIV/AIDS:</b> Death rate due to HIV/AIDS per 100,000 people (Numerical)<br> |
|
|
• <b>GDP:</b> Gross Domestic Product per capita (Numerical)<br> |
|
|
• <b>Population:</b> Total population of the country (Numerical)<br> |
|
|
• <b>Thinness 1-19 Years:</b> Percentage of thin individuals aged 1-19 years (Numerical)<br> |
|
|
• <b>Thinness 5-9 Years:</b> Percentage of thin individuals aged 5-9 years (Numerical)<br> |
|
|
• <b>Income Composition:</b> Human development index based on income composition (Numerical)<br> |
|
|
• <b>Schooling:</b> Average number of years of schooling (Numerical)<br> |
|
|
</div> |
|
|
""" |
|
|
|
|
|
st.markdown(data_info, unsafe_allow_html=True) |
|
|
|
|
|
st.markdown("<br>", unsafe_allow_html=True) |
|
|
|
|
|
if st.button("🔙 Go Back to Model Report"): |
|
|
switch_page("Model Report") |
|
|
|
|
|
|
|
|
|
|
|
elif st.session_state.current_page == "Simple EDA": |
|
|
st.markdown("<h1 class='title'>Simple Exploratory Data Analysis</h1>", unsafe_allow_html=True) |
|
|
|
|
|
st.markdown(""" |
|
|
<h5 style="text-align: center; margin-top: 20px;"> |
|
|
Exploratory Data Analysis (EDA) helps in understanding the structure, patterns, and missing values in the dataset. |
|
|
Below is an initial preview of the data, followed by a missing values summary. |
|
|
</h5> |
|
|
""", unsafe_allow_html=True) |
|
|
|
|
|
st.markdown("<br>", unsafe_allow_html=True) |
|
|
|
|
|
|
|
|
st.markdown("<h3 class='subtitle' style='text-align: center;'>Sample Dataset</h3>", unsafe_allow_html=True) |
|
|
st.dataframe(data.head()) |
|
|
|
|
|
st.markdown("<br>", unsafe_allow_html=True) |
|
|
|
|
|
|
|
|
st.markdown("<h3 class='subtitle' style='text-align: center;'>Missing Values Summary</h3>", unsafe_allow_html=True) |
|
|
|
|
|
missing_values = data.isna().sum().reset_index() |
|
|
missing_values.columns = ["Column Name", "Missing Values"] |
|
|
|
|
|
col1, col2, col3 = st.columns([1, 2, 1]) |
|
|
|
|
|
with col2: |
|
|
st.dataframe(missing_values) |
|
|
|
|
|
st.markdown("<br>", unsafe_allow_html=True) |
|
|
|
|
|
|
|
|
st.markdown("<h3 class='subtitle' style='text-align: center;'>Data Description</h3>", unsafe_allow_html=True) |
|
|
|
|
|
st.dataframe(data.describe()) |
|
|
|
|
|
st.markdown("<br>", unsafe_allow_html=True) |
|
|
|
|
|
|
|
|
st.markdown("<h3 class='subtitle' style='text-align: center;'>Boxplots for Data Distribution</h3>", unsafe_allow_html=True) |
|
|
|
|
|
|
|
|
columns = ['Life expectancy', 'Adult Mortality', |
|
|
'infant deaths', 'Alcohol', 'percentage expenditure', 'Hepatitis B', |
|
|
'Measles', 'BMI', 'under-five deaths', 'Polio', 'Total expenditure', |
|
|
'Diphtheria', 'HIV/AIDS', 'GDP', 'Population', 'thinness 1-19 years', |
|
|
'thinness 5-9 years', 'Income composition of resources', 'Schooling'] |
|
|
|
|
|
|
|
|
fig, axes = plt.subplots(nrows=10, ncols=2, figsize=(12, 30)) |
|
|
axes = axes.flatten() |
|
|
|
|
|
for i, col in enumerate(columns): |
|
|
sns.boxplot(x=data[col], ax=axes[i], color="skyblue") |
|
|
axes[i].set_title(f'Boxplot of {col}', fontsize=12) |
|
|
axes[i].set_xlabel("") |
|
|
|
|
|
plt.tight_layout() |
|
|
st.pyplot(fig) |
|
|
|
|
|
st.markdown("<br>", unsafe_allow_html=True) |
|
|
|
|
|
|
|
|
if st.button("🔙 Go Back to Model Report"): |
|
|
switch_page("Model Report") |
|
|
|
|
|
|
|
|
elif st.session_state.current_page == "Hands-on Model": |
|
|
st.title("Hands-on Model") |
|
|
st.write("Provide inputs to predict Life Expectancy.") |
|
|
|
|
|
col1, col2 = st.columns(2) |
|
|
with col1: |
|
|
year = st.slider("Year", 2000, 2015, 2008) |
|
|
status = st.radio("Status", ["Developing", "Developed"], horizontal=True) |
|
|
status = 1 if status == "Developed" else 0 |
|
|
adult_mortality = st.slider("Adult Mortality Rate", 1, 723, 144) |
|
|
infant_deaths = st.slider("Infant Deaths", 0, 1800, 3) |
|
|
alcohol = st.slider("Alcohol Consumption", 0.01, 17.87, 4.55) |
|
|
percentage_expenditure = st.slider("Percentage Expenditure", 0.0, 19479.91, 738.25) |
|
|
hepatitis_b = st.slider("Hepatitis B Immunization (%)", 1, 99, 83) |
|
|
measles = st.slider("Measles Cases", 0, 212183, 2419) |
|
|
bmi = st.slider("BMI", 1.0, 87.3, 38.3) |
|
|
polio = st.slider("Polio Immunization (%)", 3, 99, 82) |
|
|
|
|
|
with col2: |
|
|
under_five_deaths = st.slider("Under-Five Deaths", 0, 2500, 4) |
|
|
total_expenditure = st.slider("Total Healthcare Expenditure (%)", 0.37, 17.6, 5.92) |
|
|
diphtheria = st.slider("Diphtheria Immunization (%)", 2, 99, 82) |
|
|
hiv_aids = st.slider("HIV/AIDS Prevalence Rate", 0.1, 50.6, 1.74) |
|
|
gdp = st.slider("GDP per Capita", 1.68, 119172.7, 6611.52) |
|
|
population = st.slider("Population", 34, 1293859000, 10230850) |
|
|
thinness_1_19 = st.slider("Thinness 1-19 years (%)", 0.1, 27.7, 4.83) |
|
|
thinness_5_9 = st.slider("Thinness 5-9 years (%)", 0.1, 28.6, 4.86) |
|
|
income_composition = st.slider("Income Composition of Resources", 0.0, 0.948, 0.63) |
|
|
schooling = st.slider("Schooling (Years)", 0.0, 20.7, 11.99) |
|
|
|
|
|
if st.button("Predict Life Expectancy"): |
|
|
features = np.array([[year, status, adult_mortality, infant_deaths, alcohol, percentage_expenditure, |
|
|
hepatitis_b, measles, bmi, under_five_deaths, polio, total_expenditure, |
|
|
diphtheria, hiv_aids, gdp, population, thinness_1_19, thinness_5_9, |
|
|
income_composition, schooling]]) |
|
|
|
|
|
prediction = model.predict(features)[0] |
|
|
|
|
|
st.markdown( |
|
|
f""" |
|
|
<div class="result-box"> |
|
|
Predicted Life Expectancy: <b>{prediction:.2f} years</b> |
|
|
</div> |
|
|
""", |
|
|
unsafe_allow_html=True, |
|
|
) |
|
|
|
|
|
if st.button("⬅ **Back to Model Report**"): |
|
|
switch_page("Model Report") |
|
|
|