"""model.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1hA9Xz_VjzMVj66qS_j3A5dKcGkAfScKM
"""
# Notebook-only setup step: in Colab this cell ran `pip install pycaret`.
# A bare `pip install ...` line is a SyntaxError in a plain .py module, so it
# is kept here as a comment. Run it from a shell (or as `!pip install pycaret`
# in a notebook cell) if pycaret is needed; nothing visible in this script
# imports it.
# pip install pycaret
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy import stats
# Toy data set: two (Age, Life Expectancy) points used to illustrate the
# straight-line model before any real figures are loaded.
simple_life_dataset = pd.DataFrame({'Age': [0, 60], 'Life Expectancy': [90, 30]})

# Outside a notebook the value of a bare expression is silently discarded,
# so print it explicitly.
print(simple_life_dataset.head())

# Fit a straight line through the two points.
slope, intercept, r_value, p_value, std_err = stats.linregress(
    simple_life_dataset['Age'],
    simple_life_dataset['Life Expectancy'],
)
print('intercept: ', intercept)
print('slope: ', slope)

# Value of the fitted line at age 20, rounded up to a whole number.
print(np.ceil(slope * 20 + intercept))
# Plot the toy line: the two data points plus the fitted value at age 20.
fig, axes = plt.subplots(figsize=(5, 5))
x = [0, 20, 60]
y = [90, 70, 30]
axes.plot(x, y, color='blue', linestyle='--', marker='o')

# Titles, axis labels and fixed axis ranges.
fig.suptitle('Life Expectancy')
axes.set_xlabel('age')
axes.set_xlim([-5, 100])
axes.set_ylabel('life_expectancy')
axes.set_ylim([0, 100])

plt.grid()
plt.show()
# Load the WHO export from the hard-coded Colab Drive path.
who_list = pd.read_csv('/content/drive/MyDrive/WHOSIS_000001,WHOSIS_000015.csv')

# Write a copy to the working directory. index=False stops pandas from
# writing the row index as an extra unnamed first column.
who_list.to_csv('WHOSIS_000001,WHOSIS_000015.csv', index=False)

# Keep only the columns used below. .copy() makes this an independent frame,
# so the column assignment that follows does not raise SettingWithCopyWarning.
who_list = who_list[
    ['GHO (DISPLAY)', 'YEAR (CODE)', 'COUNTRY (DISPLAY)', 'SEX (DISPLAY)', 'Numeric']
].copy()

# Title-case the country names. The vectorised .str accessor leaves missing
# values as NaN, whereas calling .title() on every raw value raises
# AttributeError as soon as one of them is a NaN float.
who_list['COUNTRY (DISPLAY)'] = who_list['COUNTRY (DISPLAY)'].str.title()

# Peek at the rows for one country (printed, because a bare expression shows
# nothing outside a notebook).
print(who_list[who_list['COUNTRY (DISPLAY)'] == 'France'].head(10))
# Example selection used for the step-by-step walkthrough.
country = 'United States Of America'
sex = 'Male'

# Rows whose country name starts with `country` (missing names count as no
# match) and whose sex equals `sex`.
sub_set = who_list[who_list['COUNTRY (DISPLAY)'].str.startswith(country, na=False)]
sub_set = sub_set[sub_set['SEX (DISPLAY)'] == sex]

# Newest year first, so row 0 of each table below is the latest figure.
sub_set = sub_set.sort_values('YEAR (CODE)', ascending=False)

# One table per indicator.
sub_set_birth = sub_set[sub_set['GHO (DISPLAY)'] == 'Life expectancy at birth (years)']
sub_set_60 = sub_set[sub_set['GHO (DISPLAY)'] == 'Life expectancy at age 60 (years)']

print('sub_set_birth:')
print(sub_set_birth.head(5))
print('sub_set_60:')
print(sub_set_60.head(5))
# First row of each indicator table (sub_set was sorted by year, descending).
# NOTE: .values[0] raises IndexError if the selection matched no rows.
lf_at_birth = sub_set_birth['Numeric'].values[0]
lf_at_60 = sub_set_60['Numeric'].values[0]

# The two points the straight-line model is built from.
age = [0, 60]
life_expectancy = [lf_at_birth, lf_at_60]

# Plot those two points with the same styling as the toy example.
fig, axes = plt.subplots(figsize=(5, 5))
x = age
y = life_expectancy
axes.plot(x, y, color='blue', linestyle='--', marker='o')

fig.suptitle('Life Expectancy')
axes.set_xlabel('age')
axes.set_xlim([-5, 100])
axes.set_ylabel('life expectancy')
axes.set_ylim([0, 100])

plt.grid()
plt.show()
# Fit a straight line through the (age, life_expectancy) points selected above.
slope, intercept, r_value, p_value, std_err = stats.linregress(age, life_expectancy)
print('intercept: ', intercept)
print('slope: ', slope)

# Value of the fitted line at age 49, rounded up. Printed because a bare
# expression produces no output outside a notebook.
print(np.ceil(slope * 49 + intercept))
def get_life_expectancy(age, country, sex):
    """Estimate life expectancy at ``age`` from the module-level ``who_list`` table.

    Looks up the most recent non-missing 'Life expectancy at birth (years)'
    and 'Life expectancy at age 60 (years)' values for the given country and
    sex, fits a straight line through the points (0, value at birth) and
    (60, value at age 60), and evaluates that line at ``age``.

    Args:
        age: Age (a number) at which to evaluate the fitted line.
        country: Country name as stored in 'COUNTRY (DISPLAY)'. An exact
            match is preferred; when there is none, rows whose name starts
            with this string are used instead.
        sex: Value matched exactly against 'SEX (DISPLAY)'.

    Returns:
        The value of the fitted line at ``age`` rounded up with ``np.ceil``,
        or ``None`` when either indicator has no usable row for the
        requested country and sex.
    """
    country_names = who_list['COUNTRY (DISPLAY)']

    # Prefer an exact name match: a pure prefix test lets e.g. 'Niger' also
    # select 'Nigeria', and the newest row could then belong to the wrong
    # country. Fall back to the prefix test so partial names keep working.
    country_mask = country_names == country
    if not country_mask.any():
        country_mask = country_names.str.startswith(country, na=False)

    sub_set = who_list[country_mask & (who_list['SEX (DISPLAY)'] == sex)]

    # Skip rows without a value so a blank latest year cannot turn the
    # result into NaN.
    sub_set = sub_set.dropna(subset=['Numeric'])

    # Newest year first, so row 0 of each indicator table is the latest figure.
    sub_set = sub_set.sort_values('YEAR (CODE)', ascending=False)
    sub_set_birth = sub_set[sub_set['GHO (DISPLAY)'] == 'Life expectancy at birth (years)']
    sub_set_60 = sub_set[sub_set['GHO (DISPLAY)'] == 'Life expectancy at age 60 (years)']

    # Both end points are required to define the line.
    if sub_set_birth.empty or sub_set_60.empty:
        return None

    lf_at_birth = sub_set_birth['Numeric'].values[0]
    lf_at_60 = sub_set_60['Numeric'].values[0]

    # Only slope and intercept are needed from the regression result.
    fit = stats.linregress([0, 60], [lf_at_birth, lf_at_60])
    return np.ceil(fit.slope * age + fit.intercept)
# Results are printed explicitly: outside a notebook the value of a bare
# expression is silently discarded.

# A sample of ten distinct country names (set iteration order is arbitrary).
print(list(set(who_list['COUNTRY (DISPLAY)']))[0:10])

# Spot checks of the estimator for a few country / sex combinations.
print(get_life_expectancy(22, 'Japan', 'Female'))
print(get_life_expectancy(22, 'Pakistan', 'Female'))
print(get_life_expectancy(21, 'India', 'Male'))

# Number of missing values in each column of the table.
missing_values_count = who_list.isnull().sum()
print(missing_values_count)

# An age beyond the upper data point (60): the fitted line is extrapolated.
print(get_life_expectancy(80, 'Pakistan', 'Female'))
# NOTE: pickle stores a plain function by reference (module and qualified
# name), not its code or the who_list table it reads. model.pkl can therefore
# only be loaded where get_life_expectancy is importable under the same name.
# Context managers make sure both file handles are closed.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(get_life_expectancy, model_file)

# Only unpickle files you trust: pickle.load can execute arbitrary code.
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)