Ll / model.py

uploaded model.py

7458c95 almost 3 years ago

4.57 kB

	# -*- coding: utf-8 -*-
	"""model.ipynb

	Automatically generated by Colaboratory.

	Original file is located at
	https://colab.research.google.com/drive/1hA9Xz_VjzMVj66qS_j3A5dKcGkAfScKM
	"""

	# NOTE: notebook-only shell command, kept as a comment so that this file
	# parses as plain Python. Run it in a shell or notebook cell if pycaret is
	# needed; nothing in the visible code references pycaret.
	# !pip install pycaret

	from scipy import stats

	import numpy as np

	import pandas as pd

	import pickle

	import matplotlib.pyplot as plt
	# Build a made-up two-row data set to illustrate the straight-line fit.
	simple_life_dataset = pd.DataFrame(
	    {
	        'Age': [0, 60],
	        'Life Expectancy': [90, 30],
	    }
	)
	# Notebook display leftover: the returned frame is discarded in a script.
	simple_life_dataset.head()

	# Re-imports carried over from the original notebook cell.
	import numpy as np
	from scipy import stats

	# Fit a straight line through the two points and report its coefficients.
	slope, intercept, r_value, p_value, std_err = stats.linregress(
	    x=simple_life_dataset['Age'],
	    y=simple_life_dataset['Life Expectancy'],
	)
	print('intercept: ', intercept)
	print('slope: ', slope)

	# Fitted value at age 20, rounded up (discarded when run as a script).
	np.ceil(slope * 20 + intercept)

	# Plot the toy example as a dashed blue line with circle markers.
	x = [0, 20, 60]
	y = [90, 70, 30]
	fig, axes = plt.subplots(figsize=(5, 5))
	fig.suptitle('Life Expectancy')
	axes.plot(x, y, color='blue', linestyle='--', marker='o')
	# Axis labels and fixed limits, set in a single call.
	axes.set(
	    xlabel='age',
	    xlim=[-5, 100],
	    ylabel='life_expectancy',
	    ylim=[0, 100],
	)
	plt.grid()
	plt.show()

	# load WHO longevity data
	# http://apps.who.int/gho/data/node.main.688
	# NOTE(review): the path looks like a Colab Google Drive mount, so this line
	# presumably only works inside that environment - confirm before reuse.
	who_list = pd.read_csv('/content/drive/MyDrive/WHOSIS_000001,WHOSIS_000015.csv')
	# save a local copy of the data set for our Flask prototype later on
	# NOTE(review): to_csv also writes the row index as an extra unnamed first
	# column; pass index=False if the consumer should not see it.
	who_list.to_csv('WHOSIS_000001,WHOSIS_000015.csv')

	# Keep only the useful features. The explicit .copy() makes who_list an
	# independent frame, so the column assignment below does not write into a
	# slice of the original (pandas' SettingWithCopyWarning).
	who_list = who_list[
	    [
	        'GHO (DISPLAY)',
	        'YEAR (CODE)',
	        'COUNTRY (DISPLAY)',
	        'SEX (DISPLAY)',
	        'Numeric',
	    ]
	].copy()
	# Fix the case display of the country text. The vectorised .str accessor
	# leaves missing values as NaN instead of raising AttributeError on them.
	who_list['COUNTRY (DISPLAY)'] = who_list['COUNTRY (DISPLAY)'].str.title()
	# print a few rows (notebook display; the result is discarded in a script)
	who_list[who_list['COUNTRY (DISPLAY)']=='France'].head(10)

	# Worked example: select the rows for one country and one sex.
	country = 'United States Of America'
	sex = 'Male'

	# Build both row filters up front, then apply them together and sort by
	# year, newest first, so that row 0 is always the latest reading.
	_country_rows = who_list['COUNTRY (DISPLAY)'].str.startswith(country, na=False)
	_sex_rows = who_list['SEX (DISPLAY)'] == sex
	sub_set = who_list[_country_rows & _sex_rows].sort_values(
	    'YEAR (CODE)', ascending=False
	)

	# Split the sorted rows by indicator: at birth vs. at age 60.
	_indicator = sub_set['GHO (DISPLAY)']
	sub_set_birth = sub_set[_indicator == 'Life expectancy at birth (years)']
	sub_set_60 = sub_set[_indicator == 'Life expectancy at age 60 (years)']

	# Show the five most recent rows of each indicator.
	for _label, _frame in (
	    ('sub_set_birth:', sub_set_birth),
	    ('sub_set_60:', sub_set_60),
	):
	    print(_label)
	    print(_frame.head(5))

	# Take the most recent value of each indicator (row 0 after the descending
	# sort by year) and pair them with the ages 0 and 60.
	lf_at_birth, lf_at_60 = (
	    sub_set_birth['Numeric'].values[0],
	    sub_set_60['Numeric'].values[0],
	)
	age = [0, 60]
	life_expectancy = [lf_at_birth, lf_at_60]

	# Plot the two points with the same layout as the toy example.
	x, y = age, life_expectancy
	fig, axes = plt.subplots(figsize=(5, 5))
	fig.suptitle('Life Expectancy')
	axes.plot(x, y, color='blue', linestyle='--', marker='o')
	axes.set(
	    xlabel='age',
	    xlim=[-5, 100],
	    ylabel='life expectancy',
	    ylim=[0, 100],
	)
	plt.grid()
	plt.show()

	# Fit a straight line through the two WHO points (ages 0 and 60).
	_fit = stats.linregress(x=age, y=life_expectancy)
	slope, intercept, r_value, p_value, std_err = _fit
	for _label, _value in (('intercept: ', intercept), ('slope: ', slope)):
	    print(_label, _value)

	# Fitted value for a 49-year-old male in the USA, rounded up
	# (notebook display; the result is discarded in a script).
	np.ceil(slope * 49 + intercept)

	def get_life_expectancy(age, country, sex, data=None):
	    """Evaluate a two-point linear life-expectancy fit at ``age``.

	    A straight line is fitted through the latest 'Life expectancy at birth
	    (years)' value (placed at age 0) and the latest 'Life expectancy at age
	    60 (years)' value (placed at age 60) for the requested country and sex.
	    The line is then evaluated at ``age`` and rounded up.

	    Args:
	        age: Age at which the fitted line is evaluated.
	        country: Country name as stored in the 'COUNTRY (DISPLAY)' column.
	            An exact match is preferred; only when there is none are rows
	            whose country name starts with ``country`` used instead.
	        sex: Value to match in the 'SEX (DISPLAY)' column, e.g. 'Male'.
	        data: Optional DataFrame with the columns 'GHO (DISPLAY)',
	            'YEAR (CODE)', 'COUNTRY (DISPLAY)', 'SEX (DISPLAY)' and
	            'Numeric'. Defaults to the module-level ``who_list``.

	    Returns:
	        The fitted value rounded up with ``np.ceil``, or ``None`` when
	        either indicator has no usable row for this country and sex.
	        The result is not clamped to any range.
	    """
	    frame = who_list if data is None else data

	    # Prefer an exact country match: a bare prefix match lets e.g. 'Niger'
	    # pick up 'Nigeria' rows, whose newer year could then win the sort.
	    country_names = frame['COUNTRY (DISPLAY)']
	    country_rows = country_names == country
	    if not country_rows.any():
	        # Backward-compatible fallback for callers that pass a name prefix.
	        country_rows = country_names.str.startswith(country, na=False)

	    sub_set = frame[country_rows & (frame['SEX (DISPLAY)'] == sex)]
	    # Ignore rows without a value so that a missing latest reading cannot
	    # turn the whole prediction into NaN.
	    sub_set = sub_set.dropna(subset=['Numeric'])
	    # Newest year first, so element 0 below is the latest reading.
	    sub_set = sub_set.sort_values('YEAR (CODE)', ascending=False)

	    indicator = sub_set['GHO (DISPLAY)']
	    birth_values = sub_set.loc[
	        indicator == 'Life expectancy at birth (years)', 'Numeric'
	    ].values
	    at_60_values = sub_set.loc[
	        indicator == 'Life expectancy at age 60 (years)', 'Numeric'
	    ].values

	    # Not all combinations exist, so check that we have data for both.
	    if len(birth_values) == 0 or len(at_60_values) == 0:
	        return None

	    # Model: straight line through (0, at-birth value) and (60, at-60 value).
	    slope, intercept, _r_value, _p_value, _std_err = stats.linregress(
	        [0, 60], [birth_values[0], at_60_values[0]]
	    )

	    # Predict for the age variable.
	    return np.ceil(slope * age + intercept)

	# Peek at ten distinct country names (notebook display; the result is
	# discarded when run as a script).
	list(set(who_list['COUNTRY (DISPLAY)']))[:10]

	# Try the function on a few (age, country, sex) combinations, starting with
	# a 22-year-old Japanese female. The return values are not captured.
	for _age, _country, _sex in (
	    (22, 'Japan', 'Female'),
	    (22, 'Pakistan', 'Female'),
	    (21, 'India', 'Male'),
	):
	    get_life_expectancy(_age, _country, _sex)

	# Count the missing values in each column and print the counts.
	missing_values_count = who_list.isna().sum()
	print(missing_values_count)

	# One more call with an age beyond the 60-year data point.
	get_life_expectancy(80, 'Pakistan', 'Female')

	# Persist the prediction function to disk.
	# NOTE: pickle stores a plain function by reference (its module and
	# qualified name), not its code or the data it reads, so 'model.pkl' can
	# only be loaded where get_life_expectancy is importable under that name.
	# The context manager guarantees the file is flushed and closed before it
	# is read back below.
	with open('model.pkl', 'wb') as model_file:
	    pickle.dump(get_life_expectancy, model_file)

	# Load it back. NOTE(security): pickle.load can execute arbitrary code, so
	# only unpickle files from a trusted source.
	with open('model.pkl', 'rb') as model_file:
	    model = pickle.load(model_file)