Spaces:

wb-droid
/

HDB_price_predictor

Sleeping

App Files Files Community

HDB_price_predictor / app.py

wb-droid

Add minor fixes

14cf7cd about 2 years ago

raw

history blame contribute delete

5.98 kB

	# References:
	# https://www.tanishq.ai/blog/posts/2021-11-16-gradio-huggingface.html

	from fastai.tabular.all import *
	import numpy as np
	import pandas as pd
	import gradio as gr

	def prepare_input(frame=None):
	    """Add the derived model columns to the resale table, in place.

	    Columns written: ``remaining_lease_`` (leading integer of the lease
	    text), ``address`` (street name + " BLK " + block), ``month_`` (the
	    ``month`` column as a category) and ``resale_price_`` (log price,
	    centred and scaled to unit standard deviation).

	    Args:
	        frame: DataFrame to modify. Defaults to the module-level ``df`` so
	            existing zero-argument calls keep working.

	    Returns:
	        ``(train_price_mean, train_price_std)``: the mean of the log prices
	        and the standard deviation of the centred log prices, needed to map
	        a normalised prediction back to a price.
	    """
	    if frame is None:
	        # Resolved at call time, so the global may be assigned after this def.
	        frame = df

	    def _leading_number(text):
	        # Strip the unit words, then keep the first remaining token.
	        cleaned = str(text).replace("years", "").replace("months", "")
	        return int(cleaned.split()[0])

	    frame['remaining_lease_'] = frame.remaining_lease.apply(_leading_number)
	    frame['address'] = frame['street_name'] + " BLK " + frame['block']
	    frame['month_'] = frame['month'].astype('category')

	    # Work in log space, then normalise to zero mean / unit std.
	    log_price = np.log(frame['resale_price'])
	    train_price_mean = log_price.mean()
	    centred = log_price - train_price_mean
	    train_price_std = centred.std()
	    frame['resale_price_'] = centred / train_price_std
	    return train_price_mean, train_price_std

	def restore_price(p, mean=None, std=None):
	    """Convert a normalised log-price back to a price.

	    Computes ``exp(p * std + mean)``. The result is built out of place, so
	    an array passed as ``p`` is never modified and integer arrays do not
	    hit an in-place casting error.

	    Args:
	        p: Normalised log-price (scalar or array-like numeric).
	        mean: Mean to add back. Defaults to the module-level
	            ``train_price_mean``.
	        std: Standard deviation to multiply by. Defaults to the
	            module-level ``train_price_std``.

	    Returns:
	        The restored price, same shape as ``p``.
	    """
	    if mean is None:
	        mean = train_price_mean
	    if std is None:
	        std = train_price_std
	    return np.exp(p * std + mean)

	# Learner loaded from export.pkl; predict() below calls learn.predict on it.
	learn = load_learner('export.pkl')

	# Resale transactions table, read from the working directory.
	data_directory = './'
	price2017 = pd.read_csv(
	    data_directory
	    + 'resale-flat-prices-based-on-registration-date-from-jan-2017-onwards.csv'
	)
	df = price2017

	# Adds the derived columns to df in place and yields the constants that
	# restore_price() needs to undo the price normalisation.
	train_price_mean, train_price_std = prepare_input()

	# splits[0] is used below as the set of training rows.
	# NOTE(review): EndSplitter(valid_last=True) presumably holds out the tail
	# of the rows for validation -- confirm against the fastai docs.
	splits = EndSplitter(valid_last=True)(
	    range_of(df)
	)

	def tostr(i):
	    """Return ``i`` as a string, left-padded with zeros to at least two characters.

	    Used to build the "01".."12" month choices. Values that are already
	    two or more characters wide are returned unchanged.
	    """
	    return str(i).zfill(2)

	# Model input columns, in the order the UI widgets are created.
	inp_cols = ['flat_model', 'storey_range', 'street_name', 'address', 'flat_type', 'town', 'month_', 'remaining_lease_', 'floor_area_sqm']

	# Slice the training rows once instead of re-slicing df on every loop pass.
	_train_rows = df.iloc[splits[0]]

	# Months seen in training, in order of first appearance; the last entry is
	# used elsewhere as the latest trained month.
	uniques_month_ = [str(m) for m in _train_rows['month_'].unique()]

	# uniques: dropdown choices per column; modes: default selection per column.
	uniques = {}
	modes = {}
	for c in inp_cols:
	    _most_common = str(_train_rows[c].mode()[0])
	    if c == 'month_':
	        # The month is split into two widgets: a year list that extends
	        # past the training data, and a two-digit month list.
	        uniques[c] = [str(year) for year in range(1997, 2030)]
	        uniques['month_2'] = [tostr(month) for month in range(1, 13)]
	        modes[c] = _most_common[:4]
	        modes['month_2'] = _most_common[5:7]
	    else:
	        uniques[c] = [str(value) for value in _train_rows[c].unique()]
	        modes[c] = _most_common

	# HDB resale price index from https://beta.data.gov.sg/collections/152/datasets/d_14f63e595975691e7c24a27ae4c07c79/view
	df_rpi = pd.read_csv('HousingAndDevelopmentBoardResalePriceIndex1Q2009100Quarterly.csv')
	def get_rpi(year, month, rpi_table=None):
	    """Look up the resale price index for the quarter containing a month.

	    Args:
	        year: Calendar year, e.g. ``2021``.
	        month: Month number, 1-12.
	        rpi_table: DataFrame with a ``quarter`` column holding keys such as
	            ``"2021-Q3"`` and an ``index`` column. Defaults to the
	            module-level ``df_rpi``.

	    Returns:
	        The index as a float, or ``0.0`` when the quarter is not in the table.

	    Raises:
	        ValueError: if ``month`` is not in 1..12.
	    """
	    if rpi_table is None:
	        rpi_table = df_rpi
	    if month not in range(1, 13):
	        raise ValueError("month must be in 1..12, got " + repr(month))

	    # Months 1-3 -> Q1, 4-6 -> Q2, 7-9 -> Q3, 10-12 -> Q4.
	    quarter_number = (int(month) - 1) // 3 + 1
	    quarter = str(year) + '-Q' + str(quarter_number)

	    # Use ['index'] rather than attribute access: .index is the row index.
	    rpi = rpi_table[rpi_table['quarter'] == quarter]['index']
	    return float(rpi.iloc[0]) if len(rpi) > 0 else 0.0

	def get_rpi_ym(year_month):
	    """Return the resale price index for a ``"YYYY-MM"`` string.

	    Characters 0-3 are read as the year and characters 5-6 as the month.
	    The value comes from ``get_rpi``; anything that is not strictly
	    positive is reported as ``0``.
	    """
	    print(year_month)
	    year_text, month_text = year_month[0:4], year_month[5:7]
	    index_value = get_rpi(int(year_text), int(month_text))
	    if index_value > 0:
	        return index_value
	    return 0

	def get_rpi_factor(year_month):
	    """Return the factor that rescales a prediction to ``year_month``.

	    Months inside the training period need no adjustment and give ``1``.
	    For any other month the factor is the price index of that month
	    divided by the index of the latest trained month
	    (``uniques_month_[-1]``).

	    If either index is unavailable (reported as 0) the factor falls back
	    to ``1``; this also avoids dividing by a missing baseline.

	    Args:
	        year_month: Month as a ``"YYYY-MM"`` string.
	    """
	    print("getting..." + year_month)
	    if year_month in uniques_month_:
	        # within training period
	        return 1

	    # Baseline: the index for the month the model is actually evaluated at.
	    predicted_ym_rpi = get_rpi_ym(uniques_month_[-1])
	    actual_ym_rpi = get_rpi_ym(year_month)
	    print(actual_ym_rpi)

	    if actual_ym_rpi == 0 or predicted_ym_rpi == 0:
	        return 1
	    return actual_ym_rpi / predicted_ym_rpi

	def update_inp_with_trained_cat(idx = 0):
	    """Build a model input from row ``idx`` of ``df``, replacing unseen categories.

	    For every string-valued input column whose value is not among the
	    choices in ``uniques``:
	      * ``month_`` is replaced by the ``month`` of the highest-numbered
	        training row;
	      * any other column is blanked to ``''``.
	    An ``rpi_factor`` entry for the row's own month is then added.

	    NOTE(review): the nesting of the ``skipped`` assignment and of the
	    ``rpi_factor`` step was reconstructed from whitespace-flattened text --
	    confirm against the original file.

	    Returns:
	        ``(inp_, inp, updated, skipped)``: the untouched row, the adjusted
	        input, whether any value was replaced, and whether any non-month
	        value was blanked.
	    """
	    inp_ = df.iloc[idx]
	    inp = inp_[inp_cols]
	    updated = skipped = False
	    for c in inp.keys():
	        value = inp[c]
	        # Only exact str values that the training data never saw need work.
	        if type(value) != str or value in uniques[c]:
	            continue
	        inp = inp.copy()
	        updated = True
	        if c == 'month_':
	            inp[c] = df.iloc[max(splits[0])].month
	        else:
	            inp[c] = ''
	            skipped = True
	    # add rpi factor
	    inp['rpi_factor'] = get_rpi_factor(df.iloc[idx]['month_'])
	    return inp_, inp, updated, skipped

	# Module-level flags kept as-is; predict() works with its own local scaler.
	p = False
	price_scaler = 1


	def predict(flat_model, storey_range, street_name, address, flat_type, town, month_, month_2, remaining_lease_, floor_area_sqm):
	    """Predict a resale price from the values chosen in the UI.

	    ``month_`` carries the year and ``month_2`` the two-digit month; they
	    are joined as ``"YYYY-MM"``. When that month is outside the training
	    months, the model is evaluated at the latest trained month and the
	    result is scaled by the price-index factor.

	    Returns:
	        ``(price, factor)``: the predicted price truncated to an int, and
	        the factor that was applied.
	    """
	    inp = {
	        'flat_model': flat_model,
	        'storey_range': storey_range,
	        'street_name': street_name,
	        'address': address,
	        'flat_type': flat_type,
	        'town': town,
	        'month_': month_+"-"+month_2,
	        'remaining_lease_': float(remaining_lease_),
	        'floor_area_sqm': float(floor_area_sqm),
	    }
	    print(inp['month_'])

	    # Compute the factor from the requested month before it is overwritten.
	    price_scaler = get_rpi_factor(inp['month_'])
	    inp['rpi_factor'] = price_scaler

	    # predict with the latest trained month
	    if inp['month_'] not in uniques_month_:
	        inp['month_'] = uniques_month_[-1]

	    row, _clas, _probs = learn.predict(pd.Series(inp))
	    normalised_price = row['resale_price_'].iloc[0]
	    pred = restore_price(normalised_price) * price_scaler
	    return int(pred), price_scaler


	# One dropdown per model input; the month is split into year + month widgets
	# so the order matches predict()'s parameters.
	inputs = []
	for c in inp_cols:
	    if c != 'month_':
	        inputs.append(gr.Dropdown(choices=list(uniques[c]), label=c, info=c, value=modes[c]))
	        continue
	    c2 = 'month_2'
	    inputs.extend([
	        gr.Dropdown(choices=list(uniques[c]), label='year', info='year of valuation', value=modes[c]),
	        gr.Dropdown(choices=list(uniques[c2]), label='month', info='month of valuation', value=modes[c2]),
	    ])

	# The two outputs line up with predict()'s (price, factor) return value.
	_outputs = [
	    gr.Number(label="Predicted Value (S$)"),
	    gr.Number(label="This result is adjusted automatically by the following factor (derived from Resale Price Index 1990Q1-2024Q1))"),
	]
	_title = (
	    "Deep Learning with Neural Network -- Enter the HDB info to predict the price\n"
	    "(Trained with old data from 1997-01 to 2020-01 on purpose to test the model durability to predict future prices. "
	    "Prediction nearer to the train data period is more accurate with mean-abs-error of ~S$20k.)"
	)

	# Build the interface and start serving it as soon as the script runs.
	gr.Interface(fn=predict, inputs=inputs, outputs=_outputs, title=_title).launch(share=True)