# Page residue from the hosting site (not part of the program):
# author: wb-droid; commit message: "Add minor fixes"; commit: 14cf7cd
# References:
# https://www.tanishq.ai/blog/posts/2021-11-16-gradio-huggingface.html
from fastai.tabular.all import *
import numpy as np
import pandas as pd
import gradio as gr
def prepare_input(frame=None):
    """Add the derived model columns to a resale-price frame, in place.

    Columns added:
      * ``remaining_lease_`` -- the first whitespace-separated token of
        ``remaining_lease`` (after removing the words "years"/"months"),
        converted to ``int``.
      * ``address`` -- ``street_name + " BLK " + block``.
      * ``month_`` -- ``month`` cast to a pandas categorical.
      * ``resale_price_`` -- ``log(resale_price)``, centred on its mean and
        divided by its standard deviation.

    Args:
        frame: DataFrame to modify. When omitted, the module-level ``df`` is
            used, which is what the original zero-argument call did.

    Returns:
        ``(mean, std)`` of the log price, i.e. the two values needed to undo
        the normalisation later.
    """
    if frame is None:
        # Backward-compatible default: operate on the module-level frame.
        frame = df

    def _leading_int(raw):
        # Keeps the original parsing rule: strip the unit words, then take the
        # first remaining token.
        cleaned = str(raw).replace("years", "").replace("months", "")
        return int(cleaned.split()[0])

    frame['remaining_lease_'] = frame.remaining_lease.apply(_leading_int)
    frame['address'] = frame['street_name'] + " BLK " + frame['block']
    frame['month_'] = frame['month'].astype('category')

    # Normalise the log price. The std is taken after centring, which does not
    # change its value but mirrors the original order of operations.
    log_price = np.log(frame['resale_price'])
    train_price_mean = log_price.mean()
    centred = log_price - train_price_mean
    train_price_std = centred.std()
    frame['resale_price_'] = centred / train_price_std
    return train_price_mean, train_price_std
def restore_price(p, mean=None, std=None):
    """Convert a normalised log price back to a price.

    Computes ``exp(p * std + mean)``.

    Args:
        p: Normalised log price (scalar or array-like). It is NOT modified;
            the previous implementation used ``*=`` / ``+=``, which altered a
            caller's array or Series in place.
        mean: Mean of the log price. Defaults to the module-level
            ``train_price_mean``.
        std: Standard deviation of the log price. Defaults to the
            module-level ``train_price_std``.

    Returns:
        The de-normalised price, as returned by ``np.exp``.
    """
    if mean is None:
        mean = train_price_mean
    if std is None:
        std = train_price_std
    # Build a new value instead of updating `p` in place.
    return np.exp(p * std + mean)
# Load the exported fastai learner used for inference in predict().
# NOTE(review): load_learner presumably unpickles the file -- only ship an
# export.pkl from a trusted source; confirm against the fastai docs.
learn = load_learner('export.pkl')
# Directory that holds the CSV data files read below.
data_directory = './'
# Resale transaction records; the columns used later are remaining_lease,
# street_name, block, month and resale_price (see prepare_input).
price2017 = pd.read_csv(data_directory + 'resale-flat-prices-based-on-registration-date-from-jan-2017-onwards.csv')
# The rest of the file works on the module-level name `df`.
df = price2017
# Adds the derived columns to df and keeps the mean/std that restore_price
# needs to undo the price normalisation.
train_price_mean, train_price_std = prepare_input()
# Row-index split of df; splits[0] is treated as the training rows by the code
# below. NOTE(review): the exact split proportion comes from EndSplitter's
# defaults -- confirm it matches the split used when the model was trained.
splits = EndSplitter(valid_last=True)(range_of(df))
def tostr(i):
    """Return ``str(i)`` left-padded with zeros to at least two characters.

    Used to build the two-digit month choices ("01" .. "12").

    The previous hand-rolled version prepended a single "0" to every string
    whose length was not exactly 2, so a three-character input such as 100
    became "0100". ``str.zfill`` only pads when the text is shorter than the
    requested width.
    """
    return str(i).zfill(2)
# Feature columns passed to the model; the UI inputs are built from this list.
inp_cols = ['flat_model', 'storey_range', 'street_name', 'address', 'flat_type', 'town', 'month_', 'remaining_lease_', 'floor_area_sqm']

# Training rows, materialised once. The original re-evaluated
# df.iloc[splits[0]] for every lookup, copying the training slice each time.
_train_rows = df.iloc[splits[0]]

# 'YYYY-MM' values seen in the training rows, in order of first appearance.
# The last entry is used elsewhere as "the latest trained month".
uniques_month_ = list(map(str, _train_rows['month_'].unique()))

# uniques[col]: dropdown choices (as strings); modes[col]: default selection.
uniques = {}
modes = {}
for c in inp_cols:
    if c == 'month_':
        # The month is entered as two dropdowns: a year and a two-digit month.
        uniques[c] = list(map(str, range(1997, 2030)))
        uniques['month_2'] = list(map(tostr, range(1, 13)))
        # Most frequent training month, split into its 'YYYY' and 'MM' parts.
        _top_month = str(_train_rows[c].mode()[0])
        modes[c] = _top_month[:4]
        modes['month_2'] = _top_month[5:7]
    else:
        uniques[c] = list(map(str, _train_rows[c].unique()))
        modes[c] = str(_train_rows[c].mode()[0])
# HDB resale price index from https://beta.data.gov.sg/collections/152/datasets/d_14f63e595975691e7c24a27ae4c07c79/view
# get_rpi() reads two columns from this table: `quarter` (compared against
# strings of the form 'YYYY-Qn') and `index` (the index value).
df_rpi = pd.read_csv('HousingAndDevelopmentBoardResalePriceIndex1Q2009100Quarterly.csv')
def get_rpi(year, month):
    """Return the resale price index for the quarter containing a month.

    The quarter label is built as ``'<year>-Q<n>'`` and matched against the
    ``quarter`` column of ``df_rpi``.

    Returns:
        The first matching ``index`` value as a float, or ``0.0`` when the
        table has no row for that quarter.

    Raises:
        ValueError: if ``month`` is not one of 1..12.
    """
    # Position of the month within the year (0-based); list.index raises
    # ValueError for anything outside 1..12.
    month_pos = list(range(1, 13)).index(month)
    label = f"{year}-Q{month_pos // 3 + 1}"
    matches = df_rpi.loc[df_rpi.quarter == label, 'index']
    if len(matches) > 0:
        return float(matches.iloc[0])
    return float(0)
# Resale price index lookup keyed by a 'YYYY-MM' string
def get_rpi_ym(year_month):
    """Return the resale price index for a ``'YYYY-MM'`` string.

    Characters 0-3 are parsed as the year and characters 5-6 as the month.
    The argument is echoed to stdout before the lookup.

    Returns:
        The index from ``get_rpi`` when it is greater than zero, otherwise
        ``0``.
    """
    print(year_month)
    year, month = int(year_month[:4]), int(year_month[5:7])
    index_value = get_rpi(year, month)
    if index_value > 0:
        return index_value
    return 0
def get_rpi_factor(year_month):
    """Return the price scale factor for a ``'YYYY-MM'`` valuation month.

    * Months present in ``uniques_month_`` (the training period) get ``1``.
    * Otherwise the factor is the ratio between the resale price index of the
      requested month and that of the last trained month
      (``uniques_month_[-1]``).
    * If either index is unavailable (reported as 0 by ``get_rpi_ym``) the
      factor falls back to ``1``. The original only checked the requested
      month, so a missing index for the last trained month raised
      ``ZeroDivisionError``.

    Progress information is printed to stdout, as before.
    """
    print("getting..." + year_month)
    if year_month in uniques_month_:
        # within training period
        return 1

    # Reference point: the last month seen in training.
    predicted_ym = uniques_month_[-1]
    predicted_ym_rpi = get_rpi_ym(predicted_ym)
    actual_ym_rpi = get_rpi_ym(year_month)
    print(actual_ym_rpi)

    # No usable index on either side -> leave the prediction unscaled.
    if actual_ym_rpi == 0 or predicted_ym_rpi == 0:
        return 1
    return actual_ym_rpi / predicted_ym_rpi
def update_inp_with_trained_cat(idx = 0):
    """Build a model input from row ``idx`` of ``df``, limited to trained categories.

    For every string-valued feature in ``inp_cols``:
      * ``month_`` values not seen in training (``uniques_month_``) are
        replaced by the ``month`` of the last training row;
      * any other value not present in ``uniques[col]`` is replaced by ``''``
        and the row is marked as skipped.
    An ``rpi_factor`` entry, computed from the row's original month, is then
    added.

    Fixes over the previous version:
      * ``month_`` was compared against ``uniques['month_']``, which holds
        year strings for the UI, so a 'YYYY-MM' value never matched and every
        row was overwritten. It is now compared against ``uniques_month_``,
        the same list ``predict`` uses.
      * ``type(x) == str`` replaced by ``isinstance``.
      * The input is copied once instead of on every replacement.

    Returns:
        ``(original_row, model_input, updated, skipped)``.
    """
    inp_ = df.iloc[idx]
    # Work on a private copy so the source row is never written to.
    inp = inp_[inp_cols].copy()
    updated = False
    skipped = False
    for c in inp.keys():
        value = inp[c]
        if not isinstance(value, str):
            continue
        known_values = uniques_month_ if c == 'month_' else uniques[c]
        if value in known_values:
            continue
        updated = True
        if c == 'month_':
            # Fall back to the month of the last training row.
            inp[c] = df.iloc[max(splits[0])].month
        else:
            inp[c] = ''
            skipped = True
    # add rpi factor
    inp['rpi_factor'] = get_rpi_factor(df.iloc[idx]['month_'])
    return inp_, inp, updated, skipped
# Module-level flag; not read anywhere in the visible part of this file.
p=False
# Module-level default scale factor. predict() assigns its own local
# `price_scaler`, so this value does not affect what predict() returns.
price_scaler = 1
def predict(flat_model, storey_range, street_name, address, flat_type, town, month_, month_2, remaining_lease_, floor_area_sqm):
    """Predict a resale price from the UI field values.

    ``month_`` (year) and ``month_2`` (two-digit month) are joined into a
    ``'YYYY-MM'`` valuation month. If that month is not in the training
    period, the model is queried with the last trained month instead and the
    result is multiplied by the factor from ``get_rpi_factor``.

    Returns:
        ``(price, factor)`` -- the predicted price truncated to ``int`` and
        the scale factor that was applied.
    """
    inp = {
        'flat_model': flat_model,
        'storey_range': storey_range,
        'street_name': street_name,
        'address': address,
        'flat_type': flat_type,
        'town': town,
        'month_': month_ + "-" + month_2,
        'remaining_lease_': float(remaining_lease_),
        'floor_area_sqm': float(floor_area_sqm),
    }
    print(inp['month_'])

    # The factor is derived from the requested month, before any fallback.
    price_scaler = get_rpi_factor(inp['month_'])
    inp['rpi_factor'] = price_scaler

    # predict with the latest trained month
    if inp['month_'] not in uniques_month_:
        inp['month_'] = uniques_month_[-1]

    row, _, _ = learn.predict(pd.Series(inp))
    normalised_price = row['resale_price_'].iloc[0]
    pred = restore_price(normalised_price) * price_scaler
    return int(pred), price_scaler
# One dropdown per model feature, in the same order as predict()'s parameters.
# The valuation month is split into a year dropdown and a month dropdown.
inputs = []
for c in inp_cols:
    if c == 'month_':
        inputs.append(gr.Dropdown(choices=uniques[c], label='year', info='year of valuation', value=modes[c]))
        inputs.append(gr.Dropdown(choices=uniques['month_2'], label='month', info='month of valuation', value=modes['month_2']))
    else:
        inputs.append(gr.Dropdown(choices=uniques[c], label=c, info=c, value=modes[c]))

# Build and start the web UI. The second output shows the scale factor that
# predict() applied; its label previously ended with a stray extra ")".
gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=[
        gr.Number(label="Predicted Value (S$)"),
        gr.Number(label="This result is adjusted automatically by the following factor (derived from Resale Price Index 1990Q1-2024Q1)"),
    ],
    title="Deep Learning with Neural Network -- Enter the HDB info to predict the price\n(Trained with old data from 1997-01 to 2020-01 on purpose to test the model durability to predict future prices. Prediction nearer to the train data period is more accurate with mean-abs-error of ~S$20k.)",
).launch(share=True)