Spaces:
Sleeping
Sleeping
Add application.
Browse files
HousingAndDevelopmentBoardResalePriceIndex1Q2009100Quarterly.csv
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
quarter,index
|
| 2 |
+
1990-Q1,24.3
|
| 3 |
+
1990-Q2,24.4
|
| 4 |
+
1990-Q3,25
|
| 5 |
+
1990-Q4,24.7
|
| 6 |
+
1991-Q1,24.9
|
| 7 |
+
1991-Q2,25.5
|
| 8 |
+
1991-Q3,25.2
|
| 9 |
+
1991-Q4,25.1
|
| 10 |
+
1992-Q1,25.8
|
| 11 |
+
1992-Q2,27.4
|
| 12 |
+
1992-Q3,28
|
| 13 |
+
1992-Q4,28.6
|
| 14 |
+
1993-Q1,30.2
|
| 15 |
+
1993-Q2,39.6
|
| 16 |
+
1993-Q3,47.7
|
| 17 |
+
1993-Q4,49
|
| 18 |
+
1994-Q1,50.4
|
| 19 |
+
1994-Q2,51.7
|
| 20 |
+
1994-Q3,54.6
|
| 21 |
+
1994-Q4,54.8
|
| 22 |
+
1995-Q1,57.3
|
| 23 |
+
1995-Q2,63.9
|
| 24 |
+
1995-Q3,67.7
|
| 25 |
+
1995-Q4,73.7
|
| 26 |
+
1996-Q1,80.5
|
| 27 |
+
1996-Q2,90.9
|
| 28 |
+
1996-Q3,96.4
|
| 29 |
+
1996-Q4,99
|
| 30 |
+
1997-Q1,98.6
|
| 31 |
+
1997-Q2,97.5
|
| 32 |
+
1997-Q3,93.6
|
| 33 |
+
1997-Q4,88
|
| 34 |
+
1998-Q1,81.7
|
| 35 |
+
1998-Q2,78.3
|
| 36 |
+
1998-Q3,74.9
|
| 37 |
+
1998-Q4,72.3
|
| 38 |
+
1999-Q1,71.2
|
| 39 |
+
1999-Q2,72.2
|
| 40 |
+
1999-Q3,78.1
|
| 41 |
+
1999-Q4,79.8
|
| 42 |
+
2000-Q1,80.3
|
| 43 |
+
2000-Q2,79.3
|
| 44 |
+
2000-Q3,77.6
|
| 45 |
+
2000-Q4,75.8
|
| 46 |
+
2001-Q1,73.2
|
| 47 |
+
2001-Q2,72.1
|
| 48 |
+
2001-Q3,70.6
|
| 49 |
+
2001-Q4,69.6
|
| 50 |
+
2002-Q1,69.1
|
| 51 |
+
2002-Q2,69.2
|
| 52 |
+
2002-Q3,69.9
|
| 53 |
+
2002-Q4,69.9
|
| 54 |
+
2003-Q1,71
|
| 55 |
+
2003-Q2,72.5
|
| 56 |
+
2003-Q3,74.3
|
| 57 |
+
2003-Q4,75.1
|
| 58 |
+
2004-Q1,75.3
|
| 59 |
+
2004-Q2,76.2
|
| 60 |
+
2004-Q3,76.3
|
| 61 |
+
2004-Q4,77.1
|
| 62 |
+
2005-Q1,77.2
|
| 63 |
+
2005-Q2,73.5
|
| 64 |
+
2005-Q3,73.2
|
| 65 |
+
2005-Q4,73.5
|
| 66 |
+
2006-Q1,73.6
|
| 67 |
+
2006-Q2,74.3
|
| 68 |
+
2006-Q3,74.2
|
| 69 |
+
2006-Q4,74.9
|
| 70 |
+
2007-Q1,75.8
|
| 71 |
+
2007-Q2,78.1
|
| 72 |
+
2007-Q3,83.2
|
| 73 |
+
2007-Q4,88
|
| 74 |
+
2008-Q1,91.3
|
| 75 |
+
2008-Q2,95.4
|
| 76 |
+
2008-Q3,99.4
|
| 77 |
+
2008-Q4,100.8
|
| 78 |
+
2009-Q1,100
|
| 79 |
+
2009-Q2,101.4
|
| 80 |
+
2009-Q3,105
|
| 81 |
+
2009-Q4,109
|
| 82 |
+
2010-Q1,112.1
|
| 83 |
+
2010-Q2,116.6
|
| 84 |
+
2010-Q3,121.3
|
| 85 |
+
2010-Q4,124.4
|
| 86 |
+
2011-Q1,126.4
|
| 87 |
+
2011-Q2,130.4
|
| 88 |
+
2011-Q3,135.4
|
| 89 |
+
2011-Q4,137.7
|
| 90 |
+
2012-Q1,138.5
|
| 91 |
+
2012-Q2,140.3
|
| 92 |
+
2012-Q3,143.1
|
| 93 |
+
2012-Q4,146.7
|
| 94 |
+
2013-Q1,148.6
|
| 95 |
+
2013-Q2,149.4
|
| 96 |
+
2013-Q3,148.1
|
| 97 |
+
2013-Q4,145.8
|
| 98 |
+
2014-Q1,143.5
|
| 99 |
+
2014-Q2,141.5
|
| 100 |
+
2014-Q3,139.1
|
| 101 |
+
2014-Q4,137
|
| 102 |
+
2015-Q1,135.6
|
| 103 |
+
2015-Q2,135
|
| 104 |
+
2015-Q3,134.6
|
| 105 |
+
2015-Q4,134.8
|
| 106 |
+
2016-Q1,134.7
|
| 107 |
+
2016-Q2,134.7
|
| 108 |
+
2016-Q3,134.7
|
| 109 |
+
2016-Q4,134.6
|
| 110 |
+
2017-Q1,133.9
|
| 111 |
+
2017-Q2,133.7
|
| 112 |
+
2017-Q3,132.8
|
| 113 |
+
2017-Q4,132.6
|
| 114 |
+
2018-Q1,131.6
|
| 115 |
+
2018-Q2,131.7
|
| 116 |
+
2018-Q3,131.6
|
| 117 |
+
2018-Q4,131.4
|
| 118 |
+
2019-Q1,131
|
| 119 |
+
2019-Q2,130.8
|
| 120 |
+
2019-Q3,130.9
|
| 121 |
+
2019-Q4,131.5
|
| 122 |
+
2020-Q1,131.5
|
| 123 |
+
2020-Q2,131.9
|
| 124 |
+
2020-Q3,133.9
|
| 125 |
+
2020-Q4,138.1
|
| 126 |
+
2021-Q1,142.2
|
| 127 |
+
2021-Q2,146.4
|
| 128 |
+
2021-Q3,150.6
|
| 129 |
+
2021-Q4,155.7
|
| 130 |
+
2022-Q1,159.5
|
| 131 |
+
2022-Q2,163.9
|
| 132 |
+
2022-Q3,168.1
|
| 133 |
+
2022-Q4,171.9
|
| 134 |
+
2023-Q1,173.6
|
| 135 |
+
2023-Q2,176.2
|
| 136 |
+
2023-Q3,178.5
|
| 137 |
+
2023-Q4,180.4
|
| 138 |
+
2024-Q1,183.5
|
app.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# References:
|
| 2 |
+
# https://www.tanishq.ai/blog/posts/2021-11-16-gradio-huggingface.html
|
| 3 |
+
|
| 4 |
+
from fastai.tabular.all import *
|
| 5 |
+
import numpy as np
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import gradio as gr
|
| 8 |
+
|
| 9 |
+
def prepare_input():
    """Add the derived feature/target columns to the module-level ``df``.

    Columns written on ``df`` (in place):
        remaining_lease_: integer parsed from the first whitespace-separated
            token of ``remaining_lease`` after the words "years" and
            "months" have been removed.
        address: ``street_name`` + " BLK " + ``block``.
        month_: ``month`` converted to a pandas categorical.
        resale_price_: natural log of ``resale_price``, centred on its mean
            and then divided by the standard deviation of the centred values.

    Returns:
        tuple: ``(mean, std)`` of the log price used for the normalisation.
    """
    def _leading_lease_number(raw):
        # Strip the unit words, then keep only the first remaining token.
        text = str(raw)
        for unit in ("years", "months"):
            text = text.replace(unit, "")
        return int(text.split()[0])

    df['remaining_lease_'] = df.remaining_lease.apply(_leading_lease_number)
    df['address'] = df['street_name'] + " BLK " + df['block']
    df['month_'] = df['month'].astype('category')

    # Normalise the log price: centre first, then scale by the std of the
    # centred series (same order of operations as the statistics returned).
    log_price = np.log(df['resale_price'])
    train_price_mean = log_price.mean()
    centred = log_price - train_price_mean
    train_price_std = centred.std()
    df['resale_price_'] = centred / train_price_std
    return train_price_mean, train_price_std
|
| 20 |
+
|
| 21 |
+
def restore_price(p):
    """Convert a normalised log price back to a price.

    Applies the inverse of the normalisation whose statistics are held in the
    module-level ``train_price_std`` and ``train_price_mean``: scale by the
    std, shift by the mean, then exponentiate.

    Args:
        p: Normalised log price (a scalar or array-like supporting ``*``/``+``).

    Returns:
        ``np.exp(p * train_price_std + train_price_mean)``.
    """
    # Computed as a fresh expression rather than with ``*=`` / ``+=`` so an
    # array-like argument is never modified in place for the caller.
    return np.exp(p * train_price_std + train_price_mean)
|
| 25 |
+
|
| 26 |
+
# Trained learner shipped next to this script.
# NOTE(review): export.pkl is presumably a pickle; only load files you trust.
learn = load_learner('export.pkl')

# Resale transactions used to rebuild the input categories and the price
# normalisation statistics.
data_directory = './'
price2017 = pd.read_csv(
    data_directory
    + 'resale-flat-prices-based-on-registration-date-from-jan-2017-onwards.csv'
)
df = price2017

# Adds the derived columns to ``df`` and keeps the log-price statistics that
# ``restore_price`` needs.
train_price_mean, train_price_std = prepare_input()

# Index split of ``df`` produced by fastai's EndSplitter; ``splits[0]`` is
# used below as the training rows.
splits = EndSplitter(valid_last=True)(range_of(df))
|
| 36 |
+
|
| 37 |
+
def tostr(i):
    """Return ``str(i)`` left-padded with zeros to at least two characters.

    Used to build the two-digit month labels ("01" .. "12"). Values whose
    string form is already two or more characters long are returned
    unchanged (previously anything that was not exactly two characters long
    had a "0" prepended, so e.g. 100 became "0100").

    Args:
        i: Value to format; converted with ``str``.

    Returns:
        str: The zero-padded string.
    """
    return str(i).zfill(2)
|
| 44 |
+
|
| 45 |
+
# Model input columns, in the order the UI widgets and ``predict`` use them.
inp_cols = ['flat_model', 'storey_range', 'street_name', 'address', 'flat_type', 'town', 'month_', 'remaining_lease_', 'floor_area_sqm']

# Training rows are selected once and reused; the selection does not change
# between loop iterations.
_train_rows = df.iloc[splits[0]]

# Distinct "YYYY-MM" values seen in the training rows, in order of appearance.
uniques_month_ = list(map(str, _train_rows['month_'].unique()))

# Per-column dropdown choices and default (most frequent) values.
uniques = {}
modes = {}
for c in inp_cols:
    if c == 'month_':
        # The month is entered as two widgets: a year ('month_') and a
        # two-digit month ('month_2'); both defaults come from the most
        # frequent "YYYY-MM" value.
        most_common_month = str(_train_rows[c].mode()[0])
        uniques[c] = [str(year) for year in range(1997, 2030)]
        uniques['month_2'] = [tostr(month) for month in range(1, 13)]
        modes[c] = most_common_month[:4]
        modes['month_2'] = most_common_month[5:7]
    else:
        uniques[c] = list(map(str, _train_rows[c].unique()))
        modes[c] = str(_train_rows[c].mode()[0])
|
| 58 |
+
|
| 59 |
+
# HDB resale price index from https://beta.data.gov.sg/collections/152/datasets/d_14f63e595975691e7c24a27ae4c07c79/view
# The file has two columns: "quarter" (e.g. "1990-Q1") and "index".
df_rpi = pd.read_csv(
    'HousingAndDevelopmentBoardResalePriceIndex1Q2009100Quarterly.csv'
)
|
| 61 |
+
def get_rpi(year, month):
    """Look up the resale price index for the quarter containing a month.

    Args:
        year: Calendar year; its string form is the first part of the key.
        month: Month number, 1 to 12.

    Returns:
        float: ``index`` of the first ``df_rpi`` row whose ``quarter`` equals
        "<year>-Q<n>", or ``0.0`` when no row matches.

    Raises:
        ValueError: If ``month`` is not one of 1..12.
    """
    # One quarter label per month position: Q1 x3, Q2 x3, Q3 x3, Q4 x3.
    quarter_by_position = [f'Q{n}' for n in range(1, 5) for _ in range(3)]
    position = list(range(1, 13)).index(month)
    key = f'{year}-{quarter_by_position[position]}'

    matches = df_rpi[df_rpi.quarter == key]['index']
    if len(matches) > 0:
        return float(matches.iloc[0])
    return float(0)
|
| 70 |
+
|
| 71 |
+
# Get scale factor from rpi
|
| 72 |
+
def get_rpi_ym(year_month):
    """Return the resale price index for a "YYYY-MM" string.

    The year is read from characters 0-3 and the month from characters 5-6.
    The argument is printed before the lookup.

    Args:
        year_month: Month label such as "2020-01".

    Returns:
        The value from ``get_rpi`` when it is positive, otherwise ``1`` so
        the result can safely be used in a ratio.
    """
    print(year_month)
    year, month = int(year_month[0:4]), int(year_month[5:7])
    value = get_rpi(year, month)
    if value > 0:
        return value
    return 1
|
| 78 |
+
|
| 79 |
+
def get_rpi_factor(year_month):
    """Return the price scale factor for a "YYYY-MM" valuation month.

    Args:
        year_month: Month label such as "2024-01".

    Returns:
        ``1`` when the month is one of ``uniques_month_`` (within the
        training period); otherwise the index for ``year_month`` divided by
        the index for the last entry of ``uniques_month_``.
    """
    print("getting..." + year_month)

    # Within the training period: no adjustment.
    if year_month in uniques_month_:
        return 1

    # Outside it: scale by the index ratio relative to the last trained month.
    baseline_rpi = get_rpi_ym(uniques_month_[-1])
    return get_rpi_ym(year_month) / baseline_rpi
|
| 91 |
+
|
| 92 |
+
def update_inp_with_trained_cat(idx = 0):
    """Build a model input from row ``idx`` of ``df``, replacing unknown categories.

    Every string-valued input column whose value is not listed in
    ``uniques`` for that column is replaced: ``month_`` by the ``month`` of
    the training row with the highest index, any other column by ``''``.
    An ``rpi_factor`` entry for the row's original ``month_`` is then added.

    Args:
        idx: Positional row index into ``df``.

    Returns:
        tuple: ``(full_row, model_input, updated, skipped)`` where
        ``updated`` is True if any value was replaced and ``skipped`` is
        True if any non-month value was blanked.
    """
    inp_ = df.iloc[idx]
    inp = inp_[inp_cols]
    updated = skipped = False

    for col in inp.keys():
        value = inp[col]
        # Only plain-string values missing from the known choices are touched.
        if type(value) is not str or value in uniques[col]:
            continue
        inp = inp.copy()
        updated = True
        if col == 'month_':
            inp[col] = df.iloc[max(splits[0])].month
        else:
            inp[col] = ''
            skipped = True

    # add rpi factor
    inp['rpi_factor'] = get_rpi_factor(df.iloc[idx]['month_'])
    return inp_, inp, updated, skipped
|
| 111 |
+
|
| 112 |
+
# Module-level defaults. ``predict`` assigns its own local ``price_scaler``
# and does not read this one.
p = False
price_scaler = 1
|
| 114 |
+
def predict(flat_model, storey_range, street_name, address, flat_type, town, month_, month_2, remaining_lease_, floor_area_sqm):
    """Predict a resale price from the UI inputs.

    Args:
        flat_model, storey_range, street_name, address, flat_type, town:
            Categorical inputs passed to the learner as given.
        month_: Year part of the valuation month (string).
        month_2: Two-digit month part of the valuation month (string).
        remaining_lease_: Remaining lease; converted with ``float``.
        floor_area_sqm: Floor area; converted with ``float``.

    Returns:
        tuple: ``(price, factor)`` where ``price`` is the restored model
        prediction multiplied by ``factor`` and truncated to ``int``, and
        ``factor`` is the value from ``get_rpi_factor`` for the requested
        month.
    """
    features = {
        'flat_model': flat_model,
        'storey_range': storey_range,
        'street_name': street_name,
        'address': address,
        'flat_type': flat_type,
        'town': town,
        'month_': month_ + "-" + month_2,
        'remaining_lease_': float(remaining_lease_),
        'floor_area_sqm': float(floor_area_sqm),
    }
    print(features['month_'])
    price_scaler = features['rpi_factor'] = get_rpi_factor(features['month_'])

    # Months outside the training data are predicted as the latest trained
    # month; the price index factor above accounts for the difference.
    if features['month_'] not in uniques_month_:
        features['month_'] = uniques_month_[-1]

    row, _, _ = learn.predict(pd.Series(features))
    pred = restore_price(row['resale_price_'].iloc[0]) * price_scaler
    return int(pred), price_scaler
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
# One dropdown per model input; the valuation month is split into separate
# year and month dropdowns, matching the parameters of ``predict``.
inputs = []
for c in inp_cols:
    if c == 'month_':
        c2 = 'month_2'
        inputs.append(
            gr.Dropdown(choices=list(uniques[c]), label='year', info='year of valuation', value=modes[c])
        )
        inputs.append(
            gr.Dropdown(choices=list(uniques[c2]), label='month', info='month of valuation', value=modes[c2])
        )
    else:
        inputs.append(
            gr.Dropdown(choices=list(uniques[c]), label=c, info=c, value=modes[c])
        )

# Build the web UI around ``predict`` and start serving it.
gr.Interface(
    fn=predict,
    inputs=inputs,
    outputs=[
        gr.Number(label="Predicted Value (S$)"),
        gr.Number(label="This result is adjusted automatically by the following factor (derived from Resale Price Index 1990Q1-2024Q1))"),
    ],
    title="Enter the HDB info to predict the price\n(Trained with old data from 1997-01 to 2020-01 on purpose to test the model durability to predict future prices. Prediction nearer to the train data period is more accurate with mean-abs-error of ~S$20k.)",
).launch()
|
| 152 |
+
|
export.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b02e28935beba4d857d557d9b916caada98de6c9e0c6517c2539fd89a868f79
|
| 3 |
+
size 10126378
|
resale-flat-prices-based-on-registration-date-from-jan-2017-onwards.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|