wb-droid committed on
Commit
3ba668e
·
1 Parent(s): 20e1a48

Add application.

Browse files
HousingAndDevelopmentBoardResalePriceIndex1Q2009100Quarterly.csv ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ quarter,index
2
+ 1990-Q1,24.3
3
+ 1990-Q2,24.4
4
+ 1990-Q3,25
5
+ 1990-Q4,24.7
6
+ 1991-Q1,24.9
7
+ 1991-Q2,25.5
8
+ 1991-Q3,25.2
9
+ 1991-Q4,25.1
10
+ 1992-Q1,25.8
11
+ 1992-Q2,27.4
12
+ 1992-Q3,28
13
+ 1992-Q4,28.6
14
+ 1993-Q1,30.2
15
+ 1993-Q2,39.6
16
+ 1993-Q3,47.7
17
+ 1993-Q4,49
18
+ 1994-Q1,50.4
19
+ 1994-Q2,51.7
20
+ 1994-Q3,54.6
21
+ 1994-Q4,54.8
22
+ 1995-Q1,57.3
23
+ 1995-Q2,63.9
24
+ 1995-Q3,67.7
25
+ 1995-Q4,73.7
26
+ 1996-Q1,80.5
27
+ 1996-Q2,90.9
28
+ 1996-Q3,96.4
29
+ 1996-Q4,99
30
+ 1997-Q1,98.6
31
+ 1997-Q2,97.5
32
+ 1997-Q3,93.6
33
+ 1997-Q4,88
34
+ 1998-Q1,81.7
35
+ 1998-Q2,78.3
36
+ 1998-Q3,74.9
37
+ 1998-Q4,72.3
38
+ 1999-Q1,71.2
39
+ 1999-Q2,72.2
40
+ 1999-Q3,78.1
41
+ 1999-Q4,79.8
42
+ 2000-Q1,80.3
43
+ 2000-Q2,79.3
44
+ 2000-Q3,77.6
45
+ 2000-Q4,75.8
46
+ 2001-Q1,73.2
47
+ 2001-Q2,72.1
48
+ 2001-Q3,70.6
49
+ 2001-Q4,69.6
50
+ 2002-Q1,69.1
51
+ 2002-Q2,69.2
52
+ 2002-Q3,69.9
53
+ 2002-Q4,69.9
54
+ 2003-Q1,71
55
+ 2003-Q2,72.5
56
+ 2003-Q3,74.3
57
+ 2003-Q4,75.1
58
+ 2004-Q1,75.3
59
+ 2004-Q2,76.2
60
+ 2004-Q3,76.3
61
+ 2004-Q4,77.1
62
+ 2005-Q1,77.2
63
+ 2005-Q2,73.5
64
+ 2005-Q3,73.2
65
+ 2005-Q4,73.5
66
+ 2006-Q1,73.6
67
+ 2006-Q2,74.3
68
+ 2006-Q3,74.2
69
+ 2006-Q4,74.9
70
+ 2007-Q1,75.8
71
+ 2007-Q2,78.1
72
+ 2007-Q3,83.2
73
+ 2007-Q4,88
74
+ 2008-Q1,91.3
75
+ 2008-Q2,95.4
76
+ 2008-Q3,99.4
77
+ 2008-Q4,100.8
78
+ 2009-Q1,100
79
+ 2009-Q2,101.4
80
+ 2009-Q3,105
81
+ 2009-Q4,109
82
+ 2010-Q1,112.1
83
+ 2010-Q2,116.6
84
+ 2010-Q3,121.3
85
+ 2010-Q4,124.4
86
+ 2011-Q1,126.4
87
+ 2011-Q2,130.4
88
+ 2011-Q3,135.4
89
+ 2011-Q4,137.7
90
+ 2012-Q1,138.5
91
+ 2012-Q2,140.3
92
+ 2012-Q3,143.1
93
+ 2012-Q4,146.7
94
+ 2013-Q1,148.6
95
+ 2013-Q2,149.4
96
+ 2013-Q3,148.1
97
+ 2013-Q4,145.8
98
+ 2014-Q1,143.5
99
+ 2014-Q2,141.5
100
+ 2014-Q3,139.1
101
+ 2014-Q4,137
102
+ 2015-Q1,135.6
103
+ 2015-Q2,135
104
+ 2015-Q3,134.6
105
+ 2015-Q4,134.8
106
+ 2016-Q1,134.7
107
+ 2016-Q2,134.7
108
+ 2016-Q3,134.7
109
+ 2016-Q4,134.6
110
+ 2017-Q1,133.9
111
+ 2017-Q2,133.7
112
+ 2017-Q3,132.8
113
+ 2017-Q4,132.6
114
+ 2018-Q1,131.6
115
+ 2018-Q2,131.7
116
+ 2018-Q3,131.6
117
+ 2018-Q4,131.4
118
+ 2019-Q1,131
119
+ 2019-Q2,130.8
120
+ 2019-Q3,130.9
121
+ 2019-Q4,131.5
122
+ 2020-Q1,131.5
123
+ 2020-Q2,131.9
124
+ 2020-Q3,133.9
125
+ 2020-Q4,138.1
126
+ 2021-Q1,142.2
127
+ 2021-Q2,146.4
128
+ 2021-Q3,150.6
129
+ 2021-Q4,155.7
130
+ 2022-Q1,159.5
131
+ 2022-Q2,163.9
132
+ 2022-Q3,168.1
133
+ 2022-Q4,171.9
134
+ 2023-Q1,173.6
135
+ 2023-Q2,176.2
136
+ 2023-Q3,178.5
137
+ 2023-Q4,180.4
138
+ 2024-Q1,183.5
app.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # References:
2
+ # https://www.tanishq.ai/blog/posts/2021-11-16-gradio-huggingface.html
3
+
4
+ from fastai.tabular.all import *
5
+ import numpy as np
6
+ import pandas as pd
7
+ import gradio as gr
8
+
9
def prepare_input():
    """Add the derived model columns to the module-level ``df`` in place.

    Columns written:
      * ``remaining_lease_`` -- integer parsed from the first whitespace
        separated token of ``remaining_lease`` once the words "years" and
        "months" have been stripped out.
      * ``address`` -- ``street_name`` + " BLK " + ``block``.
      * ``month_`` -- ``month`` converted to a categorical.
      * ``resale_price_`` -- natural log of ``resale_price``, centred on its
        mean and divided by its standard deviation.

    Returns:
        tuple: ``(mean, std)`` of the log price, i.e. the two values needed
        to undo the normalisation later.
    """
    def _lease_years(raw):
        # Drop the unit words, then keep only the leading number.
        cleaned = str(raw).replace("years", "").replace("months", "")
        return int(cleaned.split()[0])

    df['remaining_lease_'] = df.remaining_lease.apply(_lease_years)
    df['address'] = df['street_name'] + " BLK " + df['block']
    df['month_'] = df['month'].astype('category')

    # Normalise the log price: subtract the mean first, then scale by the
    # standard deviation of the centred values.
    log_price = np.log(df['resale_price'])
    log_price_mean = log_price.mean()
    centred = log_price - log_price_mean
    log_price_std = centred.std()
    df['resale_price_'] = centred / log_price_std

    return log_price_mean, log_price_std
20
+
21
def restore_price(p):
    """Convert a normalised log price back to a price.

    Reverses the normalisation applied by ``prepare_input``: multiply by the
    module-level ``train_price_std``, add ``train_price_mean`` and
    exponentiate.

    The computation is done on a new value rather than with ``*=`` / ``+=``,
    so an array or Series passed in by the caller is no longer modified in
    place.

    Args:
        p: normalised log price (scalar, NumPy array or pandas Series).

    Returns:
        The de-normalised price, same shape as ``p``.
    """
    return np.exp(p * train_price_std + train_price_mean)
25
+
26
# Exported fastai learner used for inference.
# NOTE: load_learner unpickles the file, so only load exports you trust.
learn = load_learner('export.pkl')

# Resale transaction table; `df` is an alias of the same DataFrame object and
# is the name the rest of this module reads and mutates.
data_directory = './'
price2017 = pd.read_csv(
    data_directory
    + 'resale-flat-prices-based-on-registration-date-from-jan-2017-onwards.csv'
)
df = price2017

# Adds the derived columns to `df` and keeps the log-price statistics that
# restore_price() needs to undo the normalisation.
train_price_mean, train_price_std = prepare_input()

# Index split over the rows of `df`; splits[0] is used below as the training
# part.
_end_splitter = EndSplitter(valid_last=True)
splits = _end_splitter(range_of(df))
36
+
37
def tostr(i):
    """Return ``i`` as a string, left-padded with zeros to at least two characters.

    Used to build two-digit month labels ("01" .. "12"). Values that are
    already two or more characters long are returned unchanged; the previous
    implementation prepended a "0" to anything whose length was not exactly
    two, e.g. 100 -> "0100".

    Args:
        i: value to format (typically an int month number).

    Returns:
        str: zero-padded string representation of ``i``.
    """
    return str(i).zfill(2)
44
+
45
# Columns offered as inputs in the UI, in display order.
inp_cols = ['flat_model', 'storey_range', 'street_name', 'address', 'flat_type', 'town', 'month_', 'remaining_lease_', 'floor_area_sqm']

# Rows belonging to the first (training) part of the split.
_train_rows = df.iloc[splits[0]]

# Every 'month_' value present in the training rows, as strings.
uniques_month_ = [str(m) for m in _train_rows['month_'].unique()]

# Per input: the list of selectable values (uniques) and the default (modes).
uniques = {}
modes = {}
for c in inp_cols:
    most_common = str(_train_rows[c].mode()[0])
    if c != 'month_':
        uniques[c] = [str(v) for v in _train_rows[c].unique()]
        modes[c] = most_common
        continue
    # 'month_' is split into two selectors: a year list and a two-digit month
    # list stored under the extra key 'month_2'. The defaults are sliced out
    # of the most common month string (characters 0-3 and 5-6).
    uniques[c] = [str(y) for y in range(1997, 2030)]
    uniques['month_2'] = [tostr(m) for m in range(1, 13)]
    modes[c] = most_common[:4]
    modes['month_2'] = most_common[5:7]
58
+
59
# HDB resale price index from https://beta.data.gov.sg/collections/152/datasets/d_14f63e595975691e7c24a27ae4c07c79/view
# Columns used below: 'quarter' (strings such as "1990-Q1") and 'index'.
df_rpi = pd.read_csv('HousingAndDevelopmentBoardResalePriceIndex1Q2009100Quarterly.csv')


def get_rpi(year, month):
    """Look up the resale price index for the quarter containing a month.

    Args:
        year: calendar year; formatted into the quarter key as-is.
        month: month number, 1 to 12.

    Returns:
        float: the 'index' value of the first ``df_rpi`` row whose 'quarter'
        equals "<year>-Q<n>", or 0.0 when no such row exists.

    Raises:
        ValueError: if ``month`` is not one of 1..12.
    """
    # Position 0..11 of the month; .index() raises ValueError otherwise.
    slot = list(range(1, 13)).index(month)
    quarter_key = f"{year}-Q{slot // 3 + 1}"

    matches = df_rpi.loc[df_rpi.quarter == quarter_key, 'index']
    if len(matches) == 0:
        return 0.0
    return float(matches.iloc[0])
70
+
71
def get_rpi_ym(year_month):
    """Return the resale price index for a "YYYY-MM" string.

    The year is read from characters 0-3 and the month from characters 5-6,
    then passed to ``get_rpi``. The argument is also printed to stdout.

    Args:
        year_month: string such as "2020-01".

    Returns:
        The index value from ``get_rpi`` when it is positive, otherwise 1.
    """
    print(year_month)
    year = int(year_month[:4])
    month = int(year_month[5:7])
    index_value = get_rpi(year, month)
    if index_value > 0:
        return index_value
    return 1
78
+
79
def get_rpi_factor(year_month):
    """Return the price scale factor for a valuation month.

    Months that appear in ``uniques_month_`` (the training months) need no
    adjustment and yield 1. For any other month the factor is the ratio of
    that month's resale price index to the index of the last entry of
    ``uniques_month_``.

    If the index is unavailable for either month (``get_rpi`` returns 0, for
    example for a quarter that is not in the index table), the neutral factor
    1 is returned. Previously a missing index was silently replaced by 1
    before dividing, which produced factors such as 1/131.5 and therefore
    wildly wrong prices.

    Args:
        year_month: string in "YYYY-MM" form.

    Returns:
        int | float: multiplicative factor to apply to the predicted price.
    """
    print("getting..." + year_month)
    if year_month in uniques_month_:
        # within training period
        return 1

    # Baseline is the last listed training month.
    # NOTE(review): assumes uniques_month_ is in chronological order - confirm.
    baseline_ym = uniques_month_[-1]
    baseline_rpi = get_rpi(int(baseline_ym[0:4]), int(baseline_ym[5:7]))
    target_rpi = get_rpi(int(year_month[0:4]), int(year_month[5:7]))

    # get_rpi reports a missing quarter as 0; a ratio built from it would be
    # meaningless, so fall back to "no adjustment".
    if baseline_rpi <= 0 or target_rpi <= 0:
        return 1
    return target_rpi / baseline_rpi
91
+
92
def update_inp_with_trained_cat(idx = 0):
    """Build a model input from row ``idx`` of ``df``, replacing unseen categories.

    String values that were not seen in the training rows are replaced:
    an unseen ``month_`` becomes the ``month`` of the last training row, any
    other unseen value becomes the empty string. An ``rpi_factor`` entry,
    computed from the row's original ``month_``, is appended.

    ``month_`` is checked against ``uniques_month_`` (full "YYYY-MM" values).
    ``uniques['month_']`` only holds the year choices for the UI, so checking
    against it marked every row as unseen and overwrote its month.

    Args:
        idx: positional row index into ``df``.

    Returns:
        tuple: ``(row, inp, updated, skipped)`` where ``row`` is the original
        row, ``inp`` the adjusted input Series, ``updated`` is True if any
        value was replaced and ``skipped`` is True if a non-month value was
        blanked.
    """
    inp_ = df.iloc[idx]
    # Work on a private copy so the replacements never touch `df`.
    inp = inp_[inp_cols].copy()
    updated = False
    skipped = False
    for c in inp_cols:
        value = inp[c]
        if not isinstance(value, str):
            continue
        known_values = uniques_month_ if c == 'month_' else uniques[c]
        if value in known_values:
            continue
        updated = True
        if c == 'month_':
            # Fall back to the month of the last training row.
            inp[c] = df.iloc[max(splits[0])].month
        else:
            inp[c] = ''
            skipped = True
    # add rpi factor
    inp['rpi_factor'] = get_rpi_factor(df.iloc[idx]['month_'])
    return inp_, inp, updated, skipped
111
+
112
# Module-level flags; neither is read by the code visible in this file.
p = False
price_scaler = 1


def predict(flat_model, storey_range, street_name, address, flat_type, town, month_, month_2, remaining_lease_, floor_area_sqm):
    """Predict a resale price from the values chosen in the UI.

    The year (``month_``) and month (``month_2``) selections are joined into
    a "YYYY-MM" string. The price index factor for that month is stored as
    the ``rpi_factor`` feature and also multiplies the final price. When the
    month is not among the training months, the model is queried with the
    last entry of ``uniques_month_`` instead.

    Args:
        flat_model, storey_range, street_name, address, flat_type, town:
            categorical selections, passed through unchanged.
        month_: year part of the valuation date, as a string.
        month_2: two-digit month part of the valuation date, as a string.
        remaining_lease_: converted with ``float``.
        floor_area_sqm: converted with ``float``.

    Returns:
        tuple: ``(price, factor)`` - the predicted price truncated to an int
        and the factor that was applied.
    """
    valuation_month = month_ + "-" + month_2
    features = {
        'flat_model': flat_model,
        'storey_range': storey_range,
        'street_name': street_name,
        'address': address,
        'flat_type': flat_type,
        'town': town,
        'month_': valuation_month,
        'remaining_lease_': float(remaining_lease_),
        'floor_area_sqm': float(floor_area_sqm),
    }
    print(valuation_month)

    scale = get_rpi_factor(valuation_month)
    features['rpi_factor'] = scale

    # predict with the latest trained month
    if valuation_month not in uniques_month_:
        features['month_'] = uniques_month_[-1]

    row, _, _ = learn.predict(pd.Series(features))
    estimate = restore_price(row['resale_price_'].iloc[0]) * scale
    return int(estimate), scale
138
+
139
+
140
def _dropdown(key, label, info):
    """Create a dropdown whose choices and default come from uniques/modes[key]."""
    return gr.Dropdown(choices=list(uniques[key]), label=label, info=info, value=modes[key])


# One dropdown per input column; 'month_' is shown as separate year and month
# selectors, matching the (month_, month_2) parameters of predict().
inputs = []
for c in inp_cols:
    if c != 'month_':
        inputs.append(_dropdown(c, c, c))
        continue
    c2 = 'month_2'
    inputs.append(_dropdown(c, 'year', 'year of valuation'))
    inputs.append(_dropdown(c2, 'month', 'month of valuation'))

_outputs = [
    gr.Number(label="Predicted Value (S$)"),
    gr.Number(label="This result is adjusted automatically by the following factor (derived from Resale Price Index 1990Q1-2024Q1))"),
]

_title = (
    "Enter the HDB info to predict the price\n"
    "(Trained with old data from 1997-01 to 2020-01 on purpose to test the "
    "model durability to predict future prices. Prediction nearer to the "
    "train data period is more accurate with mean-abs-error of ~S$20k.)"
)

# Build the UI and start serving it.
gr.Interface(fn=predict, inputs=inputs, outputs=_outputs, title=_title).launch()
152
+
export.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b02e28935beba4d857d557d9b916caada98de6c9e0c6517c2539fd89a868f79
3
+ size 10126378
resale-flat-prices-based-on-registration-date-from-jan-2017-onwards.csv ADDED
The diff for this file is too large to render. See raw diff