Spaces:
Sleeping
Sleeping
| # References: | |
| # https://www.tanishq.ai/blog/posts/2021-11-16-gradio-huggingface.html | |
| from fastai.tabular.all import * | |
| import numpy as np | |
| import pandas as pd | |
| import gradio as gr | |
def prepare_input():
    """Add the derived feature and target columns to the module-level ``df``.

    Columns written (in place):
      * ``remaining_lease_`` -- the first whitespace-separated token left in
        ``remaining_lease`` once the words "years" and "months" are removed,
        converted to ``int``.
      * ``address`` -- ``street_name`` + " BLK " + ``block``.
      * ``month_`` -- ``month`` cast to a pandas categorical.
      * ``resale_price_`` -- natural log of ``resale_price``, centred on its
        mean and divided by its standard deviation.

    Returns:
        ``(mean, std)`` of the log price, i.e. the two statistics needed to
        undo the normalisation of ``resale_price_``.
    """
    def _leading_lease_number(raw):
        # e.g. "61 years 04 months" -> "61  04 " -> first token "61" -> 61
        stripped = str(raw).replace("years", "").replace("months", "")
        return int(stripped.split()[0])

    df['remaining_lease_'] = df.remaining_lease.apply(_leading_lease_number)
    df['address'] = df['street_name'] + " BLK " + df['block']
    df['month_'] = df['month'].astype('category')

    # normalize price: log-transform, centre, then scale to unit std
    log_price = np.log(df['resale_price'])
    log_price_mean = log_price.mean()
    centred = log_price - log_price_mean
    log_price_std = centred.std()
    df['resale_price_'] = centred / log_price_std
    return log_price_mean, log_price_std
def restore_price(p):
    """Convert a normalised log-price back into a price.

    Multiplies ``p`` by the module-level ``train_price_std``, adds the
    module-level ``train_price_mean`` and exponentiates the result.

    Args:
        p: A scalar or array-like of normalised log prices.

    Returns:
        ``np.exp(p * train_price_std + train_price_mean)``; same shape as
        ``p``.
    """
    # Build a new value instead of using ``*=`` / ``+=``: the augmented
    # assignments would modify a caller-owned array in place (and fail
    # outright on integer arrays, which cannot hold the float result).
    return np.exp(p * train_price_std + train_price_mean)
# Exported fastai learner used by predict().
# NOTE: load_learner unpickles the file, so export.pkl must come from a
# trusted source.
learn = load_learner('export.pkl')

# Resale transactions; ``price2017`` and ``df`` are the same DataFrame object.
data_directory = './'
df = price2017 = pd.read_csv(
    data_directory + 'resale-flat-prices-based-on-registration-date-from-jan-2017-onwards.csv'
)

# prepare_input() adds the derived columns to ``df`` in place and returns the
# statistics needed to undo the price normalisation.
train_price_mean, train_price_std = prepare_input()

# splits[0] is used further down as the set of training row positions.
_end_splitter = EndSplitter(valid_last=True)
splits = _end_splitter(range_of(df))
def tostr(i):
    """Return ``str(i)`` left-padded with zeros to at least two characters.

    Used to build the month choices "01" .. "12". Values that are already
    two or more characters long are returned unchanged (the previous
    hand-rolled version prepended a "0" to anything whose length was not
    exactly two, turning 123 into "0123").

    Args:
        i: Any value; it is converted with ``str`` first.

    Returns:
        The zero-padded string.
    """
    return str(i).zfill(2)
# Model input columns, in the order the UI presents them.
inp_cols = ['flat_model', 'storey_range', 'street_name', 'address', 'flat_type', 'town', 'month_', 'remaining_lease_', 'floor_area_sqm']

# Select the training rows once instead of re-running df.iloc[splits[0]]
# for every column (each call materialises a new frame).
_train_df = df.iloc[splits[0]]

# Months (as strings) that occur in the training rows.
uniques_month_ = list(map(str, _train_df['month_'].unique()))

# uniques[col] -> dropdown choices, modes[col] -> default dropdown value.
uniques = {}
modes = {}
for c in inp_cols:
    column_mode = str(_train_df[c].mode()[0])
    if c == 'month_':
        # The month is entered as two fields: a year ('month_') and a
        # two-digit month ('month_2').
        uniques[c] = list(map(str, range(1997, 2030)))
        uniques['month_2'] = list(map(tostr, range(1, 13)))
        # presumably the mode looks like 'YYYY-MM': chars 0-3 are taken as
        # the year and chars 5-6 as the month -- confirm against the CSV.
        modes[c] = column_mode[:4]
        modes['month_2'] = column_mode[5:7]
    else:
        uniques[c] = list(map(str, _train_df[c].unique()))
        modes[c] = column_mode

# The training-row frame is only needed while building the tables above.
del _train_df
# HDB resale price index, downloaded from
# https://beta.data.gov.sg/collections/152/datasets/d_14f63e595975691e7c24a27ae4c07c79/view
# get_rpi() reads the 'quarter' and 'index' columns of this table.
df_rpi = pd.read_csv(
    filepath_or_buffer='HousingAndDevelopmentBoardResalePriceIndex1Q2009100Quarterly.csv',
)
def get_rpi(year, month):
    """Look up the resale price index for the quarter containing a month.

    The quarter label is built as ``"<year>-Q<n>"`` and matched against the
    ``quarter`` column of the module-level ``df_rpi``; the value comes from
    its ``index`` column.

    Args:
        year: Year; converted with ``str`` when building the label.
        month: Month number, 1..12.

    Returns:
        The first matching index value as a ``float``, or ``0.0`` when the
        quarter is not present in ``df_rpi``.

    Raises:
        ValueError: If ``month`` is not one of 1..12.
    """
    # list.index raises ValueError for anything that is not a month 1..12.
    month_offset = list(range(1, 13)).index(month)
    quarter_names = ('Q1', 'Q2', 'Q3', 'Q4')
    label = str(year) + '-' + quarter_names[month_offset // 3]

    matches = df_rpi.loc[df_rpi.quarter == label, 'index']
    if len(matches) == 0:
        return 0.0
    return float(matches.iloc[0])
# Get scale factor from rpi
def get_rpi_ym(year_month):
    """Return the resale price index for a ``'YYYY-MM'`` style string.

    Args:
        year_month: String whose characters 0-3 are the year and characters
            5-6 the month, e.g. ``"2020-01"``.

    Returns:
        The value from ``get_rpi`` when it is positive, otherwise ``0``.
    """
    print(year_month)
    year_text, month_text = year_month[:4], year_month[5:7]
    index_value = get_rpi(int(year_text), int(month_text))
    if index_value > 0:
        return index_value
    return 0
def get_rpi_factor(year_month):
    """Return the price-index scale factor for a requested month.

    Months listed in the module-level ``uniques_month_`` (the training
    period) need no adjustment and get a factor of 1. For any other month
    the factor is the ratio of that month's resale price index to the index
    of the last entry in ``uniques_month_``.

    Args:
        year_month: Month as a ``'YYYY-MM'`` style string.

    Returns:
        ``1`` for training-period months or when either index is
        unavailable (reported as 0 by ``get_rpi_ym``); otherwise
        ``actual_index / baseline_index``.
    """
    print("getting..." + year_month)
    if year_month in uniques_month_:
        # within training period
        return 1

    predicted_ym_rpi = get_rpi_ym(uniques_month_[-1])
    actual_ym_rpi = get_rpi_ym(year_month)
    print(actual_ym_rpi)

    # A missing index is reported as 0. Without the baseline check a missing
    # baseline quarter would raise ZeroDivisionError; fall back to the
    # neutral factor instead, as is already done for a missing target.
    if actual_ym_rpi == 0 or predicted_ym_rpi == 0:
        return 1
    return actual_ym_rpi / predicted_ym_rpi
def update_inp_with_trained_cat(idx = 0):
    """Build a model input from row ``idx`` of ``df``, replacing unseen categories.

    String-valued inputs that do not appear among the values collected from
    the training rows are replaced: an unseen ``month_`` becomes the
    ``month`` of the highest-numbered training row, any other unseen value
    becomes the empty string. An ``rpi_factor`` entry is then added for the
    row's original month.

    Args:
        idx: Positional row index into the module-level ``df``.

    Returns:
        Tuple ``(original_row, model_input, updated, skipped)`` where
        ``updated`` is True if any value was replaced and ``skipped`` is
        True if any non-month value was blanked.
    """
    # NOTE(review): the source formatting was flattened; ``skipped = True``
    # is assumed to belong to the non-month branch only -- confirm.
    inp_ = df.iloc[idx]
    # Copy once up front so replacements never touch the original row.
    inp = inp_[inp_cols].copy()
    updated = False
    skipped = False
    for c in inp_cols:
        value = inp[c]
        if not isinstance(value, str):
            continue
        # uniques['month_'] only holds year strings for the year dropdown;
        # full 'YYYY-MM' values have to be checked against uniques_month_,
        # otherwise every month would be treated as unseen.
        known_values = uniques_month_ if c == 'month_' else uniques[c]
        if value in known_values:
            continue
        updated = True
        if c == 'month_':
            inp[c] = df.iloc[max(splits[0])].month
        else:
            inp[c] = ''
            skipped = True
    # add rpi factor
    inp['rpi_factor'] = get_rpi_factor(df.iloc[idx]['month_'])
    return inp_, inp, updated, skipped
# Module-level defaults. Nothing in the portion of the file shown here reads
# them (predict() assigns its own local ``price_scaler``).
p = False
price_scaler = 1
def predict(flat_model, storey_range, street_name, address, flat_type, town, month_, month_2, remaining_lease_, floor_area_sqm):
    """Predict a resale price from the values chosen in the UI.

    Args:
        flat_model, storey_range, street_name, address, flat_type, town:
            Categorical inputs, passed to the model unchanged.
        month_: Year part of the valuation month.
        month_2: Month part of the valuation month; joined to ``month_``
            with "-".
        remaining_lease_: Remaining lease; converted with ``float``.
        floor_area_sqm: Floor area; converted with ``float``.

    Returns:
        Tuple ``(price, factor)``: the predicted price truncated to ``int``
        after multiplying by the price-index factor, and that factor.
    """
    requested_month = month_ + "-" + month_2
    inp = {
        'flat_model': flat_model,
        'storey_range': storey_range,
        'street_name': street_name,
        'address': address,
        'flat_type': flat_type,
        'town': town,
        'month_': requested_month,
        'remaining_lease_': float(remaining_lease_),
        'floor_area_sqm': float(floor_area_sqm),
    }
    print(inp['month_'])

    price_scaler = get_rpi_factor(requested_month)
    inp['rpi_factor'] = price_scaler

    # predict with the latest trained month
    if requested_month not in uniques_month_:
        inp['month_'] = uniques_month_[-1]

    row, _clas, _probs = learn.predict(pd.Series(inp))
    normalised_price = row['resale_price_'].iloc[0]
    pred = restore_price(normalised_price) * price_scaler
    return int(pred), price_scaler
# One dropdown per model input. 'month_' is split into separate year and
# month dropdowns, which predict() joins back together with "-".
inputs = []
for c in inp_cols:
    if c != 'month_':
        inputs.append(gr.Dropdown(choices=list(uniques[c]), label=c, info=c, value=modes[c]))
        continue
    c2 = 'month_2'
    inputs.append(gr.Dropdown(choices=list(uniques[c]), label='year', info='year of valuation', value=modes[c]))
    inputs.append(gr.Dropdown(choices=list(uniques[c2]), label='month', info='month of valuation', value=modes[c2]))

# The two outputs match predict()'s return value: (price, scale factor).
_outputs = [
    gr.Number(label="Predicted Value (S$)"),
    gr.Number(label="This result is adjusted automatically by the following factor (derived from Resale Price Index 1990Q1-2024Q1))"),
]
_title = (
    "Deep Learning with Neural Network -- Enter the HDB info to predict the price\n"
    "(Trained with old data from 1997-01 to 2020-01 on purpose to test the model "
    "durability to predict future prices. Prediction nearer to the train data "
    "period is more accurate with mean-abs-error of ~S$20k.)"
)
gr.Interface(fn=predict, inputs=inputs, outputs=_outputs, title=_title).launch(share=True)