Spaces:
Sleeping
Sleeping
File size: 11,359 Bytes
import streamlit as st
import pandas as pd
import numpy as np
import xgboost
from PIL import Image
from joblib import load
# Load the fitted artifacts produced at training time; both must sit next to this script.
# Hyperparameter-tuned XGBRegressor used for price inference.
model = load('xgb_tuned_model.joblib')
# Fitted preprocessing pipeline (scaling + encoding) applied to the raw input row before prediction.
transformer = load('transformer.joblib')
def _lokasi_key(lokasi_choice, lokasi_label):
    """Map a lokasi display label (e.g. 'Damai, Balikpapan') back to its short key.

    Returns the label unchanged when no mapping matches, mirroring the
    original behaviour of leaving ``lokasi`` untouched on a miss.
    """
    for key, value in lokasi_choice.items():
        if value == lokasi_label:
            return key
    return lokasi_label


def _similar_listings(pred_juta, lokasi_key, limit=10):
    """Return up to ``limit`` listings priced within the ±20% band of ``pred_juta``.

    ``pred_juta`` is the predicted price in Juta (the unit of the 'Harga'
    column).  Listings in the requested lokasi are preferred; when fewer than
    ``limit`` exist there, the remainder is sampled from other lokasi in the
    same price band.  May return fewer than ``limit`` rows, possibly zero.

    BUG FIX: the original code first required Harga > prediction and only then
    applied the 0.8x–1.2x band, which contradicted its own "both lower and
    upper" comment and silently discarded every cheaper-than-predicted listing.
    Only the symmetric band is applied now.
    """
    listings = pd.read_csv('clean_data_fix.csv')
    in_band = listings[(listings['Harga'] > pred_juta * 0.8) &
                       (listings['Harga'] < pred_juta * 1.2)]
    same_loc = in_band[in_band['Lokasi'] == lokasi_key]
    if same_loc.shape[0] >= limit:
        return same_loc.sample(limit)
    other_loc = in_band[in_band['Lokasi'] != lokasi_key]
    shortfall = limit - same_loc.shape[0]
    if other_loc.shape[0] > shortfall:
        other_loc = other_loc.sample(shortfall)
    return pd.concat([same_loc, other_loc])


def _render_listings(listings, unit, divisor=None):
    """Render each listing (image, price, location, link) in its own tab.

    Stops the script with a warning when ``listings`` is empty.  When
    ``divisor`` is given, the CSV's Juta prices are divided by it for display
    (e.g. 1000 to show Milyar); otherwise the raw value is shown.

    BUG FIX: the original hard-coded ``range(10)`` for both the tabs and the
    ``iloc`` lookups, raising IndexError whenever fewer than 10 listings
    survived the price/location filters.  We now build exactly one tab per row.
    """
    if listings.shape[0] == 0:
        st.warning('Tidak ada listing yang mirip dengan range harga')
        st.stop()
    st.write('**10 Random Listing**')
    count = listings.shape[0]
    tabs = st.tabs([f'Listing {i+1}' for i in range(count)])
    for i in range(count):
        row = listings.iloc[i]
        price = row["Harga"] / divisor if divisor else row["Harga"]
        with tabs[i]:
            st.image(row["Img_Hyperlink"])
            st.write(f'**Price:** {price} {unit}')
            st.write(f'**Location:** {row["Lokasi"]}')
            st.markdown(f'**Link:** [Click here]({row["Hyperlink"]})')


def run():
    """Streamlit page: predict an IKN-area house price and show similar listings.

    Collects property attributes through a form, runs them through the fitted
    transformer + tuned XGBRegressor, displays the predicted price (in Milyar
    when above 1000 Juta, otherwise in Juta), and shows up to 10 comparable
    listings from clean_data_fix.csv in the same price band.
    """
    with st.form('from_website_data'):
        # Short description of the model, shown at the top of the form.
        st.write('''
# **IKN Property Prediction**
- The model used for this Regression is `XGBRegressor` Model which Hyperparameter have been tuned.
- This model achieved `88%` R² Train Score and `83%` R² Test Score.
''')
        # Page logo.
        image = Image.open('IKN_LOGO2.png')
        st.image(image)
        # Display label -> value; the selectbox shows the long labels (values).
        sertifikat_choice = {'SHM': 'SHM - Sertifikat Hak Milik', 'HGB': 'HGB - Hak Guna Bangunan', 'Lainnya': 'Lainnya (PPJB, Girik, Adat, dll)'}
        # Short key (as stored in the dataset's 'Lokasi' column) -> display label.
        lokasi_choice = {
            'Balikpapan Selatan': 'Balikpapan Selatan, Balikpapan',
            'Balikpapan Utara': 'Balikpapan Utara, Balikpapan',
            'Balikpapan Tengah': 'Balikpapan Tengah, Balikpapan',
            'Balikpapan Baru': 'Balikpapan Baru, Balikpapan',
            'Balikpapan Timur': 'Balikpapan Timur, Balikpapan',
            'Damai': 'Damai, Balikpapan',
            'Gn. Samarinda': 'Gn. Samarinda, Balikpapan',
            'Sepinggan': 'Sepinggan, Balikpapan',
            'Sumber Rejo': 'Sumber Rejo, Balikpapan',
            'Balikpapan Kota': 'Balikpapan Kota, Balikpapan',
            'Marga Sari': 'Marga Sari, Balikpapan',
            'Gn. Sari Ilir': 'Gn. Sari Ilir, Balikpapan',
            'Manggar': 'Manggar, Balikpapan',
            'Batakan': 'Batakan, Balikpapan',
            'Gunung Bahagia': 'Gunung Bahagia, Balikpapan',
            'Balikpapan Barat': 'Balikpapan Barat, Balikpapan',
            'Karang Joang': 'Karang Joang, Balikpapan',
            'Manggar Baru': 'Manggar Baru, Balikpapan',
            'Karang Rejo': 'Karang Rejo, Balikpapan',
            'Batu Ampar': 'Batu Ampar, Balikpapan',
            'Telaga Sari': 'Telaga Sari, Balikpapan',
            'Klandasan Ulu': 'Klandasan Ulu, Balikpapan',
            'Klandasan Ilir': 'Klandasan Ilir, Balikpapan',
            'Muara Rapak': 'Muara Rapak, Balikpapan',
            'Kariangau': 'Kariangau, Balikpapan',
            'Baru Tengah': 'Baru Tengah, Balikpapan',
            'Lamaru': 'Lamaru, Balikpapan',
            'Prapatan': 'Prapatan, Balikpapan',
            'Teritip': 'Teritip, Balikpapan',
            'Karang Jati': 'Karang Jati, Balikpapan'
        }
        sertifikat = st.selectbox("Pilih Sertifikat", options=list(sertifikat_choice.values()), help='The type of certificate of the house')
        # BUG FIX: label typo "Piluh Loasi" -> "Pilih Lokasi".
        lokasi = st.selectbox("Pilih Lokasi", options=list(lokasi_choice.values()), help='The location of the house')
        kamar_tidur = st.number_input('Kamar Tidur', min_value=0, max_value=30, value=2, help='The number of bedrooms')
        kamar_mandi = st.number_input('Kamar Mandi', min_value=0, max_value=120, value=2, help='The number of bathrooms')
        luas_tanah = st.number_input('Luas Tanah', min_value=0, max_value=100000, value=300, help='The land area in square meters')
        luas_bangunan = st.number_input('Luas Bangunan', min_value=0, max_value=100000, value=270, help='The building area in square meters')
        daya = st.number_input('Daya Listrik', min_value=0, max_value=22000, value=1300, help='The power capacity of the house in watt')
        # Submit button: the whole form re-runs the script when pressed.
        submitted = st.form_submit_button('Predict')

    # Single inference row, with column names exactly as the transformer expects.
    data_inf = pd.DataFrame([{
        'Sertifikat': sertifikat,
        'Lokasi': lokasi,
        'Kamar Tidur': kamar_tidur,
        'Kamar Mandi': kamar_mandi,
        'Luas Tanah': luas_tanah,
        'Luas Bangunan': luas_bangunan,
        'Daya Listrik': daya
    }])

    if submitted:
        # Sanity check: the building area cannot exceed the land area.
        if luas_bangunan > luas_tanah:
            st.warning('Luas Bangunan tidak boleh lebih besar dari Luas Tanah')
            st.stop()
        # Echo the input back to the user.
        st.dataframe(data_inf)
        # Scale/encode with the fitted transformer, then predict.  The model
        # output appears to be in Juta (the 'Harga' unit) — TODO confirm
        # against the training notebook.
        data_inf_final = transformer.transform(data_inf)
        y_pred_inf = model.predict(data_inf_final)
        pred_juta = float(y_pred_inf[0])
        lokasi_key = _lokasi_key(lokasi_choice, lokasi)
        if pred_juta > 1000:
            # Above 1000 Juta: display in Milyar (billions of rupiah).
            final = round(pred_juta / 1000, 2)
            st.markdown(f'<p style="color: green; text-align: center; font-size: 50px;">Predicted Price: {final:.2f} Milyar</p>', unsafe_allow_html=True)
            _render_listings(_similar_listings(pred_juta, lokasi_key), 'Milyar', divisor=1000)
        else:
            final = round(pred_juta, 3)
            # BUG FIX: value is rounded to 3 decimals, so format with .3f (was .4f).
            st.markdown(f'<p style="color: green; text-align: center; font-size: 50px;">Predicted Price: {final:.3f} Juta</p>', unsafe_allow_html=True)
            _render_listings(_similar_listings(pred_juta, lokasi_key), 'Juta')
# Script entry point: launch the Streamlit page.
if __name__ == '__main__':
    # BUG FIX: this module defines run(), not app(); calling app() raised
    # NameError on launch.
    run()