xyncz's picture
Upload 22 files
81613bb verified
import streamlit as st
import pandas as pd
import numpy as np
import xgboost
from PIL import Image
from joblib import load
# Deserialize the persisted artifacts once, when the module is imported:
# the tuned XGBoost regressor and the transformer that is applied to the
# form inputs before prediction.
model, transformer = (
    load('xgb_tuned_model.joblib'),
    load('transformer.joblib'),
)
# Certificate types offered in the form, keyed by short code. Only the
# labels (values) are shown to the user and passed on to the model.
_SERTIFIKAT_CHOICES = {
    'SHM': 'SHM - Sertifikat Hak Milik',
    'HGB': 'HGB - Hak Guna Bangunan',
    'Lainnya': 'Lainnya (PPJB, Girik, Adat, dll)',
}

# Locations offered in the form: key -> label shown in the select box.
# The keys are the values compared against the 'Lokasi' column of the
# listing CSV.
_LOKASI_CHOICES = {
    'Balikpapan Selatan': 'Balikpapan Selatan, Balikpapan',
    'Balikpapan Utara': 'Balikpapan Utara, Balikpapan',
    'Balikpapan Tengah': 'Balikpapan Tengah, Balikpapan',
    'Balikpapan Baru': 'Balikpapan Baru, Balikpapan',
    'Balikpapan Timur': 'Balikpapan Timur, Balikpapan',
    'Damai': 'Damai, Balikpapan',
    'Gn. Samarinda': 'Gn. Samarinda, Balikpapan',
    'Sepinggan': 'Sepinggan, Balikpapan',
    'Sumber Rejo': 'Sumber Rejo, Balikpapan',
    'Balikpapan Kota': 'Balikpapan Kota, Balikpapan',
    'Marga Sari': 'Marga Sari, Balikpapan',
    'Gn. Sari Ilir': 'Gn. Sari Ilir, Balikpapan',
    'Manggar': 'Manggar, Balikpapan',
    'Batakan': 'Batakan, Balikpapan',
    'Gunung Bahagia': 'Gunung Bahagia, Balikpapan',
    'Balikpapan Barat': 'Balikpapan Barat, Balikpapan',
    'Karang Joang': 'Karang Joang, Balikpapan',
    'Manggar Baru': 'Manggar Baru, Balikpapan',
    'Karang Rejo': 'Karang Rejo, Balikpapan',
    'Batu Ampar': 'Batu Ampar, Balikpapan',
    'Telaga Sari': 'Telaga Sari, Balikpapan',
    'Klandasan Ulu': 'Klandasan Ulu, Balikpapan',
    'Klandasan Ilir': 'Klandasan Ilir, Balikpapan',
    'Muara Rapak': 'Muara Rapak, Balikpapan',
    'Kariangau': 'Kariangau, Balikpapan',
    'Baru Tengah': 'Baru Tengah, Balikpapan',
    'Lamaru': 'Lamaru, Balikpapan',
    'Prapatan': 'Prapatan, Balikpapan',
    'Teritip': 'Teritip, Balikpapan',
    'Karang Jati': 'Karang Jati, Balikpapan',
}

# Reverse lookup: select-box label -> key used in the listing CSV.
_LOKASI_KEY_BY_LABEL = {label: key for key, label in _LOKASI_CHOICES.items()}

# Maximum number of comparable listings shown below the prediction.
_MAX_LISTINGS = 10
# A listing is comparable when its price lies strictly between these
# multiples of the predicted price (i.e. within +/-20%).
_PRICE_LOWER_FACTOR = 0.8
_PRICE_UPPER_FACTOR = 1.2
# Raw prices are labelled "Juta"; values above this threshold are divided
# by it and labelled "Milyar" instead.
_JUTA_PER_MILYAR = 1000


def _pick_similar_listings(listings, predicted_price, location):
    """Return up to ``_MAX_LISTINGS`` listings priced near ``predicted_price``.

    Listings from ``location`` are preferred. When fewer than
    ``_MAX_LISTINGS`` of them fall inside the price window, the remainder is
    filled with a random sample of in-window listings from other locations.

    Args:
        listings: DataFrame with at least the 'Harga' and 'Lokasi' columns.
        predicted_price: Predicted price, in the same unit as 'Harga'.
        location: Value to match against the 'Lokasi' column.

    Returns:
        A DataFrame with between 0 and ``_MAX_LISTINGS`` rows.
    """
    price = listings['Harga']
    # The window is symmetric around the prediction. An extra
    # "price > prediction" filter used to sit here and silently removed
    # every listing from the lower half of the window.
    in_window = listings[
        (price > predicted_price * _PRICE_LOWER_FACTOR)
        & (price < predicted_price * _PRICE_UPPER_FACTOR)
    ]

    same_location = in_window[in_window['Lokasi'] == location]
    if len(same_location) >= _MAX_LISTINGS:
        return same_location.sample(_MAX_LISTINGS)

    other_locations = in_window[in_window['Lokasi'] != location]
    missing = _MAX_LISTINGS - len(same_location)
    if len(other_locations) > missing:
        other_locations = other_locations.sample(missing)
    return pd.concat([same_location, other_locations])


def _show_listings(listings, in_milyar):
    """Render one tab per listing with its image, price, location and link.

    Args:
        listings: Non-empty DataFrame with the 'Img_Hyperlink', 'Harga',
            'Lokasi' and 'Hyperlink' columns.
        in_milyar: When true, prices are divided by ``_JUTA_PER_MILYAR`` and
            labelled "Milyar"; otherwise they are shown as-is in "Juta".
    """
    # NOTE(review): the Milyar and Juta paths have always passed different
    # sizing options to st.image; both are preserved here. Confirm whether
    # a single sizing should apply to both.
    image_kwargs = {'width': None} if in_milyar else {'use_column_width': True}

    # Create exactly as many tabs as there are listings; indexing a fixed
    # ten tabs raised IndexError whenever fewer listings were available.
    tabs = st.tabs([f'Listing {number}' for number in range(1, len(listings) + 1)])
    for position, tab in enumerate(tabs):
        listing = listings.iloc[position]
        if in_milyar:
            price_text = f'{listing["Harga"] / _JUTA_PER_MILYAR} Milyar'
        else:
            price_text = f'{listing["Harga"]} Juta'
        with tab:
            st.image(listing["Img_Hyperlink"], **image_kwargs)
            st.write(f'**Price:** {price_text}')
            st.markdown(f'**Location:** {listing["Lokasi"]}')
            st.markdown(f'**Link:** [Click here]({listing["Hyperlink"]})')


def run():
    """Render the property price prediction page.

    Shows an input form, and on submit: validates the input, transforms it
    with the module-level ``transformer``, predicts a price with the
    module-level ``model``, displays the prediction, and lists up to ten
    comparable listings read from 'clean_data_fix.csv'.

    Calls ``st.stop()`` (halting the Streamlit script run) when the building
    area exceeds the land area or when no comparable listing is found.
    """
    with st.form('from_website_data'):
        # Short description of the model.
        st.write('''
# **IKN Property Prediction**
- The model used for this Regression is `XGBRegressor` Model which Hyperparameter have been tuned.
- This model achieved `88%` R² Train Score and `83%` R² Test Score.
''')
        # Add the logo image.
        st.image(Image.open('IKN_LOGO2.png'))

        sertifikat = st.selectbox("Pilih Sertifikat", options=list(_SERTIFIKAT_CHOICES.values()), help='The type of certificate of the house')
        lokasi = st.selectbox("Pilih Lokasi", options=list(_LOKASI_CHOICES.values()), help='The location of the house')
        kamar_tidur = st.number_input('Kamar Tidur', min_value=0, max_value=30, value=2, help='The number of bedrooms')
        kamar_mandi = st.number_input('Kamar Mandi', min_value=0, max_value=120, value=2, help='The number of bathrooms')
        luas_tanah = st.number_input('Luas Tanah', min_value=0, max_value=100000, value=300, help='The land area in square meters')
        luas_bangunan = st.number_input('Luas Bangunan', min_value=0, max_value=100000, value=270, help='The building area in square meters')
        daya = st.number_input('Daya Listrik', min_value=0, max_value=22000, value=1300, help='The power capacity of the house in watt')
        # Submit button.
        submitted = st.form_submit_button('Predict')

    # Nothing to compute until the user submits the form.
    if not submitted:
        return

    # The building area must not exceed the land area.
    if luas_bangunan > luas_tanah:
        st.warning('Luas Bangunan tidak boleh lebih besar dari Luas Tanah')
        st.stop()

    # NOTE(review): the model input carries the select-box labels (e.g.
    # 'SHM - Sertifikat Hak Milik'), whereas the listing CSV is matched on
    # the short location key; confirm the transformer was fitted on labels.
    data_inf = pd.DataFrame([{
        'Sertifikat': sertifikat,
        'Lokasi': lokasi,
        'Kamar Tidur': kamar_tidur,
        'Kamar Mandi': kamar_mandi,
        'Luas Tanah': luas_tanah,
        'Luas Bangunan': luas_bangunan,
        'Daya Listrik': daya,
    }])
    st.dataframe(data_inf)

    # Scale/encode with the transformer, then predict with the model.
    data_inf_final = transformer.transform(data_inf)
    predicted_juta = float(model.predict(data_inf_final)[0])

    in_milyar = predicted_juta > _JUTA_PER_MILYAR
    if in_milyar:
        price_text = f'{round(predicted_juta / _JUTA_PER_MILYAR, 2):.2f} Milyar'
    else:
        price_text = f'{round(predicted_juta, 3):.4f} Juta'
    st.markdown(f'<p style="color: green; text-align: center; font-size: 50px;">Predicted Price: {price_text}</p>', unsafe_allow_html=True)

    # Show comparable listings (image, price, location, link).
    st.write('**10 Random Listing**')
    catalogue = pd.read_csv('clean_data_fix.csv')
    # Translate the select-box label back to the key used in the CSV; fall
    # back to the label itself when it has no known key.
    location_key = _LOKASI_KEY_BY_LABEL.get(lokasi, lokasi)
    similar = _pick_similar_listings(catalogue, predicted_juta, location_key)

    # Tell the user when no listing falls inside the price window.
    if similar.empty:
        st.warning('Tidak ada listing yang mirip dengan range harga')
        st.stop()

    _show_listings(similar, in_milyar)
# Script entry point: render the page. The module defines `run`; the
# previously called `app` does not exist and raised NameError.
if __name__ == '__main__':
    run()