Spaces:

xyncz
/

IKN-Price-Prediction

Sleeping

App Files Files Community

IKN-Price-Prediction / model.py

xyncz

Upload 22 files

81613bb verified about 2 years ago

raw

history blame contribute delete

11.4 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	import xgboost
	from PIL import Image
	from joblib import load

	# Load the persisted artifacts once, at import time; run() reads both names
	# as module-level globals.
	# NOTE(review): joblib.load unpickles the file contents, so these files must
	# come from a trusted source.
	model = load('xgb_tuned_model.joblib')  # estimator whose .predict() run() calls
	transformer = load('transformer.joblib')  # preprocessing object whose .transform() run() applies before predicting

	# Display labels for the certificate selectbox, keyed by short certificate code.
	_SERTIFIKAT_CHOICES = {
	    'SHM': 'SHM - Sertifikat Hak Milik',
	    'HGB': 'HGB - Hak Guna Bangunan',
	    'Lainnya': 'Lainnya (PPJB, Girik, Adat, dll)',
	}

	# Display labels for the location selectbox. The keys are the values that
	# run() compares against the 'Lokasi' column of the listings CSV.
	_LOKASI_CHOICES = {
	    'Balikpapan Selatan': 'Balikpapan Selatan, Balikpapan',
	    'Balikpapan Utara': 'Balikpapan Utara, Balikpapan',
	    'Balikpapan Tengah': 'Balikpapan Tengah, Balikpapan',
	    'Balikpapan Baru': 'Balikpapan Baru, Balikpapan',
	    'Balikpapan Timur': 'Balikpapan Timur, Balikpapan',
	    'Damai': 'Damai, Balikpapan',
	    'Gn. Samarinda': 'Gn. Samarinda, Balikpapan',
	    'Sepinggan': 'Sepinggan, Balikpapan',
	    'Sumber Rejo': 'Sumber Rejo, Balikpapan',
	    'Balikpapan Kota': 'Balikpapan Kota, Balikpapan',
	    'Marga Sari': 'Marga Sari, Balikpapan',
	    'Gn. Sari Ilir': 'Gn. Sari Ilir, Balikpapan',
	    'Manggar': 'Manggar, Balikpapan',
	    'Batakan': 'Batakan, Balikpapan',
	    'Gunung Bahagia': 'Gunung Bahagia, Balikpapan',
	    'Balikpapan Barat': 'Balikpapan Barat, Balikpapan',
	    'Karang Joang': 'Karang Joang, Balikpapan',
	    'Manggar Baru': 'Manggar Baru, Balikpapan',
	    'Karang Rejo': 'Karang Rejo, Balikpapan',
	    'Batu Ampar': 'Batu Ampar, Balikpapan',
	    'Telaga Sari': 'Telaga Sari, Balikpapan',
	    'Klandasan Ulu': 'Klandasan Ulu, Balikpapan',
	    'Klandasan Ilir': 'Klandasan Ilir, Balikpapan',
	    'Muara Rapak': 'Muara Rapak, Balikpapan',
	    'Kariangau': 'Kariangau, Balikpapan',
	    'Baru Tengah': 'Baru Tengah, Balikpapan',
	    'Lamaru': 'Lamaru, Balikpapan',
	    'Prapatan': 'Prapatan, Balikpapan',
	    'Teritip': 'Teritip, Balikpapan',
	    'Karang Jati': 'Karang Jati, Balikpapan',
	}

	# Short description of the model shown at the top of the form (Markdown).
	_MODEL_DESCRIPTION = (
	    "# IKN Property Prediction\n"
	    "- The model used for this Regression is `XGBRegressor` Model which Hyperparameter have been tuned.\n"
	    "- This model achieved `88%` R² Train Score and `83%` R² Test Score.\n"
	)

	_LISTINGS_CSV = 'clean_data_fix.csv'
	_MAX_LISTINGS = 10  # at most this many example listings are shown
	_PRICE_BAND = (0.8, 1.2)  # listings must cost between 80% and 120% of the prediction
	_MILYAR_THRESHOLD = 1000  # predictions above this value are displayed divided by 1000, as "Milyar"


	def _lokasi_key(label):
	    """Return the `_LOKASI_CHOICES` key whose display label equals *label*.

	    Falls back to *label* itself when no entry matches.
	    """
	    for key, value in _LOKASI_CHOICES.items():
	        if value == label:
	            return key
	    return label


	def _format_price(price_juta):
	    """Format a predicted price for the headline.

	    Values above `_MILYAR_THRESHOLD` are divided by 1000 and labelled
	    "Milyar" with two decimals; anything else is labelled "Juta" with four.
	    """
	    if price_juta > _MILYAR_THRESHOLD:
	        return f'{round(price_juta / 1000, 2):.2f} Milyar'
	    return f'{round(price_juta, 3):.4f} Juta'


	def _pick_similar_listings(listings, predicted_price, lokasi, max_listings=_MAX_LISTINGS):
	    """Select up to *max_listings* rows priced close to *predicted_price*.

	    Rows must have 'Harga' strictly inside the `_PRICE_BAND` window around the
	    prediction. Rows whose 'Lokasi' equals *lokasi* are preferred; if there are
	    fewer than *max_listings* of them, the remainder is filled with a random
	    sample of in-band rows from other locations.

	    Returns a DataFrame that may contain fewer than *max_listings* rows
	    (possibly zero) when not enough listings fall inside the band.
	    """
	    low_factor, high_factor = _PRICE_BAND
	    prices = listings['Harga']
	    # The band is applied on both sides of the prediction. An extra
	    # "Harga > prediction" pre-filter used to discard every cheaper listing,
	    # which made the lower bound meaningless.
	    in_band = listings[(prices > predicted_price * low_factor) & (prices < predicted_price * high_factor)]
	    if in_band.shape[0] == 0:
	        return in_band

	    same_loc = in_band[in_band['Lokasi'] == lokasi]
	    if same_loc.shape[0] >= max_listings:
	        return same_loc.sample(max_listings)

	    other_loc = in_band[in_band['Lokasi'] != lokasi]
	    missing = max_listings - same_loc.shape[0]
	    if other_loc.shape[0] > missing:
	        other_loc = other_loc.sample(missing)
	    return pd.concat([same_loc, other_loc])


	def _render_input_form():
	    """Render the input form and return `(submitted, inputs)`.

	    *inputs* maps the feature column names passed to the transformer to the
	    values currently selected in the widgets.
	    """
	    with st.form('from_website_data'):
	        st.write(_MODEL_DESCRIPTION)

	        # Show the logo; the context manager closes the image file afterwards.
	        with Image.open('IKN_LOGO2.png') as image:
	            st.image(image)

	        sertifikat = st.selectbox("Pilih Sertifikat", options=list(_SERTIFIKAT_CHOICES.values()), help='The type of certificate of the house')
	        lokasi = st.selectbox("Pilih Lokasi", options=list(_LOKASI_CHOICES.values()), help='The location of the house')
	        kamar_tidur = st.number_input('Kamar Tidur', min_value=0, max_value=30, value=2, help='The number of bedrooms')
	        kamar_mandi = st.number_input('Kamar Mandi', min_value=0, max_value=120, value=2, help='The number of bathrooms')
	        luas_tanah = st.number_input('Luas Tanah', min_value=0, max_value=100000, value=300, help='The land area in square meters')
	        luas_bangunan = st.number_input('Luas Bangunan', min_value=0, max_value=100000, value=270, help='The building area in square meters')
	        daya = st.number_input('Daya Listrik', min_value=0, max_value=22000, value=1300, help='The power capacity of the house in watt')

	        submitted = st.form_submit_button('Predict')

	    inputs = {
	        'Sertifikat': sertifikat,
	        'Lokasi': lokasi,
	        'Kamar Tidur': kamar_tidur,
	        'Kamar Mandi': kamar_mandi,
	        'Luas Tanah': luas_tanah,
	        'Luas Bangunan': luas_bangunan,
	        'Daya Listrik': daya,
	    }
	    return submitted, inputs


	def _render_listings(listings, in_milyar):
	    """Show every row of *listings* in its own tab.

	    One tab is created per available row, so fewer than `_MAX_LISTINGS`
	    matches no longer raise IndexError. *in_milyar* selects the price unit
	    and the widget calls used by the corresponding original branch.
	    """
	    tabs = st.tabs([f'Listing {i + 1}' for i in range(listings.shape[0])])
	    for i, tab in enumerate(tabs):
	        row = listings.iloc[i]
	        with tab:
	            if in_milyar:
	                st.image(row["Img_Hyperlink"], width=None)
	                st.write(f'Price: {row["Harga"]/1000} Milyar')
	                st.write(f'Location: {row["Lokasi"]}')
	            else:
	                st.image(row["Img_Hyperlink"], use_column_width=True)
	                st.write(f'Price: {row["Harga"]} Juta')
	                st.markdown(f'Location: {row["Lokasi"]}')
	            st.markdown(f'Link: [Click here]({row["Hyperlink"]})')


	def run():
	    """Render the price-prediction page.

	    Shows the input form; once it is submitted, validates the areas, runs the
	    module-level `transformer` and `model` on the inputs, displays the
	    predicted price and then up to `_MAX_LISTINGS` example listings from
	    `clean_data_fix.csv` priced within the `_PRICE_BAND` window around the
	    prediction. Stops the Streamlit script early (via `st.stop()`) when the
	    building area exceeds the land area or when no listing matches.
	    """
	    submitted, inputs = _render_input_form()
	    if not submitted:
	        return

	    # The building area must not be larger than the land area.
	    if inputs['Luas Bangunan'] > inputs['Luas Tanah']:
	        st.warning('Luas Bangunan tidak boleh lebih besar dari Luas Tanah')
	        st.stop()

	    data_inf = pd.DataFrame([inputs])
	    st.dataframe(data_inf)

	    # Apply the fitted preprocessing, then predict.
	    data_inf_final = transformer.transform(data_inf)
	    price_juta = float(model.predict(data_inf_final)[0])
	    in_milyar = price_juta > _MILYAR_THRESHOLD

	    st.markdown(f'<p style="color: green; text-align: center; font-size: 50px;">Predicted Price: {_format_price(price_juta)}</p>', unsafe_allow_html=True)

	    st.write('10 Random Listing')
	    listings = _pick_similar_listings(
	        pd.read_csv(_LISTINGS_CSV),
	        price_juta,
	        _lokasi_key(inputs['Lokasi']),
	    )

	    # Tell the user when nothing falls inside the price range.
	    if listings.shape[0] == 0:
	        st.warning('Tidak ada listing yang mirip dengan range harga')
	        st.stop()

	    _render_listings(listings, in_milyar)

	if __name__ == '__main__':
	    # Render the page when this file is executed directly. The previous call
	    # targeted `app`, which is not defined in this module and raised NameError;
	    # `run` is the page entry point.
	    run()