import streamlit as st import pandas as pd import numpy as np import xgboost from PIL import Image from joblib import load # load all files model = load('xgb_tuned_model.joblib') transformer = load('transformer.joblib') def run(): with st.form('from_website_data'): # write short description about the model st.write(''' # **IKN Property Prediction** - The model used for this Regression is `XGBRegressor` Model which Hyperparameter have been tuned. - This model achieved `88%` R² Train Score and `83%` R² Test Score. ''') #Tambahkan gambar image = Image.open('IKN_LOGO2.png') st.image(image) sertifikat_choice = {'SHM': 'SHM - Sertifikat Hak Milik', 'HGB': 'HGB - Hak Guna Bangunan', 'Lainnya': 'Lainnya (PPJB, Girik, Adat, dll)'} lokasi_choice = { 'Balikpapan Selatan': 'Balikpapan Selatan, Balikpapan', 'Balikpapan Utara': 'Balikpapan Utara, Balikpapan', 'Balikpapan Tengah': 'Balikpapan Tengah, Balikpapan', 'Balikpapan Baru': 'Balikpapan Baru, Balikpapan', 'Balikpapan Timur': 'Balikpapan Timur, Balikpapan', 'Damai': 'Damai, Balikpapan', 'Gn. Samarinda': 'Gn. Samarinda, Balikpapan', 'Sepinggan': 'Sepinggan, Balikpapan', 'Sumber Rejo': 'Sumber Rejo, Balikpapan', 'Balikpapan Kota': 'Balikpapan Kota, Balikpapan', 'Marga Sari': 'Marga Sari, Balikpapan', 'Gn. Sari Ilir': 'Gn. Sari Ilir, Balikpapan', 'Manggar': 'Manggar, Balikpapan', 'Batakan': 'Batakan, Balikpapan', 'Gunung Bahagia': 'Gunung Bahagia, Balikpapan', 'Balikpapan Barat': 'Balikpapan Barat, Balikpapan', 'Karang Joang': 'Karang Joang, Balikpapan', 'Manggar Baru': 'Manggar Baru, Balikpapan', 'Karang Rejo': 'Karang Rejo, Balikpapan', 'Batu Ampar': 'Batu Ampar, Balikpapan', 'Telaga Sari': 'Telaga Sari, Balikpapan', 'Klandasan Ulu': 'Klandasan Ulu, Balikpapan', 'Klandasan Ilir': 'Klandasan Ilir, Balikpapan', 'Muara Rapak': 'Muara Rapak, Balikpapan', 'Kariangau': 'Kariangau, Balikpapan', 'Baru Tengah': 'Baru Tengah, Balikpapan', 'Lamaru': 'Lamaru, Balikpapan', 'Prapatan': 'Prapatan, Balikpapan', 'Teritip': 'Teritip, Balikpapan', 'Karang Jati': 'Karang Jati, Balikpapan' } sertifikat = st.selectbox("Pilih Sertifikat", options=list(sertifikat_choice.values()), help='The type of certificate of the house') lokasi = st.selectbox("Piluh Loasi", options=list(lokasi_choice.values()), help='The location of the house') kamar_tidur = st.number_input('Kamar Tidur', min_value=0, max_value=30, value=2, help='The number of bedrooms') kamar_mandi = st.number_input('Kamar Mandi', min_value=0, max_value=120, value=2, help='The number of bathrooms') luas_tanah = st.number_input('Luas Tanah', min_value=0, max_value=100000, value=300, help='The land area in square meters') luas_bangunan = st.number_input('Luas Bangunan', min_value=0, max_value=100000, value=270, help='The building area in square meters') daya = st.number_input('Daya Listrik', min_value=0, max_value=22000, value=1300, help='The power capacity of the house in watt') #submit buttion submitted = st.form_submit_button('Predict') data_inf = { 'Sertifikat': sertifikat, 'Lokasi': lokasi, 'Kamar Tidur': kamar_tidur, 'Kamar Mandi': kamar_mandi, 'Luas Tanah': luas_tanah, 'Luas Bangunan': luas_bangunan, 'Daya Listrik': daya } data_inf = pd.DataFrame([data_inf]) # logic ketika user submit if submitted: # check for luas bangunan must be smaller than luas tanah if luas_bangunan > luas_tanah: st.warning('Luas Bangunan tidak boleh lebih besar dari Luas Tanah') st.stop() # show data_inf st.dataframe(data_inf) # scaling and encoding with transformer data_inf_final = transformer.transform(data_inf) # predict using model y_pred_inf = model.predict(data_inf_final) if y_pred_inf[0] > 1000: y_pred_inf[0] = y_pred_inf[0] / 1000 final = round(y_pred_inf[0], 2) st.markdown(f'
Predicted Price: {final:.2f} Milyar
', unsafe_allow_html=True) # get random 10 listing form clean_data_fix.csv based on the price and show it to user both the image, hyperlink, and the price st.write('**10 Random Listing**') clean_data_fix = pd.read_csv('clean_data_fix.csv') clean_data_fix = clean_data_fix[clean_data_fix['Harga'] > y_pred_inf[0] * 1000] # filter 10 random listing with price around 10% of the predicted price both lower and upper clean_data_fix = clean_data_fix[(clean_data_fix['Harga'] > y_pred_inf[0] * 1000 * 0.8) & (clean_data_fix['Harga'] < y_pred_inf[0] * 1000 * 1.2)] # first get the key of the lokasi for key, value in lokasi_choice.items(): if value == lokasi: lokasi = key break # filter by same lokasi based on the key clean_data_fix = clean_data_fix[clean_data_fix['Lokasi'] == lokasi] # if 10 random listing is not enough from the same lokasi, then add the rest from random other lokasi if clean_data_fix.shape[0] < 10: # filter first by price range clean_data_fix_additional = pd.read_csv('clean_data_fix.csv') clean_data_fix_additional = clean_data_fix_additional[clean_data_fix_additional['Harga'] > y_pred_inf[0] * 1000] clean_data_fix_additional = clean_data_fix_additional[(clean_data_fix_additional['Harga'] > y_pred_inf[0] * 1000 * 0.8) & (clean_data_fix_additional['Harga'] < y_pred_inf[0] * 1000 * 1.2)] # filter by different lokasi clean_data_fix_additional = clean_data_fix_additional[clean_data_fix_additional['Lokasi'] != lokasi] # get the rest of the 10 random listing if clean_data_fix_additional.shape[0] > 10 - clean_data_fix.shape[0]: clean_data_fix_additional = clean_data_fix_additional.sample(10 - clean_data_fix.shape[0]) # combine the 10 random listing from the same lokasi and the rest from different lokasi clean_data_fix = pd.concat([clean_data_fix, clean_data_fix_additional]) else: clean_data_fix = clean_data_fix.sample(10) # if there is no listing, show warning to user that cant find example in the price range if clean_data_fix.shape[0] == 0: st.warning('Tidak ada listing yang mirip dengan range harga') st.stop() # Create a list of tabs tabs = st.tabs([f'Listing {i+1}' for i in range(10)]) for i in range(10): # Define the content for each tab with tabs[i]: st.image(clean_data_fix.iloc[i]["Img_Hyperlink"], width=None) st.write(f'**Price:** {clean_data_fix.iloc[i]["Harga"]/1000} Milyar') st.write(f'**Location:** {clean_data_fix.iloc[i]["Lokasi"]}') st.markdown(f'**Link:** [Click here]({clean_data_fix.iloc[i]["Hyperlink"]})') else: final = round(y_pred_inf[0], 3) st.markdown(f'Predicted Price: {final:.4f} Juta
', unsafe_allow_html=True) # Get random 10 listings from clean_data_fix.csv based on the price and show them to the user, including image, hyperlink, and price st.write('**10 Random Listing**') clean_data_fix = pd.read_csv('clean_data_fix.csv') clean_data_fix = clean_data_fix[clean_data_fix['Harga'] > y_pred_inf[0]] # Filter 10 random listings with a price around 10% of the predicted price both lower and upper clean_data_fix = clean_data_fix[(clean_data_fix['Harga'] > y_pred_inf[0] * 0.8) & (clean_data_fix['Harga'] < y_pred_inf[0] * 1.2)] # If there are no listings, show a warning to the user that there are no listings in the price range if clean_data_fix.shape[0] == 0: st.warning('Tidak ada listing yang mirip dengan range harga') st.stop() # first get the key of the lokasi for key, value in lokasi_choice.items(): if value == lokasi: lokasi = key break # Filter by the same location clean_data_fix_same_loc = clean_data_fix[clean_data_fix['Lokasi'] == lokasi] # If there are not enough listings with the same location, get additional listings from different locations if clean_data_fix_same_loc.shape[0] < 10: # Filter first by price range clean_data_fix_additional = pd.read_csv('clean_data_fix.csv') clean_data_fix_additional = clean_data_fix_additional[clean_data_fix_additional['Harga'] > y_pred_inf[0]] clean_data_fix_additional = clean_data_fix_additional[(clean_data_fix_additional['Harga'] > y_pred_inf[0] * 0.8) & (clean_data_fix_additional['Harga'] < y_pred_inf[0] * 1.2)] # Filter by different location clean_data_fix_additional = clean_data_fix_additional[clean_data_fix_additional['Lokasi'] != lokasi] # Get the rest of the 10 random listings if clean_data_fix_additional.shape[0] > 10 - clean_data_fix_same_loc.shape[0]: clean_data_fix_additional = clean_data_fix_additional.sample(10 - clean_data_fix_same_loc.shape[0]) # Combine the 10 random listings from the same location and the rest from different locations clean_data_fix = pd.concat([clean_data_fix_same_loc, clean_data_fix_additional]) else: clean_data_fix = clean_data_fix_same_loc.sample(10) # Display the listings using a tab structure tabs = st.tabs([f'Listing {i+1}' for i in range(10)]) for i in range(10): # Define the content for each tab with tabs[i]: st.image(clean_data_fix.iloc[i]["Img_Hyperlink"], use_column_width=True) st.write(f'**Price:** {clean_data_fix.iloc[i]["Harga"]} Juta') st.markdown(f'**Location:** {clean_data_fix.iloc[i]["Lokasi"]}') st.markdown(f'**Link:** [Click here]({clean_data_fix.iloc[i]["Hyperlink"]})') if __name__ == '__main__': app()