Spaces:

lindritdev
/

apartment

Sleeping

App Files Files Community

apartment / app.py

lindritdev

Update app.py

64f32e8 verified 10 months ago

raw

history blame contribute delete

5.94 kB

	import gradio as gr
	import numpy as np
	import pandas as pd
	import pickle

	# -------------------------
	# Load the trained model (which was trained with crime_rate as a feature)
	# -------------------------
	# NOTE: pickle executes arbitrary code while loading; only ship model files
	# that come from a trusted source.
	model_filename = "random_forest_regression_new.pkl"
	with open(model_filename, 'rb') as model_file:
	    random_forest_model = pickle.load(model_file)

	# The feature names below are hard-coded for the log output only; they are
	# not read from (or checked against) the loaded model.
	expected_feature_names = [
	    'rooms', 'area', 'pop', 'pop_dens', 'frg_pct',
	    'emp', 'tax_income', 'luxurious', 'crime_rate',
	]
	print('Number of features:', random_forest_model.n_features_in_)
	print('Features are:', expected_feature_names)

	# -------------------------
	# Load and prepare municipality data
	# -------------------------
	df_bfs_data = pd.read_csv('bfs_municipality_and_tax_data.csv', sep=',', encoding='utf-8')
	# tax_income is read as text; strip the apostrophes (presumably thousands
	# separators) so the column can be cast to float. regex=False makes the
	# literal replacement explicit.
	df_bfs_data['tax_income'] = (
	    df_bfs_data['tax_income'].str.replace("'", "", regex=False).astype(float)
	)

	# -------------------------
	# Load and aggregate crime rate data
	# -------------------------
	df_crime = pd.read_csv("crime-rate.csv", sep=",", encoding="utf-8")
	# Sum "Häufigkeitszahl" per municipality BFS number, then rename the columns
	# so the result can be merged on "bfs_number" and exposes "crime_rate".
	df_crime_agg = (
	    df_crime
	    .groupby("Gemeinde_BFS_Nr", as_index=False)["Häufigkeitszahl"]
	    .sum()
	    .rename(columns={"Gemeinde_BFS_Nr": "bfs_number", "Häufigkeitszahl": "crime_rate"})
	)

	# Left merge keeps every municipality, even those without crime data.
	df_bfs_data = df_bfs_data.merge(df_crime_agg, on="bfs_number", how="left")
	# Fill missing crime_rate values with the median crime rate. Assign the
	# result back instead of calling fillna(inplace=True) on the column
	# selection: the chained in-place form is deprecated and does not update
	# the frame when pandas Copy-on-Write is active.
	df_bfs_data['crime_rate'] = df_bfs_data['crime_rate'].fillna(
	    df_bfs_data['crime_rate'].median()
	)

	# -------------------------
	# Town name -> BFS municipality number lookup
	# -------------------------
	# The insertion order of this dict is the order in which the towns are
	# offered in the "Town" dropdown, so keep new entries where they should appear.
	locations = {
	    "Zürich": 261,
	    "Kloten": 62,
	    "Uster": 198,
	    "Illnau-Effretikon": 296,
	    "Feuerthalen": 27,
	    "Pfäffikon": 177,
	    "Ottenbach": 11,
	    "Dübendorf": 191,
	    "Richterswil": 138,
	    "Maur": 195,
	    "Embrach": 56,
	    "Bülach": 53,
	    "Winterthur": 230,
	    "Oetwil am See": 157,
	    "Russikon": 178,
	    "Obfelden": 10,
	    "Wald (ZH)": 120,
	    "Niederweningen": 91,
	    "Dällikon": 84,
	    "Buchs (ZH)": 83,
	    "Rüti (ZH)": 118,
	    "Hittnau": 173,
	    "Bassersdorf": 52,
	    "Glattfelden": 58,
	    "Opfikon": 66,
	    "Hinwil": 117,
	    "Regensberg": 95,
	    "Langnau am Albis": 136,
	    "Dietikon": 243,
	    "Erlenbach (ZH)": 151,
	    "Kappel am Albis": 6,
	    "Stäfa": 158,
	    "Zell (ZH)": 231,
	    "Turbenthal": 228,
	    "Oberglatt": 92,
	    "Winkel": 72,
	    "Volketswil": 199,
	    "Kilchberg (ZH)": 135,
	    "Wetzikon (ZH)": 121,
	    "Zumikon": 160,
	    "Weisslingen": 180,
	    "Elsau": 219,
	    "Hettlingen": 221,
	    "Rüschlikon": 139,
	    "Stallikon": 13,
	    "Dielsdorf": 86,
	    "Wallisellen": 69,
	    "Dietlikon": 54,
	    "Meilen": 156,
	    "Wangen-Brüttisellen": 200,
	    "Flaach": 28,
	    "Regensdorf": 96,
	    "Niederhasli": 90,
	    "Bauma": 297,
	    "Aesch (ZH)": 241,
	    "Schlieren": 247,
	    "Dürnten": 113,
	    "Unterengstringen": 249,
	    "Gossau (ZH)": 115,
	    "Oberengstringen": 245,
	    "Schleinikon": 98,
	    "Aeugst am Albis": 1,
	    "Rheinau": 38,
	    "Höri": 60,
	    "Rickenbach (ZH)": 225,
	    "Rafz": 67,
	    "Adliswil": 131,
	    "Zollikon": 161,
	    "Urdorf": 250,
	    "Hombrechtikon": 153,
	    "Birmensdorf (ZH)": 242,
	    "Fehraltorf": 172,
	    "Weiach": 102,
	    "Männedorf": 155,
	    "Küsnacht (ZH)": 154,
	    "Hausen am Albis": 4,
	    "Hochfelden": 59,
	    "Fällanden": 193,
	    "Greifensee": 194,
	    "Mönchaltorf": 196,
	    "Dägerlen": 214,
	    "Thalheim an der Thur": 39,
	    "Uetikon am See": 159,
	    "Seuzach": 227,
	    "Uitikon": 248,
	    "Affoltern am Albis": 2,
	    "Geroldswil": 244,
	    "Niederglatt": 89,
	    "Thalwil": 141,
	    "Rorbas": 68,
	    "Pfungen": 224,
	    "Weiningen (ZH)": 251,
	    "Bubikon": 112,
	    "Neftenbach": 223,
	    "Mettmenstetten": 9,
	    "Otelfingen": 94,
	    "Flurlingen": 29,
	    "Stadel": 100,
	    "Grüningen": 116,
	    "Henggart": 31,
	    "Dachsen": 25,
	    "Bonstetten": 3,
	    "Bachenbülach": 51,
	    "Horgen": 295,
	}

	# -------------------------
	# Define the prediction function
	# -------------------------
	def predict_apartment(rooms, area, town, luxurious):
	    """Predict the apartment price for a town and report its crime rate.

	    Args:
	        rooms: Number of rooms entered by the user.
	        area: Area entered by the user.
	        town: Town name; must be a key of ``locations``.
	        luxurious: Checkbox state; any truthy value is encoded as 1, else 0.

	    Returns:
	        A ``(predicted_price, crime_rate)`` tuple: the model prediction
	        rounded to zero decimals, and the ``crime_rate`` value of the
	        town's row in ``df_bfs_data``.

	    Raises:
	        gr.Error: If ``rooms`` or ``area`` is missing, if the town is not a
	            key of ``locations``, or if the town's BFS number does not match
	            exactly one row of ``df_bfs_data``.
	    """
	    # An empty number field arrives as None; fail with a readable message
	    # instead of pushing None into the feature vector.
	    if rooms is None or area is None:
	        raise gr.Error("Error: Please provide both rooms and area")

	    not_found_message = f"Error: Data not found for town {town}"

	    bfs_number = locations.get(town)
	    if bfs_number is None:
	        raise gr.Error(not_found_message)

	    # Validate the lookup before reading from it. (Writing to row 0 of an
	    # empty selection would silently create a row of NaNs and make this
	    # check pass.) Raising, rather than returning a lone string, keeps the
	    # function compatible with an interface that expects two outputs.
	    matches = df_bfs_data[df_bfs_data['bfs_number'] == bfs_number]
	    if len(matches) != 1:
	        raise gr.Error(not_found_message)
	    municipality = matches.iloc[0]

	    # Convert luxurious input (checkbox) to integer (1 if True, else 0)
	    luxurious_value = 1 if luxurious else 0

	    # The crime rate comes from the merged municipality data, not the user.
	    crime_rate_value = municipality['crime_rate']

	    # One sample with 9 features, in this order: rooms, area, pop, pop_dens,
	    # frg_pct, emp, tax_income, luxurious, crime_rate.
	    input_features = np.array([[
	        rooms,
	        area,
	        municipality['pop'],
	        municipality['pop_dens'],
	        municipality['frg_pct'],
	        municipality['emp'],
	        municipality['tax_income'],
	        luxurious_value,
	        crime_rate_value,
	    ]], dtype=float)

	    # Get the predicted price from the model
	    prediction = random_forest_model.predict(input_features)

	    # Return both the predicted price and the automatically loaded crime rate
	    return np.round(prediction[0], 0), crime_rate_value

	# -------------------------
	# Create the Gradio interface
	# -------------------------
	# Inputs, in the order predict_apartment receives them:
	# rooms, area, town, luxurious.
	input_components = [
	    "number",
	    "number",
	    gr.Dropdown(choices=list(locations.keys()), label="Town", type="value"),
	    gr.Checkbox(label="Luxurious?"),
	]

	# Outputs: the predicted price and the crime rate index.
	output_components = [
	    gr.Number(label="Predicted Price"),
	    gr.Number(label="Crime Rate Index"),
	]

	# Example rows offered in the UI; each row follows the input order above.
	example_rows = [
	    [4.5, 120, "Kloten", True],
	    [3.5, 60, "Horgen", False],
	]

	iface = gr.Interface(
	    fn=predict_apartment,
	    inputs=input_components,
	    outputs=output_components,
	    examples=example_rows,
	)

	iface.launch()