"""Gradio app that predicts an apartment price with a pickled random forest.

The model expects nine features (see ``FEATURE_NAMES``). The user supplies
``rooms``, ``area`` and ``luxurious``; the remaining features are looked up
per municipality from two CSV files that are loaded at import time.
"""

import pickle

import gradio as gr
import numpy as np
import pandas as pd

# -------------------------
# Load the trained model (which was trained with crime_rate as a feature)
# -------------------------
model_filename = "random_forest_regression_new.pkl"

# Order of the values in the input vector handed to the model.
FEATURE_NAMES = ['rooms', 'area', 'pop', 'pop_dens', 'frg_pct', 'emp',
                 'tax_income', 'luxurious', 'crime_rate']

# NOTE(security): pickle.load executes arbitrary code embedded in the file.
# Only load model files that come from a trusted source.
with open(model_filename, 'rb') as f:
    random_forest_model = pickle.load(f)

print('Number of features:', random_forest_model.n_features_in_)
print('Features are:', FEATURE_NAMES)

# -------------------------
# Load and prepare municipality data
# -------------------------
df_bfs_data = pd.read_csv('bfs_municipality_and_tax_data.csv', sep=',', encoding='utf-8')
# tax_income is read as text containing apostrophes; strip them before the
# float conversion.
df_bfs_data['tax_income'] = df_bfs_data['tax_income'].str.replace("'", "").astype(float)

# -------------------------
# Load and aggregate crime rate data
# -------------------------
df_crime = pd.read_csv("crime-rate.csv", sep=",", encoding="utf-8")

# Group by the municipality BFS number and sum the "Häufigkeitszahl"
df_crime_agg = df_crime.groupby("Gemeinde_BFS_Nr", as_index=False)["Häufigkeitszahl"].sum()

# Rename columns to match for merging
df_crime_agg = df_crime_agg.rename(
    columns={"Gemeinde_BFS_Nr": "bfs_number", "Häufigkeitszahl": "crime_rate"}
)

# Merge crime data into the municipality data using the common key
df_bfs_data = df_bfs_data.merge(df_crime_agg, on="bfs_number", how="left")

# Fill any missing crime_rate values with the median crime rate.
# Assign the result back instead of calling fillna(inplace=True) on the column
# selection: the chained in-place form is deprecated and does not update the
# frame when pandas Copy-on-Write is active.
df_bfs_data['crime_rate'] = df_bfs_data['crime_rate'].fillna(
    df_bfs_data['crime_rate'].median()
)

# -------------------------
# Define a dictionary mapping town names to their BFS numbers
# -------------------------
locations = {
    "Zürich": 261, "Kloten": 62, "Uster": 198, "Illnau-Effretikon": 296,
    "Feuerthalen": 27, "Pfäffikon": 177, "Ottenbach": 11, "Dübendorf": 191,
    "Richterswil": 138, "Maur": 195, "Embrach": 56, "Bülach": 53,
    "Winterthur": 230, "Oetwil am See": 157, "Russikon": 178, "Obfelden": 10,
    "Wald (ZH)": 120, "Niederweningen": 91, "Dällikon": 84, "Buchs (ZH)": 83,
    "Rüti (ZH)": 118, "Hittnau": 173, "Bassersdorf": 52, "Glattfelden": 58,
    "Opfikon": 66, "Hinwil": 117, "Regensberg": 95, "Langnau am Albis": 136,
    "Dietikon": 243, "Erlenbach (ZH)": 151, "Kappel am Albis": 6, "Stäfa": 158,
    "Zell (ZH)": 231, "Turbenthal": 228, "Oberglatt": 92, "Winkel": 72,
    "Volketswil": 199, "Kilchberg (ZH)": 135, "Wetzikon (ZH)": 121,
    "Zumikon": 160, "Weisslingen": 180, "Elsau": 219, "Hettlingen": 221,
    "Rüschlikon": 139, "Stallikon": 13, "Dielsdorf": 86, "Wallisellen": 69,
    "Dietlikon": 54, "Meilen": 156, "Wangen-Brüttisellen": 200, "Flaach": 28,
    "Regensdorf": 96, "Niederhasli": 90, "Bauma": 297, "Aesch (ZH)": 241,
    "Schlieren": 247, "Dürnten": 113, "Unterengstringen": 249,
    "Gossau (ZH)": 115, "Oberengstringen": 245, "Schleinikon": 98,
    "Aeugst am Albis": 1, "Rheinau": 38, "Höri": 60, "Rickenbach (ZH)": 225,
    "Rafz": 67, "Adliswil": 131, "Zollikon": 161, "Urdorf": 250,
    "Hombrechtikon": 153, "Birmensdorf (ZH)": 242, "Fehraltorf": 172,
    "Weiach": 102, "Männedorf": 155, "Küsnacht (ZH)": 154,
    "Hausen am Albis": 4, "Hochfelden": 59, "Fällanden": 193,
    "Greifensee": 194, "Mönchaltorf": 196, "Dägerlen": 214,
    "Thalheim an der Thur": 39, "Uetikon am See": 159, "Seuzach": 227,
    "Uitikon": 248, "Affoltern am Albis": 2, "Geroldswil": 244,
    "Niederglatt": 89, "Thalwil": 141, "Rorbas": 68, "Pfungen": 224,
    "Weiningen (ZH)": 251, "Bubikon": 112, "Neftenbach": 223,
    "Mettmenstetten": 9, "Otelfingen": 94, "Flurlingen": 29, "Stadel": 100,
    "Grüningen": 116, "Henggart": 31, "Dachsen": 25, "Bonstetten": 3,
    "Bachenbülach": 51, "Horgen": 295,
}


# -------------------------
# Define the prediction function
# -------------------------
def predict_apartment(rooms, area, town, luxurious):
    """Predict the price of an apartment and report the town's crime rate.

    Args:
        rooms: Number of rooms entered by the user.
        area: Area entered by the user.
        town: Town name; must be a key of ``locations``.
        luxurious: Checkbox state; any truthy value is encoded as 1, else 0.

    Returns:
        A ``(predicted_price, crime_rate)`` tuple of floats. The price is the
        model prediction rounded to zero decimals; the crime rate is the
        ``crime_rate`` value of the town's row in ``df_bfs_data``.

    Raises:
        gr.Error: If ``rooms`` or ``area`` is missing, if ``town`` is not a
            known town, or if ``df_bfs_data`` does not contain exactly one
            row for the town's BFS number.
    """
    # An empty number field arrives as None; reject it before it can turn the
    # feature vector into an object array.
    if rooms is None or area is None:
        raise gr.Error("Please enter both the number of rooms and the area.")

    bfs_number = locations.get(town)
    if bfs_number is None:
        raise gr.Error("Error: Data not found for town " + str(town))

    matches = df_bfs_data[df_bfs_data['bfs_number'] == bfs_number]
    # Validate before reading from the selection. The interface has two
    # outputs, so a failure is raised rather than returned as a single string.
    if len(matches) != 1:
        raise gr.Error("Error: Data not found for town " + town)
    row = matches.iloc[0]

    # Convert luxurious input (checkbox) to integer (1 if True, else 0)
    luxurious_value = 1 if luxurious else 0

    # Automatically load the crime_rate from the merged data
    crime_rate_value = row['crime_rate']

    # Create the input vector; the order must match FEATURE_NAMES.
    input_features = np.array([
        rooms,
        area,
        row['pop'],
        row['pop_dens'],
        row['frg_pct'],
        row['emp'],
        row['tax_income'],
        luxurious_value,
        crime_rate_value,
    ]).reshape(1, len(FEATURE_NAMES))

    # Get the predicted price from the model
    prediction = random_forest_model.predict(input_features)

    # Return both the predicted price and the automatically loaded crime rate
    return float(np.round(prediction[0], 0)), float(crime_rate_value)


# -------------------------
# Create the Gradio interface
# -------------------------
# The outputs show both the predicted price and the crime rate index.
iface = gr.Interface(
    fn=predict_apartment,
    inputs=[
        "number",
        "number",
        gr.Dropdown(choices=list(locations.keys()), label="Town", type="value"),
        gr.Checkbox(label="Luxurious?"),
    ],
    outputs=[
        gr.Number(label="Predicted Price"),
        gr.Number(label="Crime Rate Index"),
    ],
    examples=[
        [4.5, 120, "Kloten", True],
        [3.5, 60, "Horgen", False],
    ],
)

# Launch only when executed as a script so importing this module (for tests
# or reuse of predict_apartment) does not start a web server.
if __name__ == "__main__":
    iface.launch()