Spaces:
Sleeping
Sleeping
File size: 5,942 Bytes
719abf6 c693d00 64f32e8 c693d00 d43c1ed c693d00 64f32e8 719abf6 64f32e8 719abf6 64f32e8 719abf6 64f32e8 737141f 719abf6 d43c1ed 64f32e8 d43c1ed 64f32e8 737141f 64f32e8 737141f 64f32e8 737141f 64f32e8 d43c1ed 64f32e8 d43c1ed 64f32e8 719abf6 737141f 64f32e8 d43c1ed 64f32e8 d43c1ed 719abf6 d43c1ed |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
import gradio as gr
import numpy as np
import pandas as pd
import pickle
# -------------------------
# Load the trained model (which was trained with crime_rate as a feature)
# -------------------------
model_filename = "random_forest_regression_new.pkl"
# NOTE: unpickling can execute arbitrary code, so this file must be a trusted artifact.
with open(model_filename, "rb") as model_file:
    random_forest_model = pickle.load(model_file)

# Startup diagnostics: the feature count comes from the loaded model, while the
# name list below is hard-coded (it is not read from the model).
print('Number of features:', random_forest_model.n_features_in_)
print(
    'Features are:',
    ['rooms', 'area', 'pop', 'pop_dens', 'frg_pct', 'emp', 'tax_income', 'luxurious', 'crime_rate'],
)
# -------------------------
# Load and prepare municipality data
# -------------------------
df_bfs_data = pd.read_csv('bfs_municipality_and_tax_data.csv', sep=',', encoding='utf-8')
# tax_income is read as text containing apostrophes (presumably thousands
# separators - verify against the CSV); strip them literally, then convert.
df_bfs_data['tax_income'] = (
    df_bfs_data['tax_income'].str.replace("'", "", regex=False).astype(float)
)
# -------------------------
# Load and aggregate crime rate data
# -------------------------
df_crime = pd.read_csv("crime-rate.csv", sep=",", encoding="utf-8")
# Collapse the crime table to one row per municipality by summing "Häufigkeitszahl"
df_crime_agg = df_crime.groupby("Gemeinde_BFS_Nr", as_index=False)["Häufigkeitszahl"].sum()
# Rename columns so the key matches df_bfs_data and the value becomes the model feature name
df_crime_agg.rename(columns={"Gemeinde_BFS_Nr": "bfs_number", "Häufigkeitszahl": "crime_rate"}, inplace=True)
# Left merge keeps every municipality, even those without any crime record
df_bfs_data = df_bfs_data.merge(df_crime_agg, on="bfs_number", how="left")
# Fill missing crime_rate values with the median crime rate.
# The result is assigned back instead of calling fillna(inplace=True) on the
# column selection: that chained in-place form is deprecated in pandas 2.x and
# no longer updates the parent frame under Copy-on-Write, which would leave
# NaN values in the model input.
df_bfs_data['crime_rate'] = df_bfs_data['crime_rate'].fillna(df_bfs_data['crime_rate'].median())
# -------------------------
# Town name -> BFS number lookup
# -------------------------
# Insertion order is significant: the keys are listed in this order in the
# town dropdown of the user interface.
locations = {
    "Zürich": 261,
    "Kloten": 62,
    "Uster": 198,
    "Illnau-Effretikon": 296,
    "Feuerthalen": 27,
    "Pfäffikon": 177,
    "Ottenbach": 11,
    "Dübendorf": 191,
    "Richterswil": 138,
    "Maur": 195,
    "Embrach": 56,
    "Bülach": 53,
    "Winterthur": 230,
    "Oetwil am See": 157,
    "Russikon": 178,
    "Obfelden": 10,
    "Wald (ZH)": 120,
    "Niederweningen": 91,
    "Dällikon": 84,
    "Buchs (ZH)": 83,
    "Rüti (ZH)": 118,
    "Hittnau": 173,
    "Bassersdorf": 52,
    "Glattfelden": 58,
    "Opfikon": 66,
    "Hinwil": 117,
    "Regensberg": 95,
    "Langnau am Albis": 136,
    "Dietikon": 243,
    "Erlenbach (ZH)": 151,
    "Kappel am Albis": 6,
    "Stäfa": 158,
    "Zell (ZH)": 231,
    "Turbenthal": 228,
    "Oberglatt": 92,
    "Winkel": 72,
    "Volketswil": 199,
    "Kilchberg (ZH)": 135,
    "Wetzikon (ZH)": 121,
    "Zumikon": 160,
    "Weisslingen": 180,
    "Elsau": 219,
    "Hettlingen": 221,
    "Rüschlikon": 139,
    "Stallikon": 13,
    "Dielsdorf": 86,
    "Wallisellen": 69,
    "Dietlikon": 54,
    "Meilen": 156,
    "Wangen-Brüttisellen": 200,
    "Flaach": 28,
    "Regensdorf": 96,
    "Niederhasli": 90,
    "Bauma": 297,
    "Aesch (ZH)": 241,
    "Schlieren": 247,
    "Dürnten": 113,
    "Unterengstringen": 249,
    "Gossau (ZH)": 115,
    "Oberengstringen": 245,
    "Schleinikon": 98,
    "Aeugst am Albis": 1,
    "Rheinau": 38,
    "Höri": 60,
    "Rickenbach (ZH)": 225,
    "Rafz": 67,
    "Adliswil": 131,
    "Zollikon": 161,
    "Urdorf": 250,
    "Hombrechtikon": 153,
    "Birmensdorf (ZH)": 242,
    "Fehraltorf": 172,
    "Weiach": 102,
    "Männedorf": 155,
    "Küsnacht (ZH)": 154,
    "Hausen am Albis": 4,
    "Hochfelden": 59,
    "Fällanden": 193,
    "Greifensee": 194,
    "Mönchaltorf": 196,
    "Dägerlen": 214,
    "Thalheim an der Thur": 39,
    "Uetikon am See": 159,
    "Seuzach": 227,
    "Uitikon": 248,
    "Affoltern am Albis": 2,
    "Geroldswil": 244,
    "Niederglatt": 89,
    "Thalwil": 141,
    "Rorbas": 68,
    "Pfungen": 224,
    "Weiningen (ZH)": 251,
    "Bubikon": 112,
    "Neftenbach": 223,
    "Mettmenstetten": 9,
    "Otelfingen": 94,
    "Flurlingen": 29,
    "Stadel": 100,
    "Grüningen": 116,
    "Henggart": 31,
    "Dachsen": 25,
    "Bonstetten": 3,
    "Bachenbülach": 51,
    "Horgen": 295,
}
# -------------------------
# Define the prediction function
# -------------------------
def predict_apartment(rooms, area, town, luxurious):
    """Predict an apartment price and report the town's crime rate.

    Args:
        rooms: Number of rooms entered by the user.
        area: Apartment area entered by the user.
        town: Town name; must be a key of ``locations``.
        luxurious: Checkbox state; truthy is encoded as 1, anything else as 0.

    Returns:
        A ``(predicted_price, crime_rate)`` tuple: the model prediction
        rounded to zero decimals, and the ``crime_rate`` value stored for
        the town's municipality in ``df_bfs_data``.

    Raises:
        gr.Error: If ``rooms`` or ``area`` is missing, the town is unknown,
            or ``df_bfs_data`` does not hold exactly one row for the town.
    """
    # An empty number field would put None into the feature vector.
    if rooms is None or area is None:
        raise gr.Error("Error: Please enter both rooms and area")
    # Covers an unselected dropdown (None) as well as an unexpected name.
    if town not in locations:
        raise gr.Error(f"Error: Data not found for town {town}")

    bfs_number = locations[town]
    matches = df_bfs_data[df_bfs_data['bfs_number'] == bfs_number]
    # Validate before reading any value. The previous version first wrote
    # rooms/area into row 0, which enlarges an empty frame to one row and so
    # hid the "municipality missing" case from this check. The error is raised
    # rather than returned because the interface expects two numeric outputs.
    if len(matches) != 1:
        raise gr.Error("Error: Data not found for town " + town)

    # Convert luxurious input (checkbox) to integer (1 if True, else 0)
    luxurious_value = 1 if luxurious else 0
    # Crime rate comes from the merged municipality data, not from the user
    crime_rate_value = matches['crime_rate'].iloc[0]

    municipality_columns = ('pop', 'pop_dens', 'frg_pct', 'emp', 'tax_income')
    municipality_values = [matches[column].iloc[0] for column in municipality_columns]

    # Feature order: rooms, area, pop, pop_dens, frg_pct, emp, tax_income,
    # luxurious, crime_rate (9 features in a single-row matrix)
    input_features = np.array(
        [rooms, area, *municipality_values, luxurious_value, crime_rate_value],
        dtype=float,
    ).reshape(1, 9)

    # Get the predicted price from the model
    prediction = random_forest_model.predict(input_features)
    # Return both the predicted price and the automatically loaded crime rate
    return np.round(prediction[0], 0), crime_rate_value
# -------------------------
# Create the Gradio interface
# -------------------------
# Inputs: two numeric fields, a town dropdown fed from the keys of `locations`,
# and a checkbox. Outputs: the predicted price and the crime rate index.
town_dropdown = gr.Dropdown(choices=list(locations), label="Town", type="value")
luxurious_checkbox = gr.Checkbox(label="Luxurious?")
price_output = gr.Number(label="Predicted Price")
crime_rate_output = gr.Number(label="Crime Rate Index")

# Each example row follows the input order: rooms, area, town, luxurious
example_rows = [
    [4.5, 120, "Kloten", True],
    [3.5, 60, "Horgen", False],
]

iface = gr.Interface(
    fn=predict_apartment,
    inputs=["number", "number", town_dropdown, luxurious_checkbox],
    outputs=[price_output, crime_rate_output],
    examples=example_rows,
)
iface.launch()
|