Spaces:

lindritdev
/

apartment

Build error

App Files Community

lindritdev committed on Mar 26, 2025

Commit

64f32e8

verified ·

1 Parent(s): f8f0c18

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -33

app.py CHANGED Viewed

@@ -1,27 +1,41 @@
-# %%
 import gradio as gr
-from sklearn.ensemble import RandomForestRegressor
 import numpy as np
 import pandas as pd
 import pickle
-# %%
-# TODO change the file to your own model.
-model_filename = "random_forest_regression_luxurious.pkl"
-random_forest_model = RandomForestRegressor()
 with open(model_filename, 'rb') as f:
     random_forest_model = pickle.load(f)
-print('Number of features: ', random_forest_model.n_features_in_)
-print('Features are (see week 1): ', ['rooms', 'area', 'pop', 'pop_dens', 'frg_pct', 'emp', 'tax_income', 'luxurious'])
-random_forest_model
-# %%
 df_bfs_data = pd.read_csv('bfs_municipality_and_tax_data.csv', sep=',', encoding='utf-8')
 df_bfs_data['tax_income'] = df_bfs_data['tax_income'].str.replace("'", "").astype(float)
-# %%
 locations = {
     "Zürich": 261,
     "Kloten": 62,
@@ -129,21 +143,26 @@ locations = {
     "Horgen": 295
 }
-# %%
-# Define the core prediction function including the "luxurious" input
 def predict_apartment(rooms, area, town, luxurious):
     bfs_number = locations[town]
     df = df_bfs_data[df_bfs_data['bfs_number'] == bfs_number].copy()
     df.reset_index(inplace=True)
     df.loc[0, 'rooms'] = rooms
     df.loc[0, 'area'] = area
-    if len(df) != 1:  # if not exactly one record, return -1
-        return -1
-    # Convert the luxurious input (a boolean from the checkbox) to an integer (1 if True, 0 if False)
     luxurious_value = 1 if luxurious else 0
-    # Create the input vector with the new "luxurious" attribute as the last feature
     input_features = np.array([
         rooms,
         area,
@@ -152,17 +171,21 @@ def predict_apartment(rooms, area, town, luxurious):
         df['frg_pct'].iloc[0],
         df['emp'].iloc[0],
         df['tax_income'].iloc[0],
-        luxurious_value
     ])
-    input_features = input_features.reshape(1, 8)
     prediction = random_forest_model.predict(input_features)
-    return np.round(prediction[0], 0)
-# %%
-print(predict_apartment(3, 100, 'Zürich', True))
-# %%
-# Create the Gradio interface with an extra input for luxurious (yes/no)
 iface = gr.Interface(
     fn=predict_apartment,
     inputs=[
@@ -171,13 +194,14 @@ iface = gr.Interface(
         gr.Dropdown(choices=list(locations.keys()), label="Town", type="value"),
         gr.Checkbox(label="Luxurious?")
     ],
-    outputs=gr.Number(),
     examples=[
-        [4.5, 120, "Dietikon", True],
-        [3.5, 60, "Winterthur", False]
     ]
 )
 iface.launch()

 import gradio as gr
 import numpy as np
 import pandas as pd
 import pickle
+# -------------------------
+# Load the trained model (which was trained with crime_rate as a feature)
+# -------------------------
+model_filename = "random_forest_regression_new.pkl"
 with open(model_filename, 'rb') as f:
     random_forest_model = pickle.load(f)
+print('Number of features:', random_forest_model.n_features_in_)
+print('Features are:', ['rooms', 'area', 'pop', 'pop_dens', 'frg_pct', 'emp', 'tax_income', 'luxurious', 'crime_rate'])
+# -------------------------
+# Load and prepare municipality data
+# -------------------------
 df_bfs_data = pd.read_csv('bfs_municipality_and_tax_data.csv', sep=',', encoding='utf-8')
 df_bfs_data['tax_income'] = df_bfs_data['tax_income'].str.replace("'", "").astype(float)
+# -------------------------
+# Load and aggregate crime rate data
+# -------------------------
+df_crime = pd.read_csv("crime-rate.csv", sep=",", encoding="utf-8")
+# Group by the municipality BFS number and sum the "Häufigkeitszahl"
+df_crime_agg = df_crime.groupby("Gemeinde_BFS_Nr", as_index=False)["Häufigkeitszahl"].sum()
+# Rename columns to match for merging
+df_crime_agg.rename(columns={"Gemeinde_BFS_Nr": "bfs_number", "Häufigkeitszahl": "crime_rate"}, inplace=True)
+# Merge crime data into the municipality data using the common key
+df_bfs_data = df_bfs_data.merge(df_crime_agg, on="bfs_number", how="left")
+# Fill any missing crime_rate values with the median crime rate.
+# Assign the result back instead of calling fillna(inplace=True) on the column:
+# the in-place call acts on a temporary Series under pandas Copy-on-Write
+# (FutureWarning in pandas 2.2, no effect from 3.0), leaving NaNs for
+# municipalities that have no row in the crime data.
+df_bfs_data['crime_rate'] = df_bfs_data['crime_rate'].fillna(df_bfs_data['crime_rate'].median())
+# -------------------------
+# Define a dictionary mapping town names to their BFS numbers
+# -------------------------
 locations = {
     "Zürich": 261,
     "Kloten": 62,
     "Horgen": 295
 }
+# -------------------------
+# Define the prediction function
+# -------------------------
 def predict_apartment(rooms, area, town, luxurious):
     bfs_number = locations[town]
     df = df_bfs_data[df_bfs_data['bfs_number'] == bfs_number].copy()
     df.reset_index(inplace=True)
+    # Update user inputs
     df.loc[0, 'rooms'] = rooms
     df.loc[0, 'area'] = area
+    if len(df) != 1:
+        return "Error: Data not found for town " + town
+    # Convert luxurious input (checkbox) to integer (1 if True, else 0)
     luxurious_value = 1 if luxurious else 0
+    # Automatically load the crime_rate from the merged data
+    crime_rate_value = df['crime_rate'].iloc[0]
+    # Create the input vector (9 features)
     input_features = np.array([
         rooms,
         area,
         df['frg_pct'].iloc[0],
         df['emp'].iloc[0],
         df['tax_income'].iloc[0],
+        luxurious_value,
+        crime_rate_value
     ])
+    input_features = input_features.reshape(1, 9)
+    # Get the predicted price from the model
     prediction = random_forest_model.predict(input_features)
+    # Return both the predicted price and the automatically loaded crime rate
+    return np.round(prediction[0], 0), crime_rate_value
+# -------------------------
+# Create the Gradio interface
+# -------------------------
+# Here we update the outputs to show both the predicted price and the crime rate index.
 iface = gr.Interface(
     fn=predict_apartment,
     inputs=[
         gr.Dropdown(choices=list(locations.keys()), label="Town", type="value"),
         gr.Checkbox(label="Luxurious?")
     ],
+    outputs=[
+        gr.Number(label="Predicted Price"),
+        gr.Number(label="Crime Rate Index")
+    ],
     examples=[
+        [4.5, 120, "Kloten", True],
+        [3.5, 60, "Horgen", False]
     ]
 )
 iface.launch()