lindritdev committed on
Commit
64f32e8
·
verified ·
1 Parent(s): f8f0c18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -33
app.py CHANGED
@@ -1,27 +1,41 @@
1
- # %%
2
  import gradio as gr
3
- from sklearn.ensemble import RandomForestRegressor
4
  import numpy as np
5
  import pandas as pd
6
  import pickle
7
 
8
- # %%
9
- # TODO change the file to your own model.
10
- model_filename = "random_forest_regression_luxurious.pkl"
11
-
12
- random_forest_model = RandomForestRegressor()
13
  with open(model_filename, 'rb') as f:
14
  random_forest_model = pickle.load(f)
15
 
16
- print('Number of features: ', random_forest_model.n_features_in_)
17
- print('Features are (see week 1): ', ['rooms', 'area', 'pop', 'pop_dens', 'frg_pct', 'emp', 'tax_income', 'luxurious'])
18
- random_forest_model
19
 
20
- # %%
 
 
21
  df_bfs_data = pd.read_csv('bfs_municipality_and_tax_data.csv', sep=',', encoding='utf-8')
22
  df_bfs_data['tax_income'] = df_bfs_data['tax_income'].str.replace("'", "").astype(float)
23
 
24
- # %%
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  locations = {
26
  "Zürich": 261,
27
  "Kloten": 62,
@@ -129,21 +143,26 @@ locations = {
129
  "Horgen": 295
130
  }
131
 
132
- # %%
133
- # Define the core prediction function including the "luxurious" input
 
134
  def predict_apartment(rooms, area, town, luxurious):
135
  bfs_number = locations[town]
136
  df = df_bfs_data[df_bfs_data['bfs_number'] == bfs_number].copy()
137
  df.reset_index(inplace=True)
 
138
  df.loc[0, 'rooms'] = rooms
139
  df.loc[0, 'area'] = area
140
- if len(df) != 1: # if not exactly one record, return -1
141
- return -1
142
-
143
- # Convert the luxurious input (a boolean from the checkbox) to an integer (1 if True, 0 if False)
144
  luxurious_value = 1 if luxurious else 0
145
-
146
- # Create the input vector with the new "luxurious" attribute as the last feature
 
 
 
147
  input_features = np.array([
148
  rooms,
149
  area,
@@ -152,17 +171,21 @@ def predict_apartment(rooms, area, town, luxurious):
152
  df['frg_pct'].iloc[0],
153
  df['emp'].iloc[0],
154
  df['tax_income'].iloc[0],
155
- luxurious_value
 
156
  ])
157
- input_features = input_features.reshape(1, 8)
 
 
158
  prediction = random_forest_model.predict(input_features)
159
- return np.round(prediction[0], 0)
 
 
160
 
161
- # %%
162
- print(predict_apartment(3, 100, 'Zürich', True))
163
-
164
- # %%
165
- # Create the Gradio interface with an extra input for luxurious (yes/no)
166
  iface = gr.Interface(
167
  fn=predict_apartment,
168
  inputs=[
@@ -171,13 +194,14 @@ iface = gr.Interface(
171
  gr.Dropdown(choices=list(locations.keys()), label="Town", type="value"),
172
  gr.Checkbox(label="Luxurious?")
173
  ],
174
- outputs=gr.Number(),
 
 
 
175
  examples=[
176
- [4.5, 120, "Dietikon", True],
177
- [3.5, 60, "Winterthur", False]
178
  ]
179
  )
180
 
181
  iface.launch()
182
-
183
-
 
 
1
  import gradio as gr
 
2
  import numpy as np
3
  import pandas as pd
4
  import pickle
5
 
6
+ # -------------------------
7
+ # Load the trained model (which was trained with crime_rate as a feature)
8
+ # -------------------------
9
+ model_filename = "random_forest_regression_new.pkl"
 
10
  with open(model_filename, 'rb') as f:
11
  random_forest_model = pickle.load(f)
12
 
13
+ print('Number of features:', random_forest_model.n_features_in_)
14
+ print('Features are:', ['rooms', 'area', 'pop', 'pop_dens', 'frg_pct', 'emp', 'tax_income', 'luxurious', 'crime_rate'])
 
15
 
16
+ # -------------------------
17
+ # Load and prepare municipality data
18
+ # -------------------------
19
  df_bfs_data = pd.read_csv('bfs_municipality_and_tax_data.csv', sep=',', encoding='utf-8')
20
  df_bfs_data['tax_income'] = df_bfs_data['tax_income'].str.replace("'", "").astype(float)
21
 
22
+ # -------------------------
23
+ # Load and aggregate crime rate data
24
+ # -------------------------
25
# Load the crime statistics, then group the rows by municipality BFS number
# and sum "Häufigkeitszahl" so that each municipality ends up with a single
# value. The columns are renamed to line up with df_bfs_data for the merge.
df_crime = pd.read_csv("crime-rate.csv", sep=",", encoding="utf-8")
df_crime_agg = (
    df_crime.groupby("Gemeinde_BFS_Nr", as_index=False)["Häufigkeitszahl"]
    .sum()
    .rename(columns={"Gemeinde_BFS_Nr": "bfs_number", "Häufigkeitszahl": "crime_rate"})
)

# A left merge keeps every municipality row, including those that have no
# entry in the crime data (their crime_rate is NaN after the merge).
df_bfs_data = df_bfs_data.merge(df_crime_agg, on="bfs_number", how="left")

# Replace missing crime_rate values with the median crime rate.
# The result is assigned back to the column on purpose: calling
# fillna(inplace=True) on a selected column is a chained in-place operation,
# which newer pandas versions warn about and which leaves the DataFrame
# unchanged once Copy-on-Write is active.
df_bfs_data["crime_rate"] = df_bfs_data["crime_rate"].fillna(
    df_bfs_data["crime_rate"].median()
)
35
+
36
+ # -------------------------
37
+ # Define a dictionary mapping town names to their BFS numbers
38
+ # -------------------------
39
  locations = {
40
  "Zürich": 261,
41
  "Kloten": 62,
 
143
  "Horgen": 295
144
  }
145
 
146
+ # -------------------------
147
+ # Define the prediction function
148
+ # -------------------------
149
  def predict_apartment(rooms, area, town, luxurious):
150
  bfs_number = locations[town]
151
  df = df_bfs_data[df_bfs_data['bfs_number'] == bfs_number].copy()
152
  df.reset_index(inplace=True)
153
+ # Update user inputs
154
  df.loc[0, 'rooms'] = rooms
155
  df.loc[0, 'area'] = area
156
+ if len(df) != 1:
157
+ return "Error: Data not found for town " + town
158
+
159
+ # Convert luxurious input (checkbox) to integer (1 if True, else 0)
160
  luxurious_value = 1 if luxurious else 0
161
+
162
+ # Automatically load the crime_rate from the merged data
163
+ crime_rate_value = df['crime_rate'].iloc[0]
164
+
165
+ # Create the input vector (9 features)
166
  input_features = np.array([
167
  rooms,
168
  area,
 
171
  df['frg_pct'].iloc[0],
172
  df['emp'].iloc[0],
173
  df['tax_income'].iloc[0],
174
+ luxurious_value,
175
+ crime_rate_value
176
  ])
177
+ input_features = input_features.reshape(1, 9)
178
+
179
+ # Get the predicted price from the model
180
  prediction = random_forest_model.predict(input_features)
181
+
182
+ # Return both the predicted price and the automatically loaded crime rate
183
+ return np.round(prediction[0], 0), crime_rate_value
184
 
185
+ # -------------------------
186
+ # Create the Gradio interface
187
+ # -------------------------
188
+ # Here we update the outputs to show both the predicted price and the crime rate index.
 
189
  iface = gr.Interface(
190
  fn=predict_apartment,
191
  inputs=[
 
194
  gr.Dropdown(choices=list(locations.keys()), label="Town", type="value"),
195
  gr.Checkbox(label="Luxurious?")
196
  ],
197
+ outputs=[
198
+ gr.Number(label="Predicted Price"),
199
+ gr.Number(label="Crime Rate Index")
200
+ ],
201
  examples=[
202
+ [4.5, 120, "Kloten", True],
203
+ [3.5, 60, "Horgen", False]
204
  ]
205
  )
206
 
207
  iface.launch()