Nicolas Pierson committed on
Commit
bbee008
·
unverified ·
1 Parent(s): af88f4d
Dockerfile CHANGED
@@ -22,6 +22,8 @@ COPY --chown=user data/ ./data/
22
  COPY --chown=user .streamlit/ ./.streamlit/
23
  COPY --chown=user src/ ./src/
24
  COPY --chown=user src/pages/1_Historical_Prices.py ./src/pages/1_Historical_Prices.py
 
 
25
 
26
  EXPOSE 8501
27
 
 
22
  COPY --chown=user .streamlit/ ./.streamlit/
23
  COPY --chown=user src/ ./src/
24
  COPY --chown=user src/pages/1_Historical_Prices.py ./src/pages/1_Historical_Prices.py
25
+ COPY --chown=user src/pages/2_Prediction_Prices.py ./src/pages/2_Prediction_Prices.py
26
+ COPY --chown=user src/pages/3_Historical_Risk_Score_Fire.py ./src/pages/3_Historical_Risk_Score_Fire.py
27
 
28
  EXPOSE 8501
29
 
src/pages/1_Historical_Prices.py CHANGED
@@ -172,19 +172,17 @@ top_departements_variation_display["prixm2moyen_2024"] = (
172
  top_departements_variation_display["prixm2moyen_difference"] = (
173
  top_departements_variation_display["prixm2moyen_difference"].map("{:,.0f}€".format)
174
  )
175
- top_departements_variation_display["variation_percentage"] = (
176
- top_departements_variation_display["variation_percentage"].map("{:,.2f}%".format)
177
- )
178
-
179
  top_departements_variation_display = top_departements_variation_display.sort_values(
180
  by="variation_percentage", ascending=False
181
  )
182
 
183
  fig = px.bar(
184
- top_departements_variation_display.sort_values(by="variation_percentage", ascending=True),
 
 
185
  x="nom_departement",
186
  y="variation_percentage",
187
- title="Top Departments by Price Variation (2015-2024)",
188
  labels={
189
  "nom_departement": "Department Name",
190
  "variation_percentage": "Price Variation (%)",
@@ -193,6 +191,10 @@ fig = px.bar(
193
  )
194
  st.plotly_chart(fig, use_container_width=True)
195
 
 
 
 
 
196
  st.dataframe(
197
  top_departements_variation_display,
198
  hide_index=True,
@@ -323,6 +325,7 @@ with right_co:
323
  "Average Price per m²": {"formatter": "currency", "currency": "EUR"}
324
  },
325
  )
 
326
  ###############################################################################
327
 
328
  st.subheader("Select Department(s) to View Historical Prices", divider=True)
@@ -489,6 +492,7 @@ selected_year_communes = st.selectbox(
489
  options=dataset_housing_prices["annee"].unique(),
490
  format_func=lambda x: str(x),
491
  key="year_communes_selectbox",
 
492
  )
493
 
494
  top_communes = (
 
172
  top_departements_variation_display["prixm2moyen_difference"] = (
173
  top_departements_variation_display["prixm2moyen_difference"].map("{:,.0f}€".format)
174
  )
 
 
 
 
175
  top_departements_variation_display = top_departements_variation_display.sort_values(
176
  by="variation_percentage", ascending=False
177
  )
178
 
179
  fig = px.bar(
180
+ top_departements_variation_display.sort_values(
181
+ by="variation_percentage", ascending=True
182
+ ),
183
  x="nom_departement",
184
  y="variation_percentage",
185
+ title=f"Top Departments by Price Variation ({selected_year_1} to {selected_year_2})",
186
  labels={
187
  "nom_departement": "Department Name",
188
  "variation_percentage": "Price Variation (%)",
 
191
  )
192
  st.plotly_chart(fig, use_container_width=True)
193
 
194
+ top_departements_variation_display["variation_percentage"] = (
195
+ top_departements_variation_display["variation_percentage"].map("{:,.2f}%".format)
196
+ )
197
+
198
  st.dataframe(
199
  top_departements_variation_display,
200
  hide_index=True,
 
325
  "Average Price per m²": {"formatter": "currency", "currency": "EUR"}
326
  },
327
  )
328
+
329
  ###############################################################################
330
 
331
  st.subheader("Select Department(s) to View Historical Prices", divider=True)
 
492
  options=dataset_housing_prices["annee"].unique(),
493
  format_func=lambda x: str(x),
494
  key="year_communes_selectbox",
495
+ index=list(dataset_housing_prices["annee"].unique()).index(2024) if 2024 in dataset_housing_prices["annee"].unique() else 0
496
  )
497
 
498
  top_communes = (
src/pages/2_Prediction_Prices.py CHANGED
@@ -3,6 +3,7 @@ import pandas as pd
3
  import asyncio
4
  import plotly.express as px
5
  import geopandas as gpd
 
6
 
7
  from pages.utils.utils import (
8
  async_load_file_s3,
@@ -56,15 +57,9 @@ def load_all_data_wrapper_predictions_prices_region():
56
  # --- Streamlit App Layout ---
57
  st.set_page_config(page_title="Oasis", page_icon=":house:", layout="wide")
58
 
59
- st.header("Historical Price - France")
60
- st.subheader("An overview of real estate prices in France from 2015 to 2024")
61
 
62
- st.write(
63
- "This map shows the average price per square meter in French departments over the years, with a focus on climatic events."
64
- )
65
-
66
- # Display a table of the top 10 departments with the highest average price per square meter
67
- st.subheader("Top 10 Departments by Average Price per Square Meter (2015-2024)")
68
 
69
  with st.spinner("Loading data and preparing maps..."):
70
  (
@@ -73,10 +68,170 @@ with st.spinner("Loading data and preparing maps..."):
73
  insee_df,
74
  ) = load_all_data_wrapper_predictions_prices_country()
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  selected_year = st.selectbox(
77
  "Select a Year",
78
  options=dataset_departements_housing_prices["annee"].unique(),
79
  format_func=lambda x: f"{x}",
 
 
 
80
  )
81
 
82
  top_departements = (
@@ -104,36 +259,89 @@ top_departements = (
104
  )
105
  }
106
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  .assign(
108
  **{
109
- "Average Price per m²": lambda x: x["Average Price per m²"].apply(
110
- lambda y: f"{y:,.2f} €"
 
 
 
 
111
  )
112
  }
113
  )
114
- .head(10)
115
  )
116
- st.dataframe(top_departements)
117
 
118
- fig_france = display_choropleth_map_country(
119
- dataset_departements_housing_prices,
120
- departements_geojson,
121
- metric_name="prixm2moyen",
122
- metric_description="Average Price per Square Meter",
123
- title="Average Predicted Price per Square Meter in French Departments",
124
- height=1400,
125
- width=1400,
126
- )
127
- st.plotly_chart(fig_france, use_container_width=False)
128
- st.write("Hover over the map to see detailed information for each department and year.")
129
- st.write(
130
- "Missing values are represented in light grey, while actual data is shown in a gradient from red (high prices) to green (low prices)."
131
- )
132
- st.write(
133
- "Note: The color scale is customized to highlight missing values in light grey, while the actual data is represented using a reversed RdYlGn color scale, where red indicates higher prices and green indicates lower prices."
134
- )
 
 
 
 
 
135
 
136
- st.subheader("Select a Department to View Commune Prices")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
  with st.spinner("Loading data and preparing maps..."):
139
  (dataset_housing_prices, communes_geojson, insee_df) = (
@@ -145,36 +353,403 @@ with st.spinner("Loading data and preparing maps..."):
145
  ]["prixm2moyen"].min()
146
  max_global_commune_prixm2moyen = dataset_housing_prices["prixm2moyen"].max()
147
 
148
- selected_departement = st.selectbox(
149
- "Select a Department",
150
- options=_format_department_code(insee_df)["code_departement"].unique(),
151
- format_func=lambda x: f"{x} - {insee_df[insee_df['code_departement'] == x]['nom_departement'].values[0]}"
152
- if x in insee_df["code_departement"].values
153
- else x,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
 
 
 
156
  fig_department = display_choropleth_map_for_department(
157
  dataset_housing_prices,
158
  selected_departement,
159
  communes_geojson,
160
  min_global_commune_prixm2moyen,
161
  max_global_commune_prixm2moyen,
162
- metric_name="prixm2moyen",
163
- metric_description="Average Price per Square Meter",
164
- title=f"Average Predicted Price per Square Meter in Department {selected_departement} (Animated by Year)",
165
- height_graph=1400,
166
  width_graph=1400,
167
  )
168
-
169
- st.subheader("Average Price per Square Meter in French Communes (2015-2024)")
170
  st.plotly_chart(fig_department, use_container_width=False)
171
- st.write("Hover over the map to see detailed information for each commune and year.")
172
  st.write(
173
  "Missing values are represented in light grey, while actual data is shown in a gradient from red (high prices) to green (low prices)."
174
  )
175
- st.write(
176
- "Note: The color scale is customized to highlight missing values in light grey, while the actual data is represented using a reversed RdYlGn color scale, where red indicates higher prices and green indicates lower prices."
177
- )
178
- st.write(
179
- "The map is animated by year, allowing you to see how the average price per square meter changes over time."
 
180
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import asyncio
4
  import plotly.express as px
5
  import geopandas as gpd
6
+ from functools import reduce
7
 
8
  from pages.utils.utils import (
9
  async_load_file_s3,
 
57
  # --- Streamlit App Layout ---
58
  st.set_page_config(page_title="Oasis", page_icon=":house:", layout="wide")
59
 
60
+ ###############################################################################
 
61
 
62
+ st.header("Forecast Prices (2024-2029)")
 
 
 
 
 
63
 
64
  with st.spinner("Loading data and preparing maps..."):
65
  (
 
68
  insee_df,
69
  ) = load_all_data_wrapper_predictions_prices_country()
70
 
71
+ st.subheader("An overview of real estate forecasted prices in France", divider=True)
72
+
73
+ st.write(
74
+ "This map shows the average price per square meter in French departments over the years, with a focus on climatic events."
75
+ )
76
+
77
+ fig_france = display_choropleth_map_country(
78
+ dataset_departements_housing_prices,
79
+ departements_geojson,
80
+ metric_name="prixm2moyen",
81
+ metric_description="Average Price per Square Meter",
82
+ title="Average Forecasted Price in French Departments",
83
+ )
84
+ st.plotly_chart(fig_france, use_container_width=False)
85
+ st.write(
86
+ "Missing values are represented in light grey, while actual data is shown in a gradient from red (high prices) to green (low prices)."
87
+ )
88
+
89
+ ###############################################################################
90
+
91
+ st.subheader(
92
+ "Top 7 Departments with Highest Variation",
93
+ divider=True,
94
+ )
95
+
96
+ st.write(
97
+ "Select the period to view the top 7 departments with the highest variation in average price."
98
+ )
99
+
100
+ left_co, right_co = st.columns(2)
101
+ with left_co:
102
+ selected_year_1 = st.selectbox(
103
+ "Select Year 1", options=range(2024, 2030), index=0, key="year_select_1"
104
+ )
105
+ with right_co:
106
+ selected_year_2 = st.selectbox(
107
+ "Select Year 2", options=range(2024, 2030), index=5, key="year_select_2"
108
+ )
109
+
110
+ # 1. Get prices for the two selected years (selected_year_1 and selected_year_2); the _2024/_2030 column suffixes are only labels
111
+ prices_2024 = dataset_departements_housing_prices[
112
+ dataset_departements_housing_prices["annee"] == selected_year_1
113
+ ][["code_departement", "prixm2moyen"]].rename(
114
+ columns={"prixm2moyen": "prixm2moyen_2024"}
115
+ )
116
+
117
+ prices_2030 = dataset_departements_housing_prices[
118
+ dataset_departements_housing_prices["annee"] == selected_year_2
119
+ ][["code_departement", "prixm2moyen"]].rename(
120
+ columns={"prixm2moyen": "prixm2moyen_2030"}
121
+ )
122
+
123
+ # 2. Merge them based on department code
124
+ # Use an outer merge to keep departments even if they are missing data for one of the years
125
+ variation_data = pd.merge(prices_2024, prices_2030, on="code_departement", how="outer")
126
+
127
+ # 3. Calculate absolute difference and percentage variation
128
+ variation_data["prixm2moyen_difference"] = (
129
+ variation_data["prixm2moyen_2030"] - variation_data["prixm2moyen_2024"]
130
+ )
131
+ variation_data["variation_percentage"] = (
132
+ variation_data["prixm2moyen_difference"] / variation_data["prixm2moyen_2024"]
133
+ ) * 100
134
+
135
+ # Handle cases where prixm2moyen_2024 might be 0 or NaN, leading to Inf or NaN percentage
136
+ variation_data.replace([float("inf"), -float("inf")], pd.NA, inplace=True)
137
+ variation_data.fillna(
138
+ 0, inplace=True
139
+ ) # Or pd.NA for missing values depending on how you want to display
140
+
141
+ # 4. Sort and select top N
142
+ # Sort by the signed price difference in € (not the percentage), largest increase first, for "highest variation"
143
+ # If "highest variation" means biggest absolute change (up or down), use abs()
144
+ # If it means biggest increase, sort by prixm2moyen_difference directly
145
+ top_departements_variation = variation_data.sort_values(
146
+ by="prixm2moyen_difference",
147
+ ascending=False, # For highest increase
148
+ # by="prixm2moyen_difference".abs(), ascending=False # For highest absolute change (increase or decrease)
149
+ ).head(7)
150
+
151
+ # add the department name to the dataframe
152
+ top_departements_variation = top_departements_variation.assign(
153
+ nom_departement=lambda x: x["code_departement"].apply(
154
+ lambda code: insee_df[
155
+ _format_department_code(insee_df)["code_departement"] == code
156
+ ]["nom_departement"].values[0]
157
+ if code in _format_department_code(insee_df)["code_departement"].values
158
+ else "Unknown"
159
+ )
160
+ )
161
+
162
+ # Select and reorder columns for display
163
+ display_cols = [
164
+ "nom_departement",
165
+ "code_departement",
166
+ "prixm2moyen_2024",
167
+ "prixm2moyen_2030",
168
+ "prixm2moyen_difference",
169
+ "variation_percentage",
170
+ ]
171
+ top_departements_variation_display = top_departements_variation[display_cols].copy()
172
+
173
+ # Format the numerical columns for better display in Streamlit
174
+ top_departements_variation_display["prixm2moyen_2024"] = (
175
+ top_departements_variation_display["prixm2moyen_2024"].map("{:,.0f}€".format)
176
+ )
177
+ top_departements_variation_display["prixm2moyen_2030"] = (
178
+ top_departements_variation_display["prixm2moyen_2030"].map("{:,.0f}€".format)
179
+ )
180
+ top_departements_variation_display["prixm2moyen_difference"] = (
181
+ top_departements_variation_display["prixm2moyen_difference"].map("{:,.0f}€".format)
182
+ )
183
+ top_departements_variation_display = top_departements_variation_display.sort_values(
184
+ by="variation_percentage", ascending=False
185
+ )
186
+
187
+ fig = px.bar(
188
+ top_departements_variation_display.sort_values(
189
+ by="variation_percentage", ascending=True
190
+ ),
191
+ x="nom_departement",
192
+ y="variation_percentage",
193
+ title=f"Top Departments by Price Variation ({selected_year_1} to {selected_year_2})",
194
+ labels={
195
+ "nom_departement": "Department Name",
196
+ "variation_percentage": "Price Variation (%)",
197
+ },
198
+ color_continuous_scale=px.colors.sequential.Reds,
199
+ )
200
+ st.plotly_chart(fig, use_container_width=True)
201
+
202
+ top_departements_variation_display["variation_percentage"] = (
203
+ top_departements_variation_display["variation_percentage"].map("{:,.2f}%".format)
204
+ )
205
+
206
+ st.dataframe(
207
+ top_departements_variation_display,
208
+ hide_index=True,
209
+ # You can explicitly set column headers if you want more user-friendly names
210
+ column_config={
211
+ "nom_departement": "Department Name",
212
+ "code_departement": "Dept. Code",
213
+ "prixm2moyen_2024": "Avg. Price 2024",
214
+ "prixm2moyen_2030": "Avg. Price 2030",
215
+ "prixm2moyen_difference": "Abs. Change (€)",
216
+ "variation_percentage": "Change (%)",
217
+ },
218
+ )
219
+
220
+ ###############################################################################
221
+
222
+ st.subheader("Top & Bottom 5 Departments", divider=True)
223
+
224
+ st.write(
225
+ "Select a year to view the top and bottom departments by average price per square meter."
226
+ )
227
+
228
  selected_year = st.selectbox(
229
  "Select a Year",
230
  options=dataset_departements_housing_prices["annee"].unique(),
231
  format_func=lambda x: f"{x}",
232
+ index=list(dataset_departements_housing_prices["annee"].unique()).index(2024)
233
+ if 2024 in dataset_departements_housing_prices["annee"].unique()
234
+ else 0,
235
  )
236
 
237
  top_departements = (
 
259
  )
260
  }
261
  )
262
+ .head(5)
263
+ )
264
+
265
+ bottom_departements = (
266
+ dataset_departements_housing_prices[
267
+ (dataset_departements_housing_prices["annee"] == selected_year)
268
+ & (dataset_departements_housing_prices["prixm2moyen"] > 0)
269
+ ]
270
+ .groupby("code_departement")["prixm2moyen"]
271
+ .mean()
272
+ .reset_index()
273
+ .sort_values(by="prixm2moyen", ascending=True)
274
+ .rename(
275
+ columns={
276
+ "code_departement": "Department Code",
277
+ "prixm2moyen": "Average Price per m²",
278
+ }
279
+ )
280
  .assign(
281
  **{
282
+ "Department Name": lambda x: x["Department Code"].apply(
283
+ lambda code: insee_df[
284
+ _format_department_code(insee_df)["code_departement"] == code
285
+ ]["nom_departement"].values[0]
286
+ if code in _format_department_code(insee_df)["code_departement"].values
287
+ else "Unknown"
288
  )
289
  }
290
  )
291
+ .head(5)
292
  )
293
+ left_co, right_co = st.columns(2)
294
 
295
+ with left_co:
296
+ fig = px.bar(
297
+ bottom_departements,
298
+ x="Department Name",
299
+ y="Average Price per ",
300
+ title="Bottom 5 Departments by Average Price per Square Meter",
301
+ )
302
+ st.plotly_chart(fig)
303
+ st.dataframe(
304
+ bottom_departements.assign(
305
+ **{
306
+ "Average Price per m²": lambda x: x["Average Price per m²"].apply(
307
+ lambda y: f"{y:,.2f} €"
308
+ )
309
+ }
310
+ ),
311
+ hide_index=True,
312
+ column_order=("Department Code", "Department Name", "Average Price per m²"),
313
+ column_config={
314
+ "Average Price per m²": {"formatter": "currency", "currency": "EUR"}
315
+ },
316
+ )
317
 
318
+ with right_co:
319
+ # display a bar chart of the top_departements
320
+ fig = px.bar(
321
+ top_departements.sort_values(by="Average Price per m²", ascending=True),
322
+ x="Department Name",
323
+ y="Average Price per m²",
324
+ title="Top 5 Departments by Average Price per Square Meter",
325
+ )
326
+ st.plotly_chart(fig)
327
+ st.dataframe(
328
+ top_departements.assign(
329
+ **{
330
+ "Average Price per m²": lambda x: x["Average Price per m²"].apply(
331
+ lambda y: f"{y:,.2f} €"
332
+ )
333
+ }
334
+ ),
335
+ hide_index=True,
336
+ column_order=("Department Code", "Department Name", "Average Price per m²"),
337
+ column_config={
338
+ "Average Price per m²": {"formatter": "currency", "currency": "EUR"}
339
+ },
340
+ )
341
+
342
+ ###############################################################################
343
+
344
+ st.subheader("Select Department(s) to View Historical Prices", divider=True)
345
 
346
  with st.spinner("Loading data and preparing maps..."):
347
  (dataset_housing_prices, communes_geojson, insee_df) = (
 
353
  ]["prixm2moyen"].min()
354
  max_global_commune_prixm2moyen = dataset_housing_prices["prixm2moyen"].max()
355
 
356
+ # Get all unique department codes for selectbox options
357
+ all_departement_codes = _format_department_code(insee_df)["code_departement"].unique()
358
+
359
+ # --- Department Selection 1 & 2 ---
360
+ col_dept1, col_dept2 = st.columns(2)
361
+
362
+ # Set default department values
363
+ default_dept_1 = all_departement_codes[0] if len(all_departement_codes) > 0 else None
364
+ default_dept_2 = all_departement_codes[1] if len(all_departement_codes) > 1 else None
365
+
366
+ with col_dept1:
367
+ selected_departement = st.selectbox(
368
+ "Select the first Department",
369
+ options=all_departement_codes,
370
+ format_func=lambda x: f"{x} - {insee_df[insee_df['code_departement'] == x]['nom_departement'].values[0]}"
371
+ if x in insee_df["code_departement"].values
372
+ else x,
373
+ key="departement_select_1",
374
+ index=list(all_departement_codes).index(default_dept_1)
375
+ if default_dept_1
376
+ else 0,
377
+ )
378
+
379
+ with col_dept2:
380
+ selected_departement_2 = st.selectbox(
381
+ "Select the second Department (Optional for comparison)",
382
+ options=[None] + list(all_departement_codes), # Add None option
383
+ format_func=lambda x: f"{x} - {insee_df[insee_df['code_departement'] == x]['nom_departement'].values[0]}"
384
+ if x is not None and x in insee_df["code_departement"].values
385
+ else "None (Only show Department 1)",
386
+ key="departement_select_2",
387
+ index=list([None] + list(all_departement_codes)).index(default_dept_2)
388
+ if default_dept_2
389
+ else 0,
390
+ )
391
+
392
+ st.write(
393
+ "This chart shows the average price per square meter in the selected department(s) over the years, with a focus on climatic events."
394
+ )
395
+
396
+ # --- Data Preparation for Department Chart ---
397
+ all_departments_to_plot = []
398
+
399
+ # Process Department 1 data
400
+ if selected_departement: # Ensure a department is selected
401
+ department_data_1 = dataset_housing_prices[
402
+ dataset_housing_prices["code_departement"] == selected_departement
403
+ ].copy()
404
+ if not department_data_1.empty:
405
+ department_data_1["annee"] = department_data_1["annee"].astype(str)
406
+ # Group by year and calculate mean for the department
407
+ department_data_1 = (
408
+ department_data_1.groupby("annee")["prixm2moyen"].mean().reset_index()
409
+ )
410
+ # Get the department name for the legend
411
+ departement_name_1 = (
412
+ insee_df[insee_df["code_departement"] == selected_departement][
413
+ "nom_departement"
414
+ ].values[0]
415
+ if selected_departement in insee_df["code_departement"].values
416
+ else selected_departement
417
+ )
418
+ department_data_1.rename(
419
+ columns={"prixm2moyen": departement_name_1}, inplace=True
420
+ )
421
+ all_departments_to_plot.append(department_data_1)
422
+ else:
423
+ st.warning(f"No data available for Department 1: {selected_departement}")
424
+
425
+ # Process Department 2 data if selected
426
+ if (
427
+ selected_departement_2 and selected_departement_2 != selected_departement
428
+ ): # Ensure a valid second department is chosen and it's not the same as the first
429
+ department_data_2 = dataset_housing_prices[
430
+ dataset_housing_prices["code_departement"] == selected_departement_2
431
+ ].copy()
432
+ if not department_data_2.empty:
433
+ department_data_2["annee"] = department_data_2["annee"].astype(str)
434
+ # Group by year and calculate mean for the department
435
+ department_data_2 = (
436
+ department_data_2.groupby("annee")["prixm2moyen"].mean().reset_index()
437
+ )
438
+ # Get the department name for the legend
439
+ departement_name_2 = (
440
+ insee_df[insee_df["code_departement"] == selected_departement_2][
441
+ "nom_departement"
442
+ ].values[0]
443
+ if selected_departement_2 in insee_df["code_departement"].values
444
+ else selected_departement_2
445
+ )
446
+ department_data_2.rename(
447
+ columns={"prixm2moyen": departement_name_2}, inplace=True
448
+ )
449
+ all_departments_to_plot.append(department_data_2)
450
+ else:
451
+ st.warning(f"No data available for Department 2: {selected_departement_2}")
452
+ elif (
453
+ selected_departement_2 == selected_departement
454
+ and selected_departement_2 is not None
455
+ ):
456
+ st.info("You've selected the same department for both. Showing only one line.")
457
+
458
+
459
+ # Combine dataframes for plotting the department comparison chart
460
+ if all_departments_to_plot:
461
+ combined_dept_df = reduce(
462
+ lambda left, right: pd.merge(left, right, on="annee", how="outer"),
463
+ all_departments_to_plot,
464
+ )
465
+ combined_dept_df.set_index("annee", inplace=True)
466
+
467
+ fig_dept = px.line(
468
+ combined_dept_df.reset_index(),
469
+ x="annee",
470
+ y=combined_dept_df.columns,
471
+ title="Average Price per m² in Selected Department(s) Over the Years",
472
+ labels={"annee": "Year", "value": "Average Price per m² (€)"},
473
+ )
474
+ fig_dept.update_layout(
475
+ xaxis_title="Year",
476
+ yaxis_title="Average Price per m² (€)",
477
+ legend_title_text="Department",
478
+ )
479
+ st.plotly_chart(fig_dept, use_container_width=True)
480
+ else:
481
+ st.info("Please select at least one department to display data.")
482
+
483
+ selected_departement_label = (
484
+ f"{insee_df[insee_df['code_departement'] == selected_departement]['nom_departement'].values[0]}"
485
+ if selected_departement in insee_df["code_departement"].values
486
+ else selected_departement
487
+ )
488
+
489
+ # Prepare data for box plot
490
+ box_plot_data = dataset_housing_prices[
491
+ (dataset_housing_prices["code_departement"] == selected_departement)
492
+ | (dataset_housing_prices["code_departement"] == selected_departement_2)
493
+ ].copy()
494
+ box_plot_data["annee"] = box_plot_data["annee"].astype(
495
+ str
496
+ ) # Ensure 'annee' is string for categorical x-axis
497
+ # Create the box plot
498
+ fig_box = px.box(
499
+ box_plot_data,
500
+ x="annee",
501
+ y="prixm2moyen",
502
+ color="code_departement",
503
+ title=f"Distribution of Prices in Department {selected_departement_label} by Year",
504
+ )
505
+ fig_box.update_layout(
506
+ xaxis_title="Year",
507
+ yaxis_title="Average Price per m² (€)",
508
+ )
509
+ st.plotly_chart(fig_box, use_container_width=True)
510
+
511
+ ###############################################################################
512
+
513
+ # display the top 5 communes in the selected department by average price per square meter
514
+ st.subheader(
515
+ f"Top and Bottom 5 Communes in Department {selected_departement_label}",
516
+ divider=True,
517
+ )
518
+
519
+ # selected year
520
+ selected_year_communes = st.selectbox(
521
+ "Select a Year",
522
+ options=dataset_housing_prices["annee"].unique(),
523
+ format_func=lambda x: str(x),
524
+ key="year_communes_selectbox",
525
+ index=list(dataset_housing_prices["annee"].unique()).index(2029) if 2029 in dataset_housing_prices["annee"].unique()
526
+ else 0,
527
+ )
528
+
529
+ top_communes = (
530
+ dataset_housing_prices[
531
+ (dataset_housing_prices["code_departement"] == selected_departement)
532
+ & (dataset_housing_prices["annee"] == selected_year_communes)
533
+ ]
534
+ .groupby("code_commune_insee")["prixm2moyen"]
535
+ .mean()
536
+ .reset_index()
537
+ .sort_values(by="prixm2moyen", ascending=False)
538
+ .rename(
539
+ columns={
540
+ "code_commune_insee": "Commune Code",
541
+ "prixm2moyen": "Average Price per m²",
542
+ }
543
+ )
544
+ .assign(
545
+ **{
546
+ "Commune Name": lambda x: x["Commune Code"].apply(
547
+ lambda code: insee_df[
548
+ _format_department_code(insee_df)["code_commune_INSEE"] == code
549
+ ]["nom_commune_complet"].values[0]
550
+ if code
551
+ in _format_department_code(insee_df)["code_commune_INSEE"].values
552
+ else "Unknown"
553
+ )
554
+ }
555
+ )
556
+ .head(5)
557
  )
558
+ bottom_communes = (
559
+ dataset_housing_prices[
560
+ (dataset_housing_prices["code_departement"] == selected_departement)
561
+ & (dataset_housing_prices["annee"] == selected_year_communes)
562
+ & (dataset_housing_prices["prixm2moyen"] > 0)
563
+ ]
564
+ .groupby("code_commune_insee")["prixm2moyen"]
565
+ .mean()
566
+ .reset_index()
567
+ .sort_values(by="prixm2moyen", ascending=True)
568
+ .rename(
569
+ columns={
570
+ "code_commune_insee": "Commune Code",
571
+ "prixm2moyen": "Average Price per m²",
572
+ }
573
+ )
574
+ .assign(
575
+ **{
576
+ "Commune Name": lambda x: x["Commune Code"].apply(
577
+ lambda code: insee_df[
578
+ _format_department_code(insee_df)["code_commune_INSEE"] == code
579
+ ]["nom_commune_complet"].values[0]
580
+ if code
581
+ in _format_department_code(insee_df)["code_commune_INSEE"].values
582
+ else "Unknown"
583
+ )
584
+ }
585
+ )
586
+ .head(5)
587
+ )
588
+ left_co, right_co = st.columns(2)
589
+ with left_co:
590
+ fig = px.bar(
591
+ bottom_communes.sort_values(by="Average Price per m²", ascending=True),
592
+ x="Commune Name",
593
+ y="Average Price per m²",
594
+ title="Bottom 5 Communes by Average Price per Square Meter",
595
+ )
596
+ st.plotly_chart(fig)
597
+ st.dataframe(
598
+ bottom_communes.assign(
599
+ **{
600
+ "Average Price per m²": lambda x: x["Average Price per m²"].apply(
601
+ lambda y: f"{y:,.2f} €"
602
+ )
603
+ }
604
+ ),
605
+ hide_index=True,
606
+ column_order=("Commune Code", "Commune Name", "Average Price per m²"),
607
+ )
608
+
609
+ with right_co:
610
+ fig = px.bar(
611
+ top_communes.sort_values(by="Average Price per m²", ascending=True),
612
+ x="Commune Name",
613
+ y="Average Price per m²",
614
+ title="Top 5 Communes by Average Price per Square Meter",
615
+ )
616
+ st.plotly_chart(fig)
617
+ st.dataframe(
618
+ top_communes.assign(
619
+ **{
620
+ "Average Price per m²": lambda x: x["Average Price per m²"].apply(
621
+ lambda y: f"{y:,.2f} €"
622
+ )
623
+ }
624
+ ),
625
+ hide_index=True,
626
+ column_order=("Commune Code", "Commune Name", "Average Price per m²"),
627
+ )
628
 
629
+ ###############################################################################
630
+
631
+ st.subheader("Average Price per Square Meter in French Communes", divider=True)
632
  fig_department = display_choropleth_map_for_department(
633
  dataset_housing_prices,
634
  selected_departement,
635
  communes_geojson,
636
  min_global_commune_prixm2moyen,
637
  max_global_commune_prixm2moyen,
638
+ title=f"Average Price per Square Meter in Department {selected_departement_label} (Animated by Year)",
639
+ height_graph=1000,
 
 
640
  width_graph=1400,
641
  )
 
 
642
  st.plotly_chart(fig_department, use_container_width=False)
 
643
  st.write(
644
  "Missing values are represented in light grey, while actual data is shown in a gradient from red (high prices) to green (low prices)."
645
  )
646
+
647
+ ###############################################################################
648
+
649
+ st.subheader(
650
+ f"Historical Price comparaison in Selected Commune in Departement {selected_departement_label}",
651
+ divider=True,
652
  )
653
+
654
+ available_communes = dataset_housing_prices[
655
+ dataset_housing_prices["code_departement"] == selected_departement
656
+ ]["code_commune_insee"].unique()
657
+
658
+ let_col1, right_col2 = st.columns(2)
659
+ with let_col1:
660
+ # --- Commune Selection 1 ---
661
+ selected_commune_1 = st.selectbox(
662
+ "Select the first Commune",
663
+ options=available_communes,
664
+ format_func=lambda x: f"{x} - {insee_df[insee_df['code_commune_INSEE'] == x]['nom_commune_complet'].values[0]}"
665
+ if x in insee_df["code_commune_INSEE"].values
666
+ else x,
667
+ key="commune_select_1",
668
+ index=0 if len(available_communes) > 1 else 0,
669
+ )
670
+ with right_col2:
671
+ # --- Commune Selection 2 ---
672
+ selected_commune_2 = st.selectbox(
673
+ "Select the second Commune (Optional for comparison)",
674
+ options=[None]
675
+ + list(
676
+ available_communes
677
+ ), # Add None as an option to not select a second commune
678
+ format_func=lambda x: f"{x} - {insee_df[insee_df['code_commune_INSEE'] == x]['nom_commune_complet'].values[0]}"
679
+ if x is not None and x in insee_df["code_commune_INSEE"].values
680
+ else "None (Only show Commune 1)",
681
+ key="commune_select_2",
682
+ index=2 if len(available_communes) > 2 else 0,
683
+ )
684
+
685
+ # --- Data Preparation for Chart ---
686
+ all_communes_to_plot = []
687
+
688
+ # Process Commune 1 data
689
+ commune_data_1 = dataset_housing_prices[
690
+ dataset_housing_prices["code_commune_insee"] == selected_commune_1
691
+ ].copy()
692
+ if not commune_data_1.empty:
693
+ commune_data_1["annee"] = commune_data_1["annee"].astype(str)
694
+ commune_data_1 = commune_data_1.groupby("annee")["prixm2moyen"].mean().reset_index()
695
+ # Rename the price column to reflect the commune for the legend
696
+ commune_name_1 = (
697
+ insee_df[insee_df["code_commune_INSEE"] == selected_commune_1][
698
+ "nom_commune_complet"
699
+ ].values[0]
700
+ if selected_commune_1 in insee_df["code_commune_INSEE"].values
701
+ else selected_commune_1
702
+ )
703
+ commune_data_1.rename(columns={"prixm2moyen": commune_name_1}, inplace=True)
704
+ all_communes_to_plot.append(commune_data_1)
705
+ else:
706
+ st.warning(f"No data available for Commune 1: {selected_commune_1}")
707
+
708
+
709
+ # Process Commune 2 data if selected
710
+ if (
711
+ selected_commune_2 and selected_commune_2 != selected_commune_1
712
+ ): # Ensure a valid second commune is chosen and it's not the same as the first
713
+ commune_data_2 = dataset_housing_prices[
714
+ dataset_housing_prices["code_commune_insee"] == selected_commune_2
715
+ ].copy()
716
+ if not commune_data_2.empty:
717
+ commune_data_2["annee"] = commune_data_2["annee"].astype(str)
718
+ commune_data_2 = (
719
+ commune_data_2.groupby("annee")["prixm2moyen"].mean().reset_index()
720
+ )
721
+ # Rename the price column for the second commune
722
+ commune_name_2 = (
723
+ insee_df[insee_df["code_commune_INSEE"] == selected_commune_2][
724
+ "nom_commune_complet"
725
+ ].values[0]
726
+ if selected_commune_2 in insee_df["code_commune_INSEE"].values
727
+ else selected_commune_2
728
+ )
729
+ commune_data_2.rename(columns={"prixm2moyen": commune_name_2}, inplace=True)
730
+ all_communes_to_plot.append(commune_data_2)
731
+ else:
732
+ st.warning(f"No data available for Commune 2: {selected_commune_2}")
733
+ elif selected_commune_2 == selected_commune_1 and selected_commune_2 is not None:
734
+ st.info("You've selected the same commune for both. Showing only one line.")
735
+
736
+ # Combine dataframes for plotting
737
+ if all_communes_to_plot:
738
+ # Use reduce or pd.merge to combine, ensuring 'annee' is the common key
739
+
740
+ # Start with the first dataframe, then merge others
741
+ combined_df = reduce(
742
+ lambda left, right: pd.merge(left, right, on="annee", how="outer"),
743
+ all_communes_to_plot,
744
+ )
745
+ combined_df.set_index("annee", inplace=True)
746
+
747
+ st.line_chart(
748
+ combined_df,
749
+ use_container_width=True,
750
+ height=400,
751
+ x_label="Year",
752
+ y_label="Average Price per m² (€)",
753
+ )
754
+ else:
755
+ st.info("Please select at least one commune to display data.")