Nicolas Pierson committed on
Commit
95ce22b
·
unverified ·
1 Parent(s): 6c44bcf
Dockerfile CHANGED
@@ -23,7 +23,7 @@ COPY --chown=user .streamlit/ ./.streamlit/
23
  COPY --chown=user src/ ./src/
24
  COPY --chown=user src/pages/1_Historical_Prices.py ./src/pages/1_Historical_Prices.py
25
  COPY --chown=user src/pages/2_Prediction_Prices.py ./src/pages/2_Prediction_Prices.py
26
- COPY --chown=user src/pages/3_Historical_Risk_Score_Fire.py ./src/pages/3_Historical_Risk_Score_Fire.py
27
 
28
  EXPOSE 8501
29
 
 
23
  COPY --chown=user src/ ./src/
24
  COPY --chown=user src/pages/1_Historical_Prices.py ./src/pages/1_Historical_Prices.py
25
  COPY --chown=user src/pages/2_Prediction_Prices.py ./src/pages/2_Prediction_Prices.py
26
+ COPY --chown=user src/pages/4_Historical_Risk_Score_Fire.py ./src/pages/4_Historical_Risk_Score_Fire.py
27
 
28
  EXPOSE 8501
29
 
src/pages/0_Summary.py ADDED
@@ -0,0 +1,636 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import asyncio
3
+ import pandas as pd
4
+ from functools import reduce
5
+ import plotly.express as px
6
+
7
+ from pages.utils.utils import (
8
+ async_load_file_s3,
9
+ async_load_geojson_from_s3,
10
+ _format_department_code,
11
+ async_load_file_s3_gzip,
12
+ )
13
+
14
+ from pages.utils.graphs import (
15
+ display_choropleth_map_country,
16
+ display_choropleth_map_for_department,
17
+ )
18
+
19
+
20
+ # Internal async function to gather all data loading tasks
21
async def _load_all_data_async_internal_departements():
    """Load every department-level input of the summary page in one gather.

    The four loader coroutines are created up front and awaited together
    through ``asyncio.gather``.

    Returns:
        The list produced by ``asyncio.gather``, ordered as: department
        housing prices, department GeoJSON, commune reference table,
        department risk scores.
    """
    pending_loads = [
        async_load_file_s3(
            "processed/housing/dataset_departements_housing_prices.csv"
        ),
        async_load_geojson_from_s3("processed/referentiel/departements.geojson"),
        async_load_file_s3("processed/referentiel/ref_espace_communes.csv"),
        async_load_file_s3_gzip(
            "processed/risk-scores/risk-scores-departements-final.csv.gz"
        ),
    ]
    return await asyncio.gather(*pending_loads)
36
+
37
+
38
+ # Internal async function to gather all data loading tasks
39
async def _load_all_data_async_internal_communes():
    """Load every commune-level input of the summary page in one gather.

    The three loader coroutines are created up front and awaited together
    through ``asyncio.gather``.

    Returns:
        The list produced by ``asyncio.gather``, ordered as: commune housing
        prices, commune GeoJSON, commune risk scores.
    """
    pending_loads = [
        async_load_file_s3("processed/housing/dataset_housing_prices.csv"),
        async_load_geojson_from_s3("processed/referentiel/communes.geojson"),
        async_load_file_s3_gzip("processed/risk-scores/risk-scores-final.csv.gz"),
    ]
    return await asyncio.gather(*pending_loads)
50
+
51
+
52
@st.cache_resource
def load_all_data_wrapper_summary_country():
    """Synchronously run the department-level loader and return its results.

    The coroutine is driven to completion with ``asyncio.run`` so the
    Streamlit script can call this like a regular function. The
    ``st.cache_resource`` decorator wraps the call.
    """
    department_loader = _load_all_data_async_internal_departements()
    return asyncio.run(department_loader)
55
+
56
+
57
@st.cache_resource
def load_all_data_wrapper_summary_region():
    """Synchronously run the commune-level loader and return its results.

    The coroutine is driven to completion with ``asyncio.run`` so the
    Streamlit script can call this like a regular function. The
    ``st.cache_resource`` decorator wraps the call.
    """
    commune_loader = _load_all_data_async_internal_communes()
    return asyncio.run(commune_loader)
60
+
61
+
62
# --- Streamlit App Layout ---
st.set_page_config(
    page_title="Oasis - Summary", page_icon=":money_with_wings:", layout="wide"
)

# Display label shared by the ranking tables and bar charts.
SCORE_LABEL = "Combined score (price and global risk)"
# Year pre-selected in the year picker when it is present in the data.
DEFAULT_YEAR = 2024
# Column order used by both ranking tables.
RANKING_COLUMNS = ("Department Code", "Department Name", SCORE_LABEL)
# Number of departments shown in each ranking.
RANKING_SIZE = 5


def _min_max_scale(values):
    """Rescale a numeric Series linearly so its minimum maps to 0 and maximum to 1.

    A Series whose minimum equals its maximum divides by zero and yields
    NaN, exactly as the inline formula this helper replaces did.
    """
    lowest = values.min()
    return (values - lowest) / (values.max() - lowest)


def _build_department_name_lookup(reference_df):
    """Map each formatted department code to the first department name seen for it.

    The reference frame is passed through ``_format_department_code`` once
    here instead of once per looked-up row.
    """
    formatted_codes = _format_department_code(reference_df)["code_departement"]
    pairs = pd.DataFrame(
        {"code": formatted_codes, "name": reference_df["nom_departement"]}
    )
    pairs = pairs.dropna(subset=["code"]).drop_duplicates(subset="code", keep="first")
    return dict(zip(pairs["code"], pairs["name"]))


def _rank_departments(scores_df, year, name_lookup, *, ascending, positive_only=False):
    """Return the first ``RANKING_SIZE`` departments of ``year`` ordered by mean combined score.

    Args:
        scores_df: Frame with ``annee``, ``code_departement`` and
            ``combined_score`` columns.
        year: Value of ``annee`` to keep.
        name_lookup: Dict from department code to department name; codes
            missing from it are labelled "Unknown".
        ascending: Sort direction applied to the mean combined score.
        positive_only: When true, rows whose combined score is not strictly
            positive are dropped before averaging.

    Returns:
        A frame with "Department Code", the score column and "Department Name".
    """
    selection = scores_df["annee"] == year
    if positive_only:
        selection = selection & (scores_df["combined_score"] > 0)

    ranked = (
        scores_df[selection]
        .groupby("code_departement")["combined_score"]
        .mean()
        .reset_index()
        .sort_values(by="combined_score", ascending=ascending)
        .head(RANKING_SIZE)
        .rename(
            columns={
                "code_departement": "Department Code",
                "combined_score": SCORE_LABEL,
            }
        )
    )
    # Names are resolved after trimming, so only the displayed rows are looked up.
    names = [name_lookup.get(code, "Unknown") for code in ranked["Department Code"]]
    return ranked.assign(**{"Department Name": names})


###############################################################################

st.header('Summary of Historical "Good places"')

with st.spinner("Loading data and preparing maps..."):
    (
        dataset_departements_housing_prices,
        departements_geojson,
        insee_df,
        dataset_departements_risks,
    ) = load_all_data_wrapper_summary_country()

# Attach the risk score to each (department, year) housing-price row.
dataset_departements_risks = _format_department_code(dataset_departements_risks)
dataset_departements_housing_prices = dataset_departements_housing_prices.merge(
    dataset_departements_risks[["code_departement", "annee", "avg_risk_score"]],
    on=["code_departement", "annee"],
    how="left",
)

# Bring price and risk onto a common 0-1 scale before multiplying them.
for scaled_column in ("prixm2moyen", "avg_risk_score"):
    dataset_departements_housing_prices[scaled_column] = _min_max_scale(
        dataset_departements_housing_prices[scaled_column]
    )

# Single metric used by the map and the rankings: scaled price x scaled risk.
dataset_departements_housing_prices["combined_score"] = (
    dataset_departements_housing_prices["prixm2moyen"]
    * dataset_departements_housing_prices["avg_risk_score"]
)

st.subheader("The summary between prices and global risks in France", divider=True)
st.write(
    "This map summarizes the relationship between real estate prices and global risks in French departments."
)

fig_france = display_choropleth_map_country(
    dataset_departements_housing_prices,
    departements_geojson,
    metric_name="combined_score",
    metric_description="Summary of the relationship between prices and risks",
    red_gradient=True,
)
st.plotly_chart(fig_france, use_container_width=False)
# The map plots the combined score, so the caption names that metric.
st.write(
    "Missing values are represented in light grey, while actual data is shown in a gradient from red (high combined score) to green (low combined score)."
)

###############################################################################

st.subheader("Top & Bottom 5 Departments", divider=True)

st.write("Select a year to view the top and bottom departments by combined score.")

available_years = dataset_departements_housing_prices["annee"].unique()
year_choices = list(available_years)
selected_year = st.selectbox(
    "Select a Year",
    options=available_years,
    format_func=lambda x: f"{x}",
    index=year_choices.index(DEFAULT_YEAR) if DEFAULT_YEAR in year_choices else 0,
)

department_names = _build_department_name_lookup(insee_df)

top_departements = _rank_departments(
    dataset_departements_housing_prices,
    selected_year,
    department_names,
    ascending=False,
)

# Rows with a zero combined score are excluded from the bottom ranking.
bottom_departements = _rank_departments(
    dataset_departements_housing_prices,
    selected_year,
    department_names,
    ascending=True,
    positive_only=True,
)

left_co, right_co = st.columns(2)

with left_co:
    fig = px.bar(
        bottom_departements,
        x="Department Name",
        y=SCORE_LABEL,
        title="Bottom 5 Departments",
    )
    st.plotly_chart(fig)
    st.dataframe(
        bottom_departements,
        hide_index=True,
        column_order=RANKING_COLUMNS,
    )

with right_co:
    # Bars are drawn in ascending order; the table keeps the descending ranking.
    fig = px.bar(
        top_departements.sort_values(by=SCORE_LABEL, ascending=True),
        x="Department Name",
        y=SCORE_LABEL,
        title="Top 5 Departments",
    )
    st.plotly_chart(fig)
    st.dataframe(
        top_departements,
        hide_index=True,
        column_order=RANKING_COLUMNS,
    )
222
+
223
+ ###############################################################################
224
+
225
+ # st.subheader("Select Department(s) to View Historical Combined Scores", divider=True)
226
+
227
+ # with st.spinner("Loading data and preparing maps..."):
228
+ # (
229
+ # dataset_housing_prices,
230
+ # communes_geojson,
231
+ # dataset_risks,
232
+ # ) = load_all_data_wrapper_summary_region()
233
+
234
+
235
+ # # merge risks with housing prices (code_departement and annee)
236
+ # dataset_risks = _format_department_code(dataset_risks)
237
+ # dataset_housing_prices = dataset_housing_prices.merge(
238
+ # dataset_risks[["code_departement", "annee", "avg_risk_score"]],
239
+ # on=["code_departement", "annee"],
240
+ # how="left",
241
+ # )
242
+ # # scale the prixm2moyen to a range of 0-1 for better visualization
243
+ # dataset_housing_prices["prixm2moyen"] = (
244
+ # dataset_housing_prices["prixm2moyen"]
245
+ # - dataset_housing_prices["prixm2moyen"].min()
246
+ # ) / (
247
+ # dataset_housing_prices["prixm2moyen"].max()
248
+ # - dataset_housing_prices["prixm2moyen"].min()
249
+ # )
250
+
251
+ # # scale the avg_risk_score to a range of 0-1 for better visualization
252
+ # dataset_housing_prices["avg_risk_score"] = (
253
+ # dataset_housing_prices["avg_risk_score"]
254
+ # - dataset_housing_prices["avg_risk_score"].min()
255
+ # ) / (
256
+ # dataset_housing_prices["avg_risk_score"].max()
257
+ # - dataset_housing_prices["avg_risk_score"].min()
258
+ # )
259
+ # # combine prixm2moyen and avg_risk_score into a single column for visualization
260
+ # dataset_housing_prices["combined_score"] = (
261
+ # dataset_housing_prices["prixm2moyen"]
262
+ # * dataset_housing_prices["avg_risk_score"]
263
+ # )
264
+
265
+ # min_global_commune_avg_risk_score = dataset_housing_prices[
266
+ # dataset_housing_prices["combined_score"] > 0
267
+ # ]["combined_score"].min()
268
+ # max_global_commune_avg_risk_score = dataset_housing_prices["combined_score"].max()
269
+
270
+
271
+ # # Get all unique department codes for selectbox options
272
+ # all_departement_codes = _format_department_code(insee_df)["code_departement"].unique()
273
+
274
+ # # --- Department Selection 1 & 2 ---
275
+ # col_dept1, col_dept2 = st.columns(2)
276
+
277
+ # # Set default department values
278
+ # default_dept_1 = all_departement_codes[0] if len(all_departement_codes) > 0 else None
279
+ # default_dept_2 = all_departement_codes[1] if len(all_departement_codes) > 1 else None
280
+
281
+ # with col_dept1:
282
+ # selected_departement = st.selectbox(
283
+ # "Select the first Department",
284
+ # options=all_departement_codes,
285
+ # format_func=lambda x: f"{x} - {insee_df[insee_df['code_departement'] == x]['nom_departement'].values[0]}"
286
+ # if x in insee_df["code_departement"].values
287
+ # else x,
288
+ # key="departement_select_1",
289
+ # index=list(all_departement_codes).index(default_dept_1) if default_dept_1 else 0
290
+ # )
291
+
292
+ # with col_dept2:
293
+ # selected_departement_2 = st.selectbox(
294
+ # "Select the second Department (Optional for comparison)",
295
+ # options=[None] + list(all_departement_codes), # Add None option
296
+ # format_func=lambda x: f"{x} - {insee_df[insee_df['code_departement'] == x]['nom_departement'].values[0]}"
297
+ # if x is not None and x in insee_df["code_departement"].values
298
+ # else "None (Only show Department 1)",
299
+ # key="departement_select_2",
300
+ # index=list([None] + list(all_departement_codes)).index(default_dept_2) if default_dept_2 else 0
301
+ # )
302
+
303
+ # st.write("This chart shows the average price per square meter in the selected department(s) over the years, with a focus on climatic events.")
304
+
305
+ # # --- Data Preparation for Department Chart ---
306
+ # all_departments_to_plot = []
307
+
308
+ # # Process Department 1 data
309
+ # if selected_departement: # Ensure a department is selected
310
+ # department_data_1 = dataset_housing_prices[
311
+ # dataset_housing_prices["code_departement"] == selected_departement
312
+ # ].copy()
313
+ # if not department_data_1.empty:
314
+ # department_data_1["annee"] = department_data_1["annee"].astype(str)
315
+ # # Group by year and calculate mean for the department
316
+ # department_data_1 = department_data_1.groupby("annee")["combined_score"].mean().reset_index()
317
+ # # Get the department name for the legend
318
+ # departement_name_1 = (
319
+ # insee_df[insee_df["code_departement"] == selected_departement][
320
+ # "nom_departement"
321
+ # ].values[0]
322
+ # if selected_departement in insee_df["code_departement"].values
323
+ # else selected_departement
324
+ # )
325
+ # department_data_1.rename(columns={"combined_score": departement_name_1}, inplace=True)
326
+ # all_departments_to_plot.append(department_data_1)
327
+ # else:
328
+ # st.warning(f"No data available for Department 1: {selected_departement}")
329
+
330
+ # # Process Department 2 data if selected
331
+ # if (
332
+ # selected_departement_2 and selected_departement_2 != selected_departement
333
+ # ): # Ensure a valid second department is chosen and it's not the same as the first
334
+ # department_data_2 = dataset_housing_prices[
335
+ # dataset_housing_prices["code_departement"] == selected_departement_2
336
+ # ].copy()
337
+ # if not department_data_2.empty:
338
+ # department_data_2["annee"] = department_data_2["annee"].astype(str)
339
+ # # Group by year and calculate mean for the department
340
+ # department_data_2 = department_data_2.groupby("annee")["combined_score"].mean().reset_index()
341
+ # # Get the department name for the legend
342
+ # departement_name_2 = (
343
+ # insee_df[insee_df["code_departement"] == selected_departement_2][
344
+ # "nom_departement"
345
+ # ].values[0]
346
+ # if selected_departement_2 in insee_df["code_departement"].values
347
+ # else selected_departement_2
348
+ # )
349
+ # department_data_2.rename(columns={"combined_score": departement_name_2}, inplace=True)
350
+ # all_departments_to_plot.append(department_data_2)
351
+ # else:
352
+ # st.warning(f"No data available for Department 2: {selected_departement_2}")
353
+ # elif selected_departement_2 == selected_departement and selected_departement_2 is not None:
354
+ # st.info("You've selected the same department for both. Showing only one line.")
355
+
356
+
357
+ # # Combine dataframes for plotting the department comparison chart
358
+ # if all_departments_to_plot:
359
+ # combined_dept_df = reduce(
360
+ # lambda left, right: pd.merge(left, right, on="annee", how="outer"),
361
+ # all_departments_to_plot,
362
+ # )
363
+ # combined_dept_df.set_index("annee", inplace=True)
364
+
365
+ # fig_dept = px.line(
366
+ # combined_dept_df.reset_index(),
367
+ # x="annee",
368
+ # y=combined_dept_df.columns,
369
+ # title="Combined score (price and global risk)",
370
+ # labels={"annee": "Year", "value": "Combined score (price and global risk)"},
371
+ # )
372
+ # fig_dept.update_layout(
373
+ # xaxis_title="Year",
374
+ # yaxis_title="Combined score (price and global risk)",
375
+ # legend_title_text="Department",
376
+ # )
377
+ # st.plotly_chart(fig_dept, use_container_width=True)
378
+ # else:
379
+ # st.info("Please select at least one department to display data.")
380
+
381
+ # selected_departement_label = (f"{insee_df[insee_df['code_departement'] == selected_departement]['nom_departement'].values[0]}"
382
+ # if selected_departement in insee_df["code_departement"].values
383
+ # else selected_departement
384
+ # )
385
+
386
+ # # Prepare data for box plot
387
+ # box_plot_data = dataset_housing_prices[
388
+ # (dataset_housing_prices["code_departement"] == selected_departement)
389
+ # | (dataset_housing_prices["code_departement"] == selected_departement_2)
390
+ # ].copy()
391
+ # box_plot_data["annee"] = box_plot_data["annee"].astype(
392
+ # str
393
+ # ) # Ensure 'annee' is string for categorical x-axis
394
+ # # Create the box plot
395
+ # fig_box = px.box(
396
+ # box_plot_data,
397
+ # x="annee",
398
+ # y="combined_score",
399
+ # color="code_departement",
400
+ # title=f"Distribution of Prices in Department {selected_departement_label} by Year",
401
+ # )
402
+ # fig_box.update_layout(
403
+ # xaxis_title="Year",
404
+ # yaxis_title="Combined score (price and global risk)",
405
+ # )
406
+ # st.plotly_chart(fig_box, use_container_width=True)
407
+
408
+ # ###############################################################################
409
+
410
+ # # display the top 5 communes in the selected department
411
+ # st.subheader(
412
+ # f"Top and Bottom 5 Communes in Department {selected_departement_label}",
413
+ # divider=True,
414
+ # )
415
+
416
+ # # selected year
417
+ # selected_year_communes = st.selectbox(
418
+ # "Select a Year",
419
+ # options=dataset_housing_prices["annee"].unique(),
420
+ # format_func=lambda x: str(x),
421
+ # key="year_communes_selectbox",
422
+ # index=list(dataset_housing_prices["annee"].unique()).index(2024) if 2024 in dataset_housing_prices["annee"].unique() else 0
423
+ # )
424
+
425
+ # top_communes = (
426
+ # dataset_housing_prices[
427
+ # (dataset_housing_prices["code_departement"] == selected_departement)
428
+ # & (dataset_housing_prices["annee"] == selected_year_communes)
429
+ # ]
430
+ # .groupby("code_commune_insee")["combined_score"]
431
+ # .mean()
432
+ # .reset_index()
433
+ # .sort_values(by="combined_score", ascending=False)
434
+ # .rename(
435
+ # columns={
436
+ # "code_commune_insee": "Commune Code",
437
+ # "combined_score": "Combined score (price and global risk)",
438
+ # }
439
+ # )
440
+ # .assign(
441
+ # **{
442
+ # "Commune Name": lambda x: x["Commune Code"].apply(
443
+ # lambda code: insee_df[
444
+ # _format_department_code(insee_df)["code_commune_INSEE"] == code
445
+ # ]["nom_commune_complet"].values[0]
446
+ # if code
447
+ # in _format_department_code(insee_df)["code_commune_INSEE"].values
448
+ # else "Unknown"
449
+ # )
450
+ # }
451
+ # )
452
+ # .head(5)
453
+ # )
454
+ # bottom_communes = (
455
+ # dataset_housing_prices[
456
+ # (dataset_housing_prices["code_departement"] == selected_departement)
457
+ # & (dataset_housing_prices["annee"] == selected_year_communes)
458
+ # & (dataset_housing_prices["combined_score"] > 0)
459
+ # ]
460
+ # .groupby("code_commune_insee")["combined_score"]
461
+ # .mean()
462
+ # .reset_index()
463
+ # .sort_values(by="combined_score", ascending=True)
464
+ # .rename(
465
+ # columns={
466
+ # "code_commune_insee": "Commune Code",
467
+ # "combined_score": "Combined score (price and global risk)",
468
+ # }
469
+ # )
470
+ # .assign(
471
+ # **{
472
+ # "Commune Name": lambda x: x["Commune Code"].apply(
473
+ # lambda code: insee_df[
474
+ # _format_department_code(insee_df)["code_commune_INSEE"] == code
475
+ # ]["nom_commune_complet"].values[0]
476
+ # if code
477
+ # in _format_department_code(insee_df)["code_commune_INSEE"].values
478
+ # else "Unknown"
479
+ # )
480
+ # }
481
+ # )
482
+ # .head(5)
483
+ # )
484
+ # left_co, right_co = st.columns(2)
485
+ # with left_co:
486
+ # fig = px.bar(
487
+ # bottom_communes.sort_values(by="Combined score (price and global risk)", ascending=True),
488
+ # x="Commune Name",
489
+ # y="Combined score (price and global risk)",
490
+ # title="Bottom 5 Communes",
491
+ # )
492
+ # st.plotly_chart(fig)
493
+ # st.dataframe(
494
+ # bottom_communes,
495
+ # hide_index=True,
496
+ # column_order=("Commune Code", "Commune Name", "Combined score (price and global risk)"),
497
+ # )
498
+
499
+ # with right_co:
500
+ # fig = px.bar(
501
+ # top_communes.sort_values(by="Combined score (price and global risk)", ascending=True),
502
+ # x="Commune Name",
503
+ # y="Combined score (price and global risk)",
504
+ # title="Top 5 Communes",
505
+ # )
506
+ # st.plotly_chart(fig)
507
+ # st.dataframe(
508
+ # top_communes,
509
+ # hide_index=True,
510
+ # column_order=("Commune Code", "Commune Name", "Combined score (price and global risk)"),
511
+ # )
512
+
513
+ # ###############################################################################
514
+
515
+ # st.subheader(
516
+ # "Average Price per Square Meter in French Communes", divider=True
517
+ # )
518
+ # fig_department = display_choropleth_map_for_department(
519
+ # dataset_housing_prices,
520
+ # selected_departement,
521
+ # communes_geojson,
522
+ # min_global_commune_avg_risk_score,
523
+ # max_global_commune_avg_risk_score,
524
+ # title=f"Average Price per Square Meter in Department {selected_departement_label} (Animated by Year)",
525
+ # height_graph=1000,
526
+ # width_graph=1400,
527
+ # )
528
+ # st.plotly_chart(fig_department, use_container_width=False)
529
+ # st.write(
530
+ # "Missing values are represented in light grey, while actual data is shown in a gradient from red (high prices) to green (low prices)."
531
+ # )
532
+
533
+ # ###############################################################################
534
+
535
+ # st.subheader(f"Historical Price comparaison in Selected Commune in Departement {selected_departement_label}", divider=True)
536
+
537
+ # available_communes = dataset_housing_prices[
538
+ # dataset_housing_prices["code_departement"] == selected_departement
539
+ # ]["code_commune_insee"].unique()
540
+
541
+ # let_col1, right_col2 = st.columns(2)
542
+ # with let_col1:
543
+ # # --- Commune Selection 1 ---
544
+ # selected_commune_1 = st.selectbox(
545
+ # "Select the first Commune",
546
+ # options=available_communes,
547
+ # format_func=lambda x: f"{x} - {insee_df[insee_df['code_commune_INSEE'] == x]['nom_commune_complet'].values[0]}"
548
+ # if x in insee_df["code_commune_INSEE"].values
549
+ # else x,
550
+ # key="commune_select_1",
551
+ # index=0 if len(available_communes) > 1 else 0
552
+ # )
553
+ # with right_col2:
554
+ # # --- Commune Selection 2 ---
555
+ # selected_commune_2 = st.selectbox(
556
+ # "Select the second Commune (Optional for comparison)",
557
+ # options=[None]
558
+ # + list(available_communes), # Add None as an option to not select a second commune
559
+ # format_func=lambda x: f"{x} - {insee_df[insee_df['code_commune_INSEE'] == x]['nom_commune_complet'].values[0]}"
560
+ # if x is not None and x in insee_df["code_commune_INSEE"].values
561
+ # else "None (Only show Commune 1)",
562
+ # key="commune_select_2",
563
+ # index=2 if len(available_communes) > 2 else 0
564
+ # )
565
+
566
+ # # --- Data Preparation for Chart ---
567
+ # all_communes_to_plot = []
568
+
569
+ # # Process Commune 1 data
570
+ # commune_data_1 = dataset_housing_prices[
571
+ # dataset_housing_prices["code_commune_insee"] == selected_commune_1
572
+ # ].copy()
573
+ # if not commune_data_1.empty:
574
+ # commune_data_1["annee"] = commune_data_1["annee"].astype(str)
575
+ # commune_data_1 = commune_data_1.groupby("annee")["combined_score"].mean().reset_index()
576
+ # # Rename the price column to reflect the commune for the legend
577
+ # commune_name_1 = (
578
+ # insee_df[insee_df["code_commune_INSEE"] == selected_commune_1][
579
+ # "nom_commune_complet"
580
+ # ].values[0]
581
+ # if selected_commune_1 in insee_df["code_commune_INSEE"].values
582
+ # else selected_commune_1
583
+ # )
584
+ # commune_data_1.rename(columns={"combined_score": commune_name_1}, inplace=True)
585
+ # all_communes_to_plot.append(commune_data_1)
586
+ # else:
587
+ # st.warning(f"No data available for Commune 1: {selected_commune_1}")
588
+
589
+
590
+ # # Process Commune 2 data if selected
591
+ # if (
592
+ # selected_commune_2 and selected_commune_2 != selected_commune_1
593
+ # ): # Ensure a valid second commune is chosen and it's not the same as the first
594
+ # commune_data_2 = dataset_housing_prices[
595
+ # dataset_housing_prices["code_commune_insee"] == selected_commune_2
596
+ # ].copy()
597
+ # if not commune_data_2.empty:
598
+ # commune_data_2["annee"] = commune_data_2["annee"].astype(str)
599
+ # commune_data_2 = (
600
+ # commune_data_2.groupby("annee")["combined_score"].mean().reset_index()
601
+ # )
602
+ # # Rename the price column for the second commune
603
+ # commune_name_2 = (
604
+ # insee_df[insee_df["code_commune_INSEE"] == selected_commune_2][
605
+ # "nom_commune_complet"
606
+ # ].values[0]
607
+ # if selected_commune_2 in insee_df["code_commune_INSEE"].values
608
+ # else selected_commune_2
609
+ # )
610
+ # commune_data_2.rename(columns={"combined_score": commune_name_2}, inplace=True)
611
+ # all_communes_to_plot.append(commune_data_2)
612
+ # else:
613
+ # st.warning(f"No data available for Commune 2: {selected_commune_2}")
614
+ # elif selected_commune_2 == selected_commune_1 and selected_commune_2 is not None:
615
+ # st.info("You've selected the same commune for both. Showing only one line.")
616
+
617
+ # # Combine dataframes for plotting
618
+ # if all_communes_to_plot:
619
+ # # Use reduce or pd.merge to combine, ensuring 'annee' is the common key
620
+
621
+ # # Start with the first dataframe, then merge others
622
+ # combined_df = reduce(
623
+ # lambda left, right: pd.merge(left, right, on="annee", how="outer"),
624
+ # all_communes_to_plot,
625
+ # )
626
+ # combined_df.set_index("annee", inplace=True)
627
+
628
+ # st.line_chart(
629
+ # combined_df,
630
+ # use_container_width=True,
631
+ # height=400,
632
+ # x_label="Year",
633
+ # y_label="Combined score (price and global risk)",
634
+ # )
635
+ # else:
636
+ # st.info("Please select at least one commune to display data.")
src/pages/1_Historical_Prices.py CHANGED
@@ -26,12 +26,9 @@ async def _load_all_data_async_internal_departements():
26
  "processed/referentiel/departements.geojson"
27
  )
28
  insee_task = async_load_file_s3("processed/referentiel/ref_espace_communes.csv")
29
- risks_df_task = async_load_file_s3_gzip(
30
- "processed/risk-scores/risk-scores-departements-final.csv.gz"
31
- )
32
 
33
  return await asyncio.gather(
34
- departements_df_task, departements_geojson_task, insee_task, risks_df_task
35
  )
36
 
37
 
@@ -43,10 +40,7 @@ async def _load_all_data_async_internal_communes():
43
  communes_geojson_task = async_load_geojson_from_s3(
44
  "processed/referentiel/communes.geojson"
45
  )
46
- risks_df_task = async_load_file_s3_gzip(
47
- "processed/risk-scores/risk-scores-final.csv.gz"
48
- )
49
- return await asyncio.gather(communes_df_task, communes_geojson_task, risks_df_task)
50
 
51
 
52
  @st.cache_resource
@@ -71,7 +65,6 @@ with st.spinner("Loading data and preparing maps..."):
71
  dataset_departements_housing_prices,
72
  departements_geojson,
73
  insee_df,
74
- dataset_departements_risks,
75
  ) = load_all_data_wrapper_historical_prices_country()
76
 
77
  st.subheader(
@@ -339,7 +332,6 @@ with st.spinner("Loading data and preparing maps..."):
339
  (
340
  dataset_housing_prices,
341
  communes_geojson,
342
- dataset_risks,
343
  ) = load_all_data_wrapper_historical_prices_region()
344
 
345
  min_global_commune_prixm2moyen = dataset_housing_prices[
 
26
  "processed/referentiel/departements.geojson"
27
  )
28
  insee_task = async_load_file_s3("processed/referentiel/ref_espace_communes.csv")
 
 
 
29
 
30
  return await asyncio.gather(
31
+ departements_df_task, departements_geojson_task, insee_task
32
  )
33
 
34
 
 
40
  communes_geojson_task = async_load_geojson_from_s3(
41
  "processed/referentiel/communes.geojson"
42
  )
43
+ return await asyncio.gather(communes_df_task, communes_geojson_task)
 
 
 
44
 
45
 
46
  @st.cache_resource
 
65
  dataset_departements_housing_prices,
66
  departements_geojson,
67
  insee_df,
 
68
  ) = load_all_data_wrapper_historical_prices_country()
69
 
70
  st.subheader(
 
332
  (
333
  dataset_housing_prices,
334
  communes_geojson,
 
335
  ) = load_all_data_wrapper_historical_prices_region()
336
 
337
  min_global_commune_prixm2moyen = dataset_housing_prices[
src/pages/utils/graphs.py CHANGED
@@ -65,7 +65,7 @@ def display_choropleth_map_country(
65
  zoom=5,
66
  opacity=0.75,
67
  hover_name="code_departement",
68
- hover_data={metric_name: ":.0f", "annee": True},
69
  title=title,
70
  height=height,
71
  width=width,
 
65
  zoom=5,
66
  opacity=0.75,
67
  hover_name="code_departement",
68
+ hover_data={metric_name: ":.1f", "annee": True},
69
  title=title,
70
  height=height,
71
  width=width,