Spaces:

pradelf
/

oasis-web

Runtime error

App Files Files Community

oasis-web / src /pages /0_Summary.py

pradelf

Update src/pages/0_Summary.py

90f0bcb verified 3 months ago

raw

history blame contribute delete

24.7 kB

	import streamlit as st
	import asyncio
	import pandas as pd
	from functools import reduce
	import plotly.express as px

	from pages.utils.utils import (
	async_load_file_s3,
	async_load_geojson_from_s3,
	_format_department_code,
	async_load_file_s3_gzip,
	)

	from pages.utils.graphs import (
	display_choropleth_map_country,
	display_choropleth_map_for_department,
	)


	# Concurrent loader for the department-level inputs of the country summary.
	async def _load_all_data_async_internal_departements():
	    """Load the department-level summary inputs concurrently.

	    Returns:
	        The list produced by ``asyncio.gather``, in this fixed order:
	        department housing prices, departments GeoJSON, the commune
	        reference table, and department risk scores.
	    """
	    # The coroutines are created up front and awaited together so the
	    # downloads can overlap; gather preserves the order of its arguments.
	    pending_loads = (
	        async_load_file_s3("processed/housing/dataset_departements_prices.csv"),
	        async_load_geojson_from_s3("processed/referentiel/departements.geojson"),
	        async_load_file_s3("processed/referentiel/ref_espace_communes.csv"),
	        async_load_file_s3_gzip(
	            "processed/risk-scores/risk-scores-departements-final.csv.gz"
	        ),
	    )
	    return await asyncio.gather(*pending_loads)


	# Concurrent loader for the commune-level inputs of the regional summary.
	async def _load_all_data_async_internal_communes():
	    """Load the commune-level summary inputs concurrently.

	    Returns:
	        The list produced by ``asyncio.gather``, in this fixed order:
	        commune housing prices, communes GeoJSON, and commune risk scores.
	    """
	    # gather returns results in the same order as the awaitables given here.
	    pending_loads = (
	        async_load_file_s3("processed/housing/dataset_housing_prices.csv"),
	        async_load_geojson_from_s3("processed/referentiel/communes.geojson"),
	        async_load_file_s3_gzip("processed/risk-scores/risk-scores-final.csv.gz"),
	    )
	    return await asyncio.gather(*pending_loads)


	@st.cache_resource
	def load_all_data_wrapper_summary_country():
	    """Synchronously load the department-level datasets for the country view.

	    Runs the async department loader to completion with ``asyncio.run`` and
	    returns its result list unchanged. The ``st.cache_resource`` decorator
	    caches that result, so the load is not repeated on every script rerun.
	    """
	    loaded_inputs = asyncio.run(_load_all_data_async_internal_departements())
	    return loaded_inputs


	@st.cache_resource
	def load_all_data_wrapper_summary_region():
	    """Synchronously load the commune-level datasets for the regional view.

	    Runs the async commune loader to completion with ``asyncio.run`` and
	    returns its result list unchanged. The ``st.cache_resource`` decorator
	    caches that result, so the load is not repeated on every script rerun.
	    """
	    loaded_inputs = asyncio.run(_load_all_data_async_internal_communes())
	    return loaded_inputs


	# --- Page helpers ---
	# Display label shared by the ranking tables and bar charts.
	_SCORE_LABEL = "Combined score (price and global risk)"


	def _min_max_scale(values):
	    """Rescale a numeric Series linearly so its minimum is 0 and maximum is 1.

	    When every value is identical the span is zero and the result is NaN,
	    which matches the inline formula this helper replaces.
	    """
	    lowest = values.min()
	    return (values - lowest) / (values.max() - lowest)


	def _department_names_by_code(reference_df):
	    """Build a ``{formatted department code: department name}`` dict.

	    The reference table is formatted once here instead of once per looked-up
	    code. For a code that appears on several rows, the name of the first
	    row wins, matching the previous ``.values[0]`` lookup.
	    """
	    formatted_codes = _format_department_code(reference_df)["code_departement"]
	    is_first_row = ~formatted_codes.duplicated(keep="first")
	    return dict(
	        zip(
	            formatted_codes[is_first_row],
	            reference_df.loc[is_first_row, "nom_departement"],
	        )
	    )


	def _five_departements_by_score(
	    scores_df, year, names_by_code, *, ascending, positive_only=False
	):
	    """Return the five departments at one end of the combined-score ranking.

	    Args:
	        scores_df: Frame with ``annee``, ``code_departement`` and
	            ``combined_score`` columns.
	        year: Value of ``annee`` to keep.
	        names_by_code: Dict mapping department code to display name; codes
	            missing from it are labelled ``"Unknown"``.
	        ascending: ``True`` for the lowest scores first, ``False`` for the
	            highest scores first.
	        positive_only: When true, rows whose combined score is not strictly
	            positive (including NaN) are dropped before averaging.

	    Returns:
	        A frame of at most five rows with the columns ``Department Code``,
	        the combined-score label, and ``Department Name``.
	    """
	    keep = scores_df["annee"] == year
	    if positive_only:
	        keep = keep & (scores_df["combined_score"] > 0)
	    return (
	        scores_df[keep]
	        .groupby("code_departement")["combined_score"]
	        .mean()
	        .reset_index()
	        .sort_values(by="combined_score", ascending=ascending)
	        # Trim before attaching names so only five lookups are performed.
	        .head(5)
	        .rename(
	            columns={
	                "code_departement": "Department Code",
	                "combined_score": _SCORE_LABEL,
	            }
	        )
	        .assign(
	            **{
	                "Department Name": lambda ranked: ranked["Department Code"].map(
	                    lambda code: names_by_code.get(code, "Unknown")
	                )
	            }
	        )
	    )


	# --- Streamlit App Layout ---
	st.set_page_config(page_title="Oasis - Summary", page_icon=":money_with_wings:", layout="wide")

	###############################################################################

	st.header("Summary of Historical \"Good places\"")

	with st.spinner("Loading data and preparing maps..."):
	    (
	        dataset_departements_housing_prices,
	        departements_geojson,
	        insee_df,
	        dataset_departements_risks,
	    ) = load_all_data_wrapper_summary_country()

	# merge risks with housing prices (code_departement and annee)
	dataset_departements_risks = _format_department_code(dataset_departements_risks)
	dataset_departements_housing_prices = dataset_departements_housing_prices.merge(
	    dataset_departements_risks[["code_departement", "annee", "avg_risk_score"]],
	    on=["code_departement", "annee"],
	    how="left",
	)

	# scale both metrics to a range of 0-1 so they can be multiplied on equal footing
	for metric_column in ("prixm2moyen", "avg_risk_score"):
	    dataset_departements_housing_prices[metric_column] = _min_max_scale(
	        dataset_departements_housing_prices[metric_column]
	    )

	# combine prixm2moyen and avg_risk_score into a single column for visualization
	dataset_departements_housing_prices["combined_score"] = (
	    dataset_departements_housing_prices["prixm2moyen"]
	    * dataset_departements_housing_prices["avg_risk_score"]
	)

	st.subheader(
	    "The summary between prices and global risks in France", divider=True
	)
	st.write(
	    "This map summarizes the relationship between real estate prices and global risks in French departments."
	)

	fig_france = display_choropleth_map_country(
	    dataset_departements_housing_prices,
	    departements_geojson,
	    metric_name="combined_score",
	    metric_description="Summary of the relationship between prices and risks",
	    red_gradient=True,
	)
	st.plotly_chart(fig_france, use_container_width=False)
	st.write(
	    "Missing values are represented in light grey, while actual data is shown in a gradient from red (high prices) to green (low prices)."
	)

	###############################################################################

	st.subheader("Top & Bottom 5 Departments", divider=True)

	st.write("Select a year to view the top and bottom departments by combined score.")

	# Computed once; the selectbox defaults to 2024 when that year is present.
	available_years = dataset_departements_housing_prices["annee"].unique()
	selected_year = st.selectbox(
	    "Select a Year",
	    options=available_years,
	    format_func=lambda x: f"{x}",
	    index=list(available_years).index(2024) if 2024 in available_years else 0,
	)

	# Built once per rerun and shared by both rankings.
	department_names = _department_names_by_code(insee_df)

	top_departements = _five_departements_by_score(
	    dataset_departements_housing_prices,
	    selected_year,
	    department_names,
	    ascending=False,
	)

	# The bottom ranking ignores non-positive scores so that departments with a
	# zero combined score do not fill the list.
	bottom_departements = _five_departements_by_score(
	    dataset_departements_housing_prices,
	    selected_year,
	    department_names,
	    ascending=True,
	    positive_only=True,
	)

	ranking_column_order = ("Department Code", "Department Name", _SCORE_LABEL,)
	left_co, right_co = st.columns(2)

	with left_co:
	    fig = px.bar(
	        bottom_departements,
	        x="Department Name",
	        y=_SCORE_LABEL,
	        title="Bottom 5 Departments",
	    )
	    st.plotly_chart(fig)
	    st.dataframe(
	        bottom_departements,
	        hide_index=True,
	        column_order=ranking_column_order,
	    )

	with right_co:
	    # display a bar chart of the top_departements (bars in ascending order)
	    fig = px.bar(
	        top_departements.sort_values(by=_SCORE_LABEL, ascending=True),
	        x="Department Name",
	        y=_SCORE_LABEL,
	        title="Top 5 Departments",
	    )
	    st.plotly_chart(fig)
	    st.dataframe(
	        top_departements,
	        hide_index=True,
	        column_order=ranking_column_order,
	    )

	###############################################################################

	# st.subheader("Select Department(s) to View Historical Combined Scores", divider=True)

	# with st.spinner("Loading data and preparing maps..."):
	# (
	# dataset_housing_prices,
	# communes_geojson,
	# dataset_risks,
	# ) = load_all_data_wrapper_summary_region()


	# # merge risks with housing prices (code_departement and annee)
	# dataset_risks = _format_department_code(dataset_risks)
	# dataset_housing_prices = dataset_housing_prices.merge(
	# dataset_risks[["code_departement", "annee", "avg_risk_score"]],
	# on=["code_departement", "annee"],
	# how="left",
	# )
	# # scale the prixm2moyen to a range of 0-1 for better visualization
	# dataset_housing_prices["prixm2moyen"] = (
	# dataset_housing_prices["prixm2moyen"]
	# - dataset_housing_prices["prixm2moyen"].min()
	# ) / (
	# dataset_housing_prices["prixm2moyen"].max()
	# - dataset_housing_prices["prixm2moyen"].min()
	# )

	# # scale the avg_risk_score to a range of 0-1 for better visualization
	# dataset_housing_prices["avg_risk_score"] = (
	# dataset_housing_prices["avg_risk_score"]
	# - dataset_housing_prices["avg_risk_score"].min()
	# ) / (
	# dataset_housing_prices["avg_risk_score"].max()
	# - dataset_housing_prices["avg_risk_score"].min()
	# )
	# # combine prixm2moyen and avg_risk_score into a single column for visualization
	# dataset_housing_prices["combined_score"] = (
	# dataset_housing_prices["prixm2moyen"]
	# * dataset_housing_prices["avg_risk_score"]
	# )

	# min_global_commune_avg_risk_score = dataset_housing_prices[
	# dataset_housing_prices["combined_score"] > 0
	# ]["combined_score"].min()
	# max_global_commune_avg_risk_score = dataset_housing_prices["combined_score"].max()


	# # Get all unique department codes for selectbox options
	# all_departement_codes = _format_department_code(insee_df)["code_departement"].unique()

	# # --- Department Selection 1 & 2 ---
	# col_dept1, col_dept2 = st.columns(2)

	# # Set default department values
	# default_dept_1 = all_departement_codes[0] if len(all_departement_codes) > 0 else None
	# default_dept_2 = all_departement_codes[1] if len(all_departement_codes) > 1 else None

	# with col_dept1:
	# selected_departement = st.selectbox(
	# "Select the first Department",
	# options=all_departement_codes,
	# format_func=lambda x: f"{x} - {insee_df[insee_df['code_departement'] == x]['nom_departement'].values[0]}"
	# if x in insee_df["code_departement"].values
	# else x,
	# key="departement_select_1",
	# index=list(all_departement_codes).index(default_dept_1) if default_dept_1 else 0
	# )

	# with col_dept2:
	# selected_departement_2 = st.selectbox(
	# "Select the second Department (Optional for comparison)",
	# options=[None] + list(all_departement_codes), # Add None option
	# format_func=lambda x: f"{x} - {insee_df[insee_df['code_departement'] == x]['nom_departement'].values[0]}"
	# if x is not None and x in insee_df["code_departement"].values
	# else "None (Only show Department 1)",
	# key="departement_select_2",
	# index=list([None] + list(all_departement_codes)).index(default_dept_2) if default_dept_2 else 0
	# )

	# st.write("This chart shows the average price per square meter in the selected department(s) over the years, with a focus on climatic events.")

	# # --- Data Preparation for Department Chart ---
	# all_departments_to_plot = []

	# # Process Department 1 data
	# if selected_departement: # Ensure a department is selected
	# department_data_1 = dataset_housing_prices[
	# dataset_housing_prices["code_departement"] == selected_departement
	# ].copy()
	# if not department_data_1.empty:
	# department_data_1["annee"] = department_data_1["annee"].astype(str)
	# # Group by year and calculate mean for the department
	# department_data_1 = department_data_1.groupby("annee")["combined_score"].mean().reset_index()
	# # Get the department name for the legend
	# departement_name_1 = (
	# insee_df[insee_df["code_departement"] == selected_departement][
	# "nom_departement"
	# ].values[0]
	# if selected_departement in insee_df["code_departement"].values
	# else selected_departement
	# )
	# department_data_1.rename(columns={"combined_score": departement_name_1}, inplace=True)
	# all_departments_to_plot.append(department_data_1)
	# else:
	# st.warning(f"No data available for Department 1: {selected_departement}")

	# # Process Department 2 data if selected
	# if (
	# selected_departement_2 and selected_departement_2 != selected_departement
	# ): # Ensure a valid second department is chosen and it's not the same as the first
	# department_data_2 = dataset_housing_prices[
	# dataset_housing_prices["code_departement"] == selected_departement_2
	# ].copy()
	# if not department_data_2.empty:
	# department_data_2["annee"] = department_data_2["annee"].astype(str)
	# # Group by year and calculate mean for the department
	# department_data_2 = department_data_2.groupby("annee")["combined_score"].mean().reset_index()
	# # Get the department name for the legend
	# departement_name_2 = (
	# insee_df[insee_df["code_departement"] == selected_departement_2][
	# "nom_departement"
	# ].values[0]
	# if selected_departement_2 in insee_df["code_departement"].values
	# else selected_departement_2
	# )
	# department_data_2.rename(columns={"combined_score": departement_name_2}, inplace=True)
	# all_departments_to_plot.append(department_data_2)
	# else:
	# st.warning(f"No data available for Department 2: {selected_departement_2}")
	# elif selected_departement_2 == selected_departement and selected_departement_2 is not None:
	# st.info("You've selected the same department for both. Showing only one line.")


	# # Combine dataframes for plotting the department comparison chart
	# if all_departments_to_plot:
	# combined_dept_df = reduce(
	# lambda left, right: pd.merge(left, right, on="annee", how="outer"),
	# all_departments_to_plot,
	# )
	# combined_dept_df.set_index("annee", inplace=True)

	# fig_dept = px.line(
	# combined_dept_df.reset_index(),
	# x="annee",
	# y=combined_dept_df.columns,
	# title="Combined score (price and global risk)",
	# labels={"annee": "Year", "value": "Combined score (price and global risk)"},
	# )
	# fig_dept.update_layout(
	# xaxis_title="Year",
	# yaxis_title="Combined score (price and global risk)",
	# legend_title_text="Department",
	# )
	# st.plotly_chart(fig_dept, use_container_width=True)
	# else:
	# st.info("Please select at least one department to display data.")

	# selected_departement_label = (f"{insee_df[insee_df['code_departement'] == selected_departement]['nom_departement'].values[0]}"
	# if selected_departement in insee_df["code_departement"].values
	# else selected_departement
	# )

	# # Prepare data for box plot
	# box_plot_data = dataset_housing_prices[
	# (dataset_housing_prices["code_departement"] == selected_departement)
	# | (dataset_housing_prices["code_departement"] == selected_departement_2)
	# ].copy()
	# box_plot_data["annee"] = box_plot_data["annee"].astype(
	# str
	# ) # Ensure 'annee' is string for categorical x-axis
	# # Create the box plot
	# fig_box = px.box(
	# box_plot_data,
	# x="annee",
	# y="combined_score",
	# color="code_departement",
	# title=f"Distribution of Prices in Department {selected_departement_label} by Year",
	# )
	# fig_box.update_layout(
	# xaxis_title="Year",
	# yaxis_title="Combined score (price and global risk)",
	# )
	# st.plotly_chart(fig_box, use_container_width=True)

	# ###############################################################################

	# # display the top 5 communes in the selected department
	# st.subheader(
	# f"Top and Bottom 5 Communes in Department {selected_departement_label}",
	# divider=True,
	# )

	# # selected year
	# selected_year_communes = st.selectbox(
	# "Select a Year",
	# options=dataset_housing_prices["annee"].unique(),
	# format_func=lambda x: str(x),
	# key="year_communes_selectbox",
	# index=list(dataset_housing_prices["annee"].unique()).index(2024) if 2024 in dataset_housing_prices["annee"].unique() else 0
	# )

	# top_communes = (
	# dataset_housing_prices[
	# (dataset_housing_prices["code_departement"] == selected_departement)
	# & (dataset_housing_prices["annee"] == selected_year_communes)
	# ]
	# .groupby("code_commune_insee")["combined_score"]
	# .mean()
	# .reset_index()
	# .sort_values(by="combined_score", ascending=False)
	# .rename(
	# columns={
	# "code_commune_insee": "Commune Code",
	# "combined_score": "Combined score (price and global risk)",
	# }
	# )
	# .assign(
	# **{
	# "Commune Name": lambda x: x["Commune Code"].apply(
	# lambda code: insee_df[
	# _format_department_code(insee_df)["code_commune_INSEE"] == code
	# ]["nom_commune_complet"].values[0]
	# if code
	# in _format_department_code(insee_df)["code_commune_INSEE"].values
	# else "Unknown"
	# )
	# }
	# )
	# .head(5)
	# )
	# bottom_communes = (
	# dataset_housing_prices[
	# (dataset_housing_prices["code_departement"] == selected_departement)
	# & (dataset_housing_prices["annee"] == selected_year_communes)
	# & (dataset_housing_prices["combined_score"] > 0)
	# ]
	# .groupby("code_commune_insee")["combined_score"]
	# .mean()
	# .reset_index()
	# .sort_values(by="combined_score", ascending=True)
	# .rename(
	# columns={
	# "code_commune_insee": "Commune Code",
	# "combined_score": "Combined score (price and global risk)",
	# }
	# )
	# .assign(
	# **{
	# "Commune Name": lambda x: x["Commune Code"].apply(
	# lambda code: insee_df[
	# _format_department_code(insee_df)["code_commune_INSEE"] == code
	# ]["nom_commune_complet"].values[0]
	# if code
	# in _format_department_code(insee_df)["code_commune_INSEE"].values
	# else "Unknown"
	# )
	# }
	# )
	# .head(5)
	# )
	# left_co, right_co = st.columns(2)
	# with left_co:
	# fig = px.bar(
	# bottom_communes.sort_values(by="Combined score (price and global risk)", ascending=True),
	# x="Commune Name",
	# y="Combined score (price and global risk)",
	# title="Bottom 5 Communes",
	# )
	# st.plotly_chart(fig)
	# st.dataframe(
	# bottom_communes,
	# hide_index=True,
	# column_order=("Commune Code", "Commune Name", "Combined score (price and global risk)"),
	# )

	# with right_co:
	# fig = px.bar(
	# top_communes.sort_values(by="Combined score (price and global risk)", ascending=True),
	# x="Commune Name",
	# y="Combined score (price and global risk)",
	# title="Top 5 Communes",
	# )
	# st.plotly_chart(fig)
	# st.dataframe(
	# top_communes,
	# hide_index=True,
	# column_order=("Commune Code", "Commune Name", "Combined score (price and global risk)"),
	# )

	# ###############################################################################

	# st.subheader(
	# "Average Price per Square Meter in French Communes", divider=True
	# )
	# fig_department = display_choropleth_map_for_department(
	# dataset_housing_prices,
	# selected_departement,
	# communes_geojson,
	# min_global_commune_avg_risk_score,
	# max_global_commune_avg_risk_score,
	# title=f"Average Price per Square Meter in Department {selected_departement_label} (Animated by Year)",
	# height_graph=1000,
	# width_graph=1400,
	# )
	# st.plotly_chart(fig_department, use_container_width=False)
	# st.write(
	# "Missing values are represented in light grey, while actual data is shown in a gradient from red (high prices) to green (low prices)."
	# )

	# ###############################################################################

	# st.subheader(f"Historical Price comparaison in Selected Commune in Departement {selected_departement_label}", divider=True)

	# available_communes = dataset_housing_prices[
	# dataset_housing_prices["code_departement"] == selected_departement
	# ]["code_commune_insee"].unique()

	# let_col1, right_col2 = st.columns(2)
	# with let_col1:
	# # --- Commune Selection 1 ---
	# selected_commune_1 = st.selectbox(
	# "Select the first Commune",
	# options=available_communes,
	# format_func=lambda x: f"{x} - {insee_df[insee_df['code_commune_INSEE'] == x]['nom_commune_complet'].values[0]}"
	# if x in insee_df["code_commune_INSEE"].values
	# else x,
	# key="commune_select_1",
	# index=0 if len(available_communes) > 1 else 0
	# )
	# with right_col2:
	# # --- Commune Selection 2 ---
	# selected_commune_2 = st.selectbox(
	# "Select the second Commune (Optional for comparison)",
	# options=[None]
	# + list(available_communes), # Add None as an option to not select a second commune
	# format_func=lambda x: f"{x} - {insee_df[insee_df['code_commune_INSEE'] == x]['nom_commune_complet'].values[0]}"
	# if x is not None and x in insee_df["code_commune_INSEE"].values
	# else "None (Only show Commune 1)",
	# key="commune_select_2",
	# index=2 if len(available_communes) > 2 else 0
	# )

	# # --- Data Preparation for Chart ---
	# all_communes_to_plot = []

	# # Process Commune 1 data
	# commune_data_1 = dataset_housing_prices[
	# dataset_housing_prices["code_commune_insee"] == selected_commune_1
	# ].copy()
	# if not commune_data_1.empty:
	# commune_data_1["annee"] = commune_data_1["annee"].astype(str)
	# commune_data_1 = commune_data_1.groupby("annee")["combined_score"].mean().reset_index()
	# # Rename the price column to reflect the commune for the legend
	# commune_name_1 = (
	# insee_df[insee_df["code_commune_INSEE"] == selected_commune_1][
	# "nom_commune_complet"
	# ].values[0]
	# if selected_commune_1 in insee_df["code_commune_INSEE"].values
	# else selected_commune_1
	# )
	# commune_data_1.rename(columns={"combined_score": commune_name_1}, inplace=True)
	# all_communes_to_plot.append(commune_data_1)
	# else:
	# st.warning(f"No data available for Commune 1: {selected_commune_1}")


	# # Process Commune 2 data if selected
	# if (
	# selected_commune_2 and selected_commune_2 != selected_commune_1
	# ): # Ensure a valid second commune is chosen and it's not the same as the first
	# commune_data_2 = dataset_housing_prices[
	# dataset_housing_prices["code_commune_insee"] == selected_commune_2
	# ].copy()
	# if not commune_data_2.empty:
	# commune_data_2["annee"] = commune_data_2["annee"].astype(str)
	# commune_data_2 = (
	# commune_data_2.groupby("annee")["combined_score"].mean().reset_index()
	# )
	# # Rename the price column for the second commune
	# commune_name_2 = (
	# insee_df[insee_df["code_commune_INSEE"] == selected_commune_2][
	# "nom_commune_complet"
	# ].values[0]
	# if selected_commune_2 in insee_df["code_commune_INSEE"].values
	# else selected_commune_2
	# )
	# commune_data_2.rename(columns={"combined_score": commune_name_2}, inplace=True)
	# all_communes_to_plot.append(commune_data_2)
	# else:
	# st.warning(f"No data available for Commune 2: {selected_commune_2}")
	# elif selected_commune_2 == selected_commune_1 and selected_commune_2 is not None:
	# st.info("You've selected the same commune for both. Showing only one line.")

	# # Combine dataframes for plotting
	# if all_communes_to_plot:
	# # Use reduce or pd.merge to combine, ensuring 'annee' is the common key

	# # Start with the first dataframe, then merge others
	# combined_df = reduce(
	# lambda left, right: pd.merge(left, right, on="annee", how="outer"),
	# all_communes_to_plot,
	# )
	# combined_df.set_index("annee", inplace=True)

	# st.line_chart(
	# combined_df,
	# use_container_width=True,
	# height=400,
	# x_label="Year",
	# y_label="Combined score (price and global risk)",
	# )
	# else:
	# st.info("Please select at least one commune to display data.")