Spaces:

joseph-data
/

daioe

Sleeping

App Files Files Community

daioe / app.py

joseph-data

updated the dockerfile

8cae453 unverified 4 months ago

raw

history blame contribute delete

13 kB

	# DAIOE Explorer (Shiny app).
	#
	# High-level flow:
	# - Load pipeline outputs once at startup via `src.data_manager.load_data()` (disk-cached by default).
	# - Treat everything below as filtering + presentation; no pipeline recomputation in reactives.
	#
	# Developer notes:
	# - Add/adjust selectable options in `src/config.py`.
	# - The sidebar contains all user-controlled inputs; `Reset filters` restores defaults.

	from __future__ import annotations

	from pathlib import Path
	from typing import Dict, List, Tuple
	from faicons import icon_svg
	import pandas as pd
	from shiny import reactive, render
	import shiny.ui as classic_ui
	from shiny.express import input, ui
	from shinywidgets import render_widget

	from src import config
	from src.data_manager import load_data
	from src.plotting import (
	build_bar_plot,
	build_trend_plot,
	format_metric_value,
	format_raw_value,
	)

	TAXONOMY_OPTIONS = config.TAXONOMY_OPTIONS
	METRIC_OPTIONS: List[Tuple[str, str]] = config.METRIC_OPTIONS
	WEIGHTING_OPTIONS: List[Tuple[str, str]] = config.WEIGHTING_OPTIONS

	LEVEL_OPTIONS = config.LEVEL_OPTIONS
	LEVEL_LABELS = {value: label for label, value in LEVEL_OPTIONS}
	LEVEL_CHOICES = {str(value): label for label, value in LEVEL_OPTIONS}


	DATA = load_data()

	# Defaults are mostly sourced from `src/config.py`, except the global year range
	# which is derived from whatever data is available at startup.

	ALL_YEARS = sorted(
	{int(year) for frame in DATA.values() for year in frame["year"].unique()}
	)
	GLOBAL_YEAR_MIN = ALL_YEARS[0]
	GLOBAL_YEAR_MAX = ALL_YEARS[-1]
	DEFAULT_TAXONOMY = config.DEFAULT_TAXONOMY
	DEFAULT_LEVEL = config.DEFAULT_LEVEL
	DEFAULT_WEIGHTING = config.DEFAULT_WEIGHTING
	DEFAULT_YEAR_RANGE = (GLOBAL_YEAR_MIN, GLOBAL_YEAR_MAX)
	DEFAULT_SORT_DESC = config.DEFAULT_SORT_DESC
	DEFAULT_LEVEL_CHOICE = str(DEFAULT_LEVEL)
	DEFAULT_TOP_N = config.DEFAULT_TOP_N


	css_file = Path(__file__).parent / "css" / "theme.scss"

	ui.page_opts(
	fillable=False,
	fillable_mobile=True,
	full_width=True,
	id="page",
	lang="en",
	)

	ui.include_css(css_file)


	def metric_mapping() -> Dict[str, str]:
	return {value: label for label, value in METRIC_OPTIONS}


	def weighting_mapping() -> Dict[str, str]:
	return {value: label for label, value in WEIGHTING_OPTIONS}


	def taxonomy_mapping() -> Dict[str, str]:
	return {value: label for label, value in TAXONOMY_OPTIONS}


	def apply_search_filter(df: pd.DataFrame) -> pd.DataFrame:
	search_term = input.search().strip().lower()
	if not search_term:
	return df

	labels = df["label"].astype(str).str.lower()
	return df[labels.str.contains(search_term, na=False)]


	@reactive.calc
	def chart_title() -> str:
	# Shared title capturing the current metric/taxonomy/weighting/level plus the latest year.
	df = filtered_data()
	latest_year = int(df["year"].max()) if not df.empty else None

	metric_text = metric_label()
	weight_label = weighting_mapping().get(input.weighting(), input.weighting())
	taxonomy_label = taxonomy_mapping().get(input.taxonomy(), input.taxonomy())
	level_value = int(input.level())
	group_label = LEVEL_LABELS.get(level_value, f"Level {level_value}")

	base = f"{metric_text} ({weight_label}, {taxonomy_label}) — {group_label}"
	if latest_year is None:
	return base
	return f"{base} in {latest_year}"


	# ---------------------------------------------------------------------------
	# Sidebar UI
	# ---------------------------------------------------------------------------

	with ui.sidebar(open="open", class_="bg-light p-3", width=300, position="right"):
	ui.img(
	src="lab.svg", # because it's in www/
	style="max-width: 200px; margin-bottom: 8px;",
	)
	ui.input_radio_buttons(
	"taxonomy",
	"Taxonomy",
	taxonomy_mapping(),
	selected=DEFAULT_TAXONOMY,
	)
	ui.input_select(
	"level",
	"Level",
	LEVEL_CHOICES,
	selected=DEFAULT_LEVEL_CHOICE,
	)
	ui.input_select(
	"metric",
	"Sub-index",
	metric_mapping(),
	selected=METRIC_OPTIONS[0][1],
	)
	ui.input_select(
	"weighting",
	"Weighting",
	weighting_mapping(),
	selected=DEFAULT_WEIGHTING,
	)

	ui.input_slider(
	"year_range",
	"Year range",
	min=GLOBAL_YEAR_MIN,
	max=GLOBAL_YEAR_MAX,
	value=DEFAULT_YEAR_RANGE,
	step=1,
	sep="",
	)

	ui.input_slider(
	"top_n",
	"Occupations to display (0 = all)",
	min=0,
	max=30,
	value=DEFAULT_TOP_N,
	step=1,
	)

	ui.input_switch("sort_desc", "Sort descending", value=DEFAULT_SORT_DESC)
	ui.input_text("search", "Search by occupation", placeholder="e.g. statistician")
	with ui.popover(id="help_popover"):
	ui.input_action_button(
	"show_help",
	"Quick Guide",
	class_="btn btn-outline-primary btn-sm w-100",
	icon=icon_svg("circle-info"),
	)
	ui.markdown(
	"""
	#### Quick Guide

	- Taxonomy: SSYK 2012 = current; SSYK 1996 = historic.
	- Level: 4-digit shows individual occupations; 1-digit shows broad groups.
	- Sub-index: Pick the DAIOE metric to visualize; chart titles reflect your selection.
	- Weighting: Employment-weighted highlights labour-market impact; Simple average treats each occupation equally.
	- Years: Use the slider; charts always use the latest year within the range.
	- Top N / Search: Limit to the N highest values (0 shows all) and filter by occupation name; toggle sort direction.
	- Reading charts: Hover lines for per-year values; bars display raw + percentile labels; value boxes show the most/least exposed in the latest year.
	"""
	)

	ui.hr()
	ui.input_action_button(
	"reset_filters",
	"Reset filters",
	class_="btn btn-primary w-100",
	icon=icon_svg("arrow-rotate-left"),
	)


	# ---------------------------------------------------------------------------
	# Reactive helpers
	# ---------------------------------------------------------------------------


	@reactive.effect
	@reactive.event(input.reset_filters)
	def _reset_filters() -> None:
	# Push defaults back into the UI (does not trigger a data reload).
	ui.update_radio_buttons("taxonomy", selected=DEFAULT_TAXONOMY)
	ui.update_select("level", selected=DEFAULT_LEVEL_CHOICE)
	ui.update_select("metric", selected=METRIC_OPTIONS[0][1])
	ui.update_select("weighting", selected=DEFAULT_WEIGHTING)
	ui.update_slider("year_range", value=DEFAULT_YEAR_RANGE)
	ui.update_slider("top_n", value=DEFAULT_TOP_N)
	ui.update_switch("sort_desc", value=DEFAULT_SORT_DESC)
	ui.update_text("search", value="")


	@reactive.calc
	def metric_name() -> str:
	return f"daioe_{input.metric()}"


	@reactive.calc
	def metric_label() -> str:
	return metric_mapping()[input.metric()]


	@reactive.calc
	def percentile_metric_name() -> str:
	return f"pct_rank_{input.metric()}"


	@reactive.calc
	def current_data() -> pd.DataFrame:
	# Structural filter: taxonomy + weighting + level (shared by all downstream reactives).
	taxonomy = input.taxonomy()
	if taxonomy not in DATA:
	return pd.DataFrame()

	df = DATA[taxonomy]

	level = int(input.level())
	weight = input.weighting()

	# Filter once here instead of repeatedly in downstream logic
	df = df[(df["weighting"] == weight) & (df["level"] == level)]

	return df


	@reactive.calc
	def filtered_data() -> pd.DataFrame:
	# Adds metric/year/search/top-N filtering on top of `current_data()`.
	df = current_data()
	if df.empty:
	return df

	metric_col = metric_name()

	# Keep only rows with valid metric values
	df = df.dropna(subset=[metric_col])

	# Year range filter
	year_min, year_max = input.year_range()
	df = df[(df["year"] >= year_min) & (df["year"] <= year_max)]

	# Search filter (occupation label in Swedish)
	df = apply_search_filter(df)

	if df.empty:
	return df

	# Top-N by latest year metric value
	latest_year = df["year"].max()
	latest_slice = df[df["year"] == latest_year].sort_values(
	metric_col,
	ascending=not input.sort_desc(),
	)

	top_n = input.top_n()
	if top_n > 0:
	latest_slice = latest_slice.head(top_n)

	keep_codes = latest_slice["code"].unique()
	df = df[df["code"].isin(keep_codes)]

	return df


	@reactive.calc
	def latest_order() -> List[str]:
	# Consistent label ordering based on the latest year and chosen sort direction.
	df = filtered_data()
	if df.empty:
	return []

	metric_col = metric_name()
	latest_year = df["year"].max()
	ascending = not input.sort_desc()

	latest_slice = df[df["year"] == latest_year].sort_values(
	metric_col, ascending=ascending
	)

	return latest_slice["label"].tolist()


	# ---------------------------------------------------------------------------
	# Extremes (value boxes)
	# ---------------------------------------------------------------------------
	@reactive.calc
	def latest_extremes() -> Dict[str, Dict[str, float \| str]]:
	# Use all occupations (ignore top_n) but respect other filters
	df = current_data()
	if df.empty:
	return {}

	metric_col = metric_name() # raw DAIOE index
	percentile_col = percentile_metric_name()
	df = df.dropna(subset=[metric_col, percentile_col])

	# Apply year range filter (but not top_n)
	year_min, year_max = input.year_range()
	df = df[(df["year"] >= year_min) & (df["year"] <= year_max)]

	# Apply search filter (consistent with main plots)
	df = apply_search_filter(df)

	if df.empty:
	return {}

	latest_year = df["year"].max()
	latest_df = df[df["year"] == latest_year]

	sorted_df = latest_df.sort_values(metric_col, ascending=False)
	top_row = sorted_df.iloc[0]
	bottom_row = sorted_df.iloc[-1]

	return {
	"year": int(latest_year),
	"most": {
	"label": str(top_row["label"]),
	"value": float(top_row[metric_col]),
	"percentile": float(top_row[percentile_col]),
	},
	"least": {
	"label": str(bottom_row["label"]),
	"value": float(bottom_row[metric_col]),
	"percentile": float(bottom_row[percentile_col]),
	},
	}


	# ---------------------------------------------------------------------------
	# Top summary boxes
	# ---------------------------------------------------------------------------
	with ui.layout_columns(col_widths=[6, 6], gap="16px", class_="mb-3"):

	@render.ui
	def most_exposed_box():
	info = latest_extremes()
	if not info:
	return ui.value_box(
	"Most exposed occupation",
	"No data in range",
	"Adjust filters to see values",
	)

	most = info["most"]
	year = info["year"]
	return classic_ui.value_box(
	"Most exposed occupation\n",
	ui.h4(most["label"]),
	f"{metric_label()} raw: {format_raw_value(most['value'])} \| \n"
	f"percentile: {format_metric_value(most['percentile'])} (year {year})",
	theme="blue",
	fill=True,
	)

	@render.ui
	def least_exposed_box():
	info = latest_extremes()
	if not info:
	return ui.value_box(
	"Least exposed occupation",
	"No data in range",
	"Adjust filters to see values",
	)

	least = info["least"]
	year = info["year"]
	return classic_ui.value_box(
	"Least exposed occupation\n",
	ui.h4(least["label"]),
	f"{metric_label()} raw: {format_raw_value(least['value'])} \| \n"
	f"percentile: {format_metric_value(least['percentile'])} (year {year})",
	theme="bg-gradient-orange-red",
	full_screen=True,
	fill=True,
	)


	# ---------------------------------------------------------------------------
	# Main UI cards & plots
	# ---------------------------------------------------------------------------
	with ui.card(full_screen=True, fill=True, class_="mb-3"):
	ui.card_header("Trend by occupation")

	@render_widget
	def trend_plot():
	df = filtered_data()
	return build_trend_plot(
	df,
	metric_col=metric_name(),
	metric_label=metric_label(),
	title=chart_title(),
	order=latest_order(),
	)


	with ui.card(full_screen=True, fill=True, class_="mb-3"):
	ui.card_header("Latest year comparison")

	@render_widget
	def bar_plot():
	df = filtered_data()
	return build_bar_plot(
	df,
	percentile_col=percentile_metric_name(),
	raw_col=metric_name(),
	metric_label=metric_label(),
	title=chart_title(),
	order=latest_order(),
	)


	if __name__ == "__main__":
	ui.run()