Spaces:

joseph-data
/

yearly_explorer

Sleeping

App Files Files Community

joseph-data committed 26 days ago

Commit

48e682d

verified ·

1 Parent(s): c5e58f4

Sync from GitHub via hub-sync

Browse files

Files changed (14) hide show

.dockerignore +16 -0
.python-version +1 -0
Dockerfile +38 -0
README.md +0 -10
_brand.yml +79 -0
app.py +278 -0
calcs.py +152 -0
data/daioe_scb_years_processed.parquet +3 -0
logos/lab.svg +0 -0
md_files/intro.md +1 -0
pyproject.toml +17 -0
setup.py +257 -0
uv.lock +0 -0
visuals.py +188 -0

.dockerignore ADDED Viewed

	@@ -0,0 +1,16 @@

+# Virtual environment is large and is rebuilt inside the image
+.venv/
+# Python cache
+__pycache__/
+*.pyc
+*.pyo
+# Dev/scratch files
+app_v*.py
+md_files/
+# Git
+.git/
+.gitignore

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.14

Dockerfile ADDED Viewed

	@@ -0,0 +1,38 @@

+# ------------------------------- Builder Stage ------------------------------ #
+FROM python:3.14-bookworm AS builder
+# Install uv from official image (faster, reproducible, no curl needed)
+COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
+ENV UV_PROJECT_ENVIRONMENT=/app/.venv
+WORKDIR /app
+# Install deps from lockfile (cache uv downloads for faster rebuilds).
+# This is a flat Shiny app, so only install dependencies, not a package.
+COPY pyproject.toml uv.lock ./
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --no-dev --no-install-project
+## ------------------------------ Production Stage ---------------------------- ##
+FROM python:3.14-slim-bookworm AS production
+WORKDIR /app
+# Environment set-up
+COPY --from=builder /app/.venv /app/.venv
+ENV PATH="/app/.venv/bin:$PATH"
+# Copy only what the app needs at runtime
+# NOTE(review): md_files/ is not copied here (and is excluded by .dockerignore),
+# yet setup.py reads md_files/intro.md at import time - confirm whether the
+# intro text should ship with the image.
+COPY app.py ./app.py
+COPY setup.py ./setup.py
+COPY calcs.py ./calcs.py
+COPY visuals.py ./visuals.py
+COPY data ./data
+COPY logos ./logos
+COPY _brand.yml ./_brand.yml
+# Requirement for deployment at hf
+EXPOSE 7860
+CMD ["shiny", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,10 +0,0 @@
----
-title: Yearly Explorer
-emoji: 👀
-colorFrom: blue
-colorTo: blue
-sdk: docker
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

_brand.yml ADDED Viewed

	@@ -0,0 +1,79 @@

+# ─────────────────────────────────────────────
+# _brand.yml — joseph-data
+# ─────────────────────────────────────────────
+meta:
+  name: joseph-data
+  link: https://github.com/joseph-data
+logo:
+  images:
+    lab: logos/lab.svg
+  small:  lab
+  medium: lab
+  large:  lab
+# ─── Color ───────────────────────────────────
+color:
+  palette:
+    black:   "#1C2826"
+    blue:    "#0C0A3E"
+    gray:    "#2A2E45"
+    neutral: "#F9F7F1"
+    red:     "#BA274A"
+    violet:  "#4D6CFA"
+  background: neutral
+  foreground: black
+  primary:    blue
+  secondary:  violet
+  tertiary:   gray
+  danger:     red
+  light:      neutral
+  dark:       black
+# ─── Typography ──────────────────────────────
+typography:
+  fonts:
+    - family: Nunito Sans
+      source:  google
+      weight:  [300, 400, 600]
+      style:   normal
+    - family: Montserrat
+      source:  google
+      weight:  [600, 700, 800]
+      style:   normal
+    - family: Fira Code
+      source:  google
+      weight:  [400, 500]
+      style:   normal
+  base:
+    family:      Nunito Sans
+    size:        1rem
+    weight:      400
+    line-height: 1.5
+  headings:
+    family:      Montserrat
+    weight:      700
+    color:       primary
+    line-height: 1.2
+  monospace:
+    family: Fira Code
+    size:   0.9em
+    weight: 400
+  monospace-inline:
+    color:            primary
+    background-color: light
+  monospace-block:
+    color:            foreground
+    background-color: light
+    line-height:      1.6
+  link:
+    color:      secondary
+    decoration: underline

app.py ADDED Viewed

	@@ -0,0 +1,278 @@

+from pathlib import Path
+import polars as pl
+from shiny import reactive
+from shiny.express import app_opts, input, render, ui
+from shinywidgets import render_plotly
+app_opts(static_assets={"/logos": Path(__file__).parent / "logos"})
+import calcs
+import visuals
+from setup import (
+    AGES,
+    INTRO_MD,
+    LEVELS,
+    METRICS,
+    SEXES,
+    YEAR_MAX,
+    YEAR_MIN,
+    YEARS,
+    as_great_table_html,
+    build_choices_by_level,
+    download_extension,
+    download_media_type,
+    export_filtered_data,
+    lf,
+)
+LEVEL_LABELS = {
+    "SSYK1": "SSYK 1 - Major groups",
+    "SSYK2": "SSYK 2 - Minor groups",
+    "SSYK3": "SSYK 3 - Unit groups",
+    "SSYK4": "SSYK 4 - Detailed units",
+}
+OCCUPATION_CHOICES = build_choices_by_level(lf, LEVELS)
+DEFAULT_LEVEL = "SSYK4" if "SSYK4" in LEVELS else LEVELS[0]
+DEFAULT_OCCUPATION = next(iter(OCCUPATION_CHOICES[DEFAULT_LEVEL]))
+ui.page_opts(
+    title=ui.tags.span(
+        ui.tags.img(src="logos/lab.svg", height="32px", style="margin-right:10px;vertical-align:middle;"),
+        "Yearly DAIOE Explorer of Swedish Occupations",
+    ),
+    theme=ui.Theme.from_brand(__file__),
+    fillable=True,
+    lang="en",
+    full_width=True,
+)
+@reactive.calc
+def _download_frame():
+    """
+    Collect the filtered rows for the download tab as an eager DataFrame.
+    Always filters on the selected SSYK level and the selected year range.
+    The sex, age-group, and occupation filters are applied only when a
+    selection is made: an empty sex or occupation selection, or the "All"
+    age option, leaves that dimension unfiltered.
+    """
+    occupations = list(input.download_occupation()) if input.download_occupation() else None
+    years = input.download_years()
+    age = input.download_age()
+    sexes = list(input.download_sex())
+    data = lf.filter(
+        (pl.col("level") == input.download_level())
+        & pl.col("year").is_between(int(years[0]), int(years[1])),
+    )
+    if sexes:
+        data = data.filter(pl.col("sex").is_in(sexes))
+    if age != "All":
+        data = data.filter(pl.col("age_group") == age)
+    if occupations:
+        data = data.filter(pl.col("occupation").is_in(occupations))
+    return data.collect()
+@reactive.calc
+def occ_summary():
+    """Reactive wrapper: returns summary dict for the selected occupation and year."""
+    return calcs.get_occ_summary(lf, input.occupation(), int(input.occ_year()))
+@reactive.calc
+def occ_employment_by_age():
+    """Reactive wrapper: returns long-format employment by age group for the line chart."""
+    return calcs.get_occ_employment_by_age(
+        lf,
+        input.occupation(),
+        (int(input.chart_year_range()[0]), int(input.chart_year_range()[1])),
+        list(input.chart_age_groups()),
+    )
+with ui.navset_pill(id="tab"):
+    with ui.nav_panel(title="1. Occupation View"):
+        with ui.layout_columns(col_widths=[6, 6]):
+            with ui.card(full_screen=True):
+                ui.markdown(INTRO_MD)
+                with ui.div(class_="d-flex gap-3 align-items-end"):
+                    ui.input_select(
+                        "occ_level",
+                        "SSYK level",
+                        choices={level: LEVEL_LABELS.get(level, level) for level in LEVELS},
+                        selected=DEFAULT_LEVEL,
+                        width="200px",
+                    )
+                    ui.input_selectize(
+                        "occupation",
+                        "Occupation",
+                        choices=OCCUPATION_CHOICES[DEFAULT_LEVEL],
+                        selected=DEFAULT_OCCUPATION,
+                    )
+                    ui.input_select(
+                        "occ_year",
+                        "Year",
+                        choices={y: str(y) for y in YEARS},
+                        selected=YEAR_MAX,
+                        width="120px",
+                    )
+                @render.ui
+                def occ_value_boxes():
+                    """Render employment and % change value boxes for the selected occupation."""
+                    summary = occ_summary()
+                    if summary is None:
+                        return ui.p("No data available.")
+                    return visuals.build_value_boxes(summary, input.occupation())
+            with ui.card(full_screen=True):
+                ui.card_header("AI Exposure by Sub-domain")
+                @render_plotly
+                def ai_exposure_bar():
+                    """Render bar chart of AI exposure level per sub-domain, coloured by index score."""
+                    df = calcs.get_occ_ai_exposure(lf, input.occupation(), int(input.occ_year()))
+                    return visuals.build_ai_exposure_bar(df.to_pandas(), input.occupation(), int(input.occ_year()))
+                ui.markdown(visuals.DAIOE_SOURCE_MD)
+            with ui.card(full_screen=True):
+                ui.card_header("Employment by Age Group")
+                with ui.layout_sidebar():
+                    with ui.sidebar(width="220px"):
+                        ui.input_slider(
+                            "chart_year_range",
+                            "Year range",
+                            min=min(YEARS),
+                            max=max(YEARS),
+                            value=(min(YEARS), max(YEARS)),
+                            step=1,
+                            sep="",
+                        )
+                        ui.input_selectize(
+                            "chart_age_groups",
+                            "Age groups",
+                            choices=AGES,
+                            selected=AGES[:2],
+                            multiple=True,
+                        )
+                    @render_plotly
+                    def occ_age_chart():
+                        """Render a line chart of 1-yr employment % change per age group."""
+                        df = occ_employment_by_age()
+                        return visuals.build_age_chart(df.to_pandas(), input.occupation())
+                    ui.markdown(visuals.SCB_SOURCE_MD)
+            with ui.card():
+                "Card 4"
+    with ui.nav_panel(title="2. Comparison View"):
+        "Panel B content"
+    with ui.nav_panel(title="3. Download"):
+        ui.p(
+            "Export the filtered row-level dataset or inspect a compact preview before downloading.",
+            class_="text-muted mb-3",
+        )
+        with ui.div(class_="d-flex gap-3 align-items-end flex-wrap mb-3"):
+            ui.input_select(
+                "download_level",
+                "SSYK level",
+                choices={level: LEVEL_LABELS.get(level, level) for level in LEVELS},
+                selected=DEFAULT_LEVEL,
+                width="200px",
+            )
+            ui.input_slider(
+                "download_years",
+                "Year range",
+                min=YEAR_MIN,
+                max=YEAR_MAX,
+                value=(YEAR_MIN, YEAR_MAX),
+                step=1,
+                sep="",
+                width="220px",
+            )
+            ui.input_checkbox_group(
+                "download_sex",
+                "Sex",
+                choices={"men": "Men", "women": "Women"},
+                selected=SEXES,
+                inline=True,
+            )
+            ui.input_select(
+                "download_age",
+                "Age group",
+                choices={"All": "All ages"} | {a: a for a in AGES},
+                selected="All",
+                width="200px",
+            )
+            ui.input_selectize(
+                "download_occupation",
+                "Occupations",
+                choices=OCCUPATION_CHOICES[DEFAULT_LEVEL],
+                multiple=True,
+                options={"placeholder": "All occupations"},
+            )
+            ui.input_select(
+                "download_format",
+                "Format",
+                choices={"csv": "CSV", "parquet": "Parquet", "excel": "Excel"},
+                selected="csv",
+                width="120px",
+            )
+        with ui.layout_columns(col_widths=[3, 9]):
+            with ui.value_box(theme="primary"):
+                "Rows"
+                @render.text
+                def download_rows_count():
+                    """Show count of rows matching current download filters."""
+                    return f"{_download_frame().height:,}"
+            with ui.card():
+                ui.card_header("Export")
+                @render.download(
+                    filename=lambda: (
+                        "daioe_swedish_occupations_"
+                        f"{__import__('datetime').datetime.now().strftime('%Y-%m-%d')}."
+                        f"{download_extension(input.download_format())}"
+                    ),
+                    media_type=lambda: download_media_type(input.download_format()),
+                    label="Download filtered data",
+                )
+                def download_data():
+                    """Export filtered data in the selected format."""
+                    return export_filtered_data(
+                        _download_frame().to_pandas(),
+                        input.download_format(),
+                    )
+        with ui.card(full_screen=True):
+            ui.card_header("Preview (first 50 rows)")
+            @render.ui
+            def download_preview():
+                """Render a preview table of the filtered download data."""
+                cols = [
+                    "level", "ssyk_code", "occupation", "year", "sex",
+                    "age_group", "count", "daioe_genai_wavg",
+                    "daioe_allapps_wavg", "pct_chg_1y",
+                ]
+                data = _download_frame().select(cols).head(50).to_pandas()
+                return as_great_table_html(data, METRICS)
+@reactive.effect
+def _sync_occupation_choices():
+    """Update the occupation selectize choices whenever the SSYK level changes."""
+    level = input.occ_level()
+    choices = OCCUPATION_CHOICES[level]
+    ui.update_selectize("occupation", choices=choices, selected=next(iter(choices)))
+@reactive.effect
+def _sync_download_occupation_choices():
+    """Update the download occupation selectize when the download SSYK level changes."""
+    level = input.download_level()
+    ui.update_selectize("download_occupation", choices=OCCUPATION_CHOICES[level], selected=[])

calcs.py ADDED Viewed

	@@ -0,0 +1,152 @@

+import polars as pl
+def get_occ_summary(lf: pl.LazyFrame, occupation: str, year: int) -> dict | None:
+    """
+    Aggregate employment count and percentage changes for one occupation and year.
+    Returns a dict with keys: employment, pct_1y, pct_3y, pct_5y, year.
+    Returns None if no data matches the filters.
+    """
+    df = (
+        lf.filter(
+            (pl.col("occupation") == occupation) & (pl.col("year") == year),
+        )
+        .select(["count", "pct_chg_1y", "pct_chg_3y", "pct_chg_5y", "year"])
+        .collect()
+    )
+    if df.is_empty():
+        return None
+    def _mean_or_none(col: str) -> float | None:
+        val = df[col].mean()
+        return None if val is None else float(val)
+    return {
+        "employment": df["count"].sum(),
+        "pct_1y": _mean_or_none("pct_chg_1y"),
+        "pct_3y": _mean_or_none("pct_chg_3y"),
+        "pct_5y": _mean_or_none("pct_chg_5y"),
+        "year": int(df["year"][0]),
+    }
+AI_WAVG_COLS = [
+    "daioe_genai_wavg",
+    "daioe_allapps_wavg",
+    "daioe_stratgames_wavg",
+    "daioe_videogames_wavg",
+    "daioe_imgrec_wavg",
+    "daioe_imgcompr_wavg",
+    "daioe_imggen_wavg",
+    "daioe_readcompr_wavg",
+    "daioe_lngmod_wavg",
+    "daioe_translat_wavg",
+    "daioe_speechrec_wavg",
+]
+AI_LABELS = {
+    "daioe_genai_wavg": "🧠 Generative AI",
+    "daioe_allapps_wavg": "📚 All Applications",
+    "daioe_stratgames_wavg": "♟️ Strategy Games",
+    "daioe_videogames_wavg": "🎮 Video Games",
+    "daioe_imgrec_wavg": "🖼️ Image Recognition",
+    "daioe_imgcompr_wavg": "🧩 Image Comprehension",
+    "daioe_imggen_wavg": "🎨 Image Generation",
+    "daioe_readcompr_wavg": "📖 Reading Comprehension",
+    "daioe_lngmod_wavg": "✍️ Language Modeling",
+    "daioe_translat_wavg": "🌐 Translation",
+    "daioe_speechrec_wavg": "🎙️ Speech Recognition",
+}
+AI_LEVEL_COLS = [c.replace("_wavg", "_Level_Exposure") for c in AI_WAVG_COLS]
+AI_PCTL_COLS = [f"pctl_{c}" for c in AI_WAVG_COLS]
+EXPOSURE_LABELS = {1: "Very Low", 2: "Low", 3: "Medium", 4: "High", 5: "Very High"}
+def get_occ_ai_exposure(
+    lf: pl.LazyFrame, occupation: str, year: int,
+) -> pl.DataFrame:
+    """
+    Return mean weighted AI exposure scores, exposure levels, and percentile ranks per sub-domain.
+    Returns a long-format DataFrame with columns: domain, score, level, level_label, percentile.
+    Used to power the ranked horizontal bar chart in Card 2.
+    """
+    select_cols = AI_WAVG_COLS + AI_LEVEL_COLS + AI_PCTL_COLS
+    df = (
+        lf.filter(
+            (pl.col("occupation") == occupation) & (pl.col("year") == year),
+        )
+        .select(select_cols)
+        .collect()
+    )
+    rows = []
+    for wavg_col, level_col, pctl_col in zip(AI_WAVG_COLS, AI_LEVEL_COLS, AI_PCTL_COLS, strict=False):
+        raw_level = df[level_col].mean()
+        level_val = round(raw_level) if raw_level is not None else None
+        rows.append({
+            "domain": AI_LABELS[wavg_col],
+            "score": df[wavg_col].mean(),
+            "level": level_val,
+            "level_label": EXPOSURE_LABELS.get(level_val, "Unknown") if level_val else "Unknown",
+            "percentile": df[pctl_col].mean(),
+        })
+    return pl.DataFrame(rows).sort("score")
+def get_occ_ai_trend(
+    lf: pl.LazyFrame, occupation: str, year_range: tuple[int, int],
+) -> pl.DataFrame:
+    """
+    Return yearly mean weighted AI exposure (All Applications) for one occupation over a year range.
+    Returns a DataFrame with columns: year, daioe_allapps_wavg.
+    Used to power the trend line in Card 2.
+    """
+    year_min, year_max = year_range
+    return (
+        lf.filter(
+            (pl.col("occupation") == occupation)
+            & (pl.col("year") >= year_min)
+            & (pl.col("year") <= year_max),
+        )
+        .group_by("year")
+        .agg(pl.col("daioe_allapps_wavg").mean())
+        .sort("year")
+        .collect()
+    )
+def get_occ_employment_by_age(
+    lf: pl.LazyFrame,
+    occupation: str,
+    year_range: tuple[int, int],
+    age_groups: list[str],
+) -> pl.DataFrame:
+    """
+    Return yearly employment totals and mean 1-yr % change per age group.
+    Filters to one occupation, the inclusive year range, and the given age groups.
+    Used to power the employment change line chart in Card 3.
+    Returns a long-format DataFrame with columns: year, age_group, count
+    (summed), pct_chg_1y (mean), sorted by age_group then year.
+    """
+    year_min, year_max = year_range
+    return (
+        lf.filter(
+            (pl.col("occupation") == occupation)
+            & (pl.col("year") >= year_min)
+            & (pl.col("year") <= year_max)
+            & (pl.col("age_group").is_in(age_groups)),
+        )
+        .group_by(["year", "age_group"])
+        .agg([
+            pl.col("count").sum(),
+            pl.col("pct_chg_1y").mean(),
+        ])
+        .sort(["age_group", "year"])
+        .collect()
+    )

data/daioe_scb_years_processed.parquet ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b1ecf3208ed35a24a24d20882e72a9106f3fda8a9dcc52eb0e040661b3ecf18f
+size 3022908

logos/lab.svg ADDED Viewed

md_files/intro.md ADDED Viewed

	@@ -0,0 +1 @@


1	+ This dashboard provides data on current employment by occupation across the SSYK levels, and also highlights changes in employment for varying levels of AI exposure.

pyproject.toml ADDED Viewed

	@@ -0,0 +1,17 @@

+[project]
+name = "ai-econ-daioe-years-v2"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.14"
+dependencies = [
+    "faicons>=0.2.2",
+    "great-tables>=0.21.0",
+    "pandas>=3.0.2",
+    # NOTE(review): pathlib ships with the standard library on every Python this
+    # project supports (requires-python >=3.14), so this PyPI entry looks
+    # redundant - confirm, then drop it together with a uv.lock refresh.
+    "pathlib>=1.0.1",
+    "plotly[express]>=6.7.0",
+    "polars>=1.40.1",
+    "pyarrow>=24.0.0",
+    "shiny[theme]>=1.6.1",
+    "shinywidgets>=0.8.1",
+]

setup.py ADDED Viewed

	@@ -0,0 +1,257 @@

+import importlib.util
+import io
+import re
+from pathlib import Path
+import pandas as pd
+import plotly.graph_objects as go
+import polars as pl
+from great_tables import GT
+from shiny import ui
+# ---------------------------------------------------
+# Mardown Files
+# ------------
+# BASE_DIR = Path(__file__).resolve().parent
+BASE_DIR = Path.cwd()
+INTRO_MD = (BASE_DIR / "md_files" / "intro.md").read_text(encoding="utf-8")
+# ---------------------------------------------------
+# Data Preliminaries
+# ---------------------------------------------------
+DATA_PATH = BASE_DIR / "data" / "daioe_scb_years_processed.parquet"
+lf = pl.scan_parquet(DATA_PATH)
+lf.collect_schema()
+# ---------------------------------------------------
+# Defining Input Values
+# ---------------------------------------------------
+# 1. SSYK12 Levels
+LEVELS = lf.select(pl.col("level").unique().sort()).collect().to_series().to_list()
+def build_choices_by_level(
+    lf: pl.LazyFrame,
+    levels: list[str],
+) -> dict[str, dict[str, str]]:
+    out = {}
+    for lvl in levels:
+        occs = (
+            lf.filter(pl.col("level") == lvl)
+            .select(pl.col("occupation").unique().sort())
+            .collect()
+            .to_series()
+            .to_list()
+        )
+        out[lvl] = {o: o for o in occs}
+    return out
+# 2. Men and Women
+SEXES = lf.select(pl.col("sex").unique().sort()).collect().to_series().to_list()
+# 3. Age groupings
+AGE_ORDER = [
+    "Early Career 1 (16-24)",
+    "Early Career 2 (25-29)",
+    "Developing (30-34)",
+    "Mid-Career 1 (35-39)",
+    "Mid-Career 1 (40-44)",
+    "Mid-Career 2 (45-49)",
+    "Senior (50+)",
+]
+present = lf.select(pl.col("age_group").unique()).collect().to_series().to_list()
+AGES = [x for x in AGE_ORDER if x in present]
+YEARS = lf.select(pl.col("year").unique().sort()).collect().to_series().to_list()
+# 4. Years from the dataset
+YEAR_MIN, YEAR_MAX = min(YEARS), max(YEARS)
+# 5. AI Sub-Indexes
+METRICS: dict[str, str] = {
+    "daioe_genai": "🧠 Generative AI",
+    "daioe_allapps": "📚 All Applications",
+    "daioe_stratgames": "♟️ Strategy Games",
+    "daioe_videogames": "🎮 Video Games (Real-Time)",
+    "daioe_imgrec": "🖼️🔎 Image Recognition",
+    "daioe_imgcompr": "🧩🖼️ Image Comprehension",
+    "daioe_imggen": "🖌️🖼️ Image Generation",
+    "daioe_readcompr": "📖 Reading Comprehension",
+    "daioe_lngmod": "✍️🤖 Language Modeling",
+    "daioe_translat": "🌐🔤 Translation",
+    "daioe_speechrec": "🗣️🎙️ Speech Recognition",
+}
+first_cols = [
+    "level",
+    "ssyk_code",
+    "occupation",
+    "year",
+    "sex",
+    "age",
+    "age_group",
+    "count",
+    "weight_sum",
+    "chg_1y",
+    "chg_3y",
+    "chg_5y",
+    "pct_chg_1y",
+    "pct_chg_3y",
+    "pct_chg_5y",
+]
+# ---------------------------------------------------
+# Shared UI Helpers
+# ---------------------------------------------------
+def apply_plot_style(fig: go.Figure, brand: dict[str, str]) -> go.Figure:
+    """Apply a consistent visual style to Plotly charts."""
+    fig.update_layout(
+        paper_bgcolor="rgba(0,0,0,0)",
+        plot_bgcolor="rgba(0,0,0,0)",
+        font={"family": "Nunito Sans", "color": brand["text"]},
+        hoverlabel={"bgcolor": "white", "font_size": 12},
+        margin={"l": 20, "r": 20, "t": 40, "b": 20},
+    )
+    fig.update_xaxes(gridcolor="#E5E5E5", zeroline=False)
+    fig.update_yaxes(gridcolor="#E5E5E5", zeroline=False)
+    return fig
+def empty_figure(message: str, brand: dict[str, str]) -> go.Figure:
+    """Create a styled empty Plotly figure with a centered message."""
+    fig = go.Figure()
+    fig.add_annotation(text=message, showarrow=False, font_size=16)
+    fig.update_xaxes(visible=False)
+    fig.update_yaxes(visible=False)
+    return apply_plot_style(fig, brand)
+# ---------------------------------------------------
+# Shared Table/Label Helpers
+# ---------------------------------------------------
+def metric_display_name(metric_key: str, metrics: dict[str, str]) -> str:
+    """Return a clean human-readable metric label without leading icons."""
+    label = metrics.get(metric_key, metric_key.replace("_", " ").title())
+    return re.sub(r"^[^A-Za-z0-9]+\s*", "", label).strip()
+def readable_column_name(col: str, metrics: dict[str, str]) -> str:
+    """Convert raw dataset column names into readable table headers."""
+    exact = {
+        "ssyk_code": "SSYK Code",
+        "age_group": "Age Group",
+        "count": "Employees",
+        "year": "Year",
+        "sex": "Sex",
+        "level": "SSYK Level",
+        "occupation": "Occupation",
+        "chg_1y": "1-year Change",
+        "chg_3y": "3-year Change",
+        "chg_5y": "5-year Change",
+    }
+    if col in exact:
+        return exact[col]
+    col_l = col.lower()
+    if col_l.startswith("pctl_") and col_l.endswith("_wavg"):
+        metric_key = col[5:-5]
+        return f"{metric_display_name(metric_key, metrics)} Percentile (Weighted Avg)"
+    if col_l.endswith("_wavg"):
+        metric_key = col[:-5]
+        return f"{metric_display_name(metric_key, metrics)} (Weighted Avg)"
+    if col_l.endswith("_avg"):
+        metric_key = col[:-4]
+        return f"{metric_display_name(metric_key, metrics)} (Average)"
+    if col_l.endswith("_level_exposure"):
+        metric_key = col[: -len("_level_exposure")]
+        return f"{metric_display_name(metric_key, metrics)} Exposure Level"
+    fallback = col.replace("_", " ").title()
+    return (
+        fallback.replace("Ssyk", "SSYK").replace("Ai", "AI").replace("Daioe", "DAIOE")
+    )
+def as_great_table_html(df, metrics: dict[str, str]) -> ui.TagChild:
+    """Render a pandas DataFrame as Great Tables HTML with readable headers."""
+    if df.empty:
+        return ui.p("No data available for the selected filters.")
+    df_display = df.rename(
+        columns={c: readable_column_name(c, metrics) for c in df.columns},
+    )
+    float_cols = [
+        c
+        for c in df_display.columns
+        if c != "Year" and pd.api.types.is_float_dtype(df_display[c])
+    ]
+    gt = (
+        GT(df_display)
+        .opt_row_striping()
+        .tab_options(table_font_names=["Nunito Sans", "Arial", "sans-serif"])
+        .opt_stylize(style=2, color="blue")
+    )
+    if float_cols:
+        gt = gt.fmt_number(columns=float_cols, decimals=2)
+    return ui.HTML(gt.as_raw_html())
+# ---------------------------------------------------
+# Shared Download Helpers
+# ---------------------------------------------------
+def download_extension(fmt: str) -> str:
+    """Map selected download format to its file extension."""
+    return {"csv": "csv", "parquet": "parquet", "excel": "xlsx"}.get(fmt, "csv")
+def download_media_type(fmt: str) -> str:
+    """Return browser media type for each supported download format."""
+    if fmt == "parquet":
+        return "application/octet-stream"
+    if fmt == "excel":
+        return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+    return "text/csv"
+def export_filtered_data(df, fmt: str) -> str | bytes:
+    """Export a pandas DataFrame to csv/parquet/excel payload for Shiny download."""
+    if fmt == "parquet":
+        return df.to_parquet(index=False)
+    if fmt == "excel":
+        engine = None
+        if importlib.util.find_spec("openpyxl") is not None:
+            engine = "openpyxl"
+        elif importlib.util.find_spec("xlsxwriter") is not None:
+            engine = "xlsxwriter"
+        else:
+            raise RuntimeError("Excel export requires openpyxl or xlsxwriter.")
+        buffer = io.BytesIO()
+        df.to_excel(buffer, index=False, engine=engine)
+        return buffer.getvalue()
+    return df.to_csv(index=False)

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

visuals.py ADDED Viewed

	@@ -0,0 +1,188 @@

+import faicons as fa
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+from shiny import ui
+SCB_SOURCE_MD = (
+    "Source: [Swedish Occupational Register, SCB]"
+    "(https://www.scb.se/en/finding-statistics/statistics-by-subject-area/"
+    "labour-market/labour-force-supply/"
+    "the-swedish-occupational-register-with-statistics/)"
+)
+DAIOE_SOURCE_MD = "Source: [DAIOEs](https://www.ai-econlab.com/ai-exposure-daioe)"
+# Brand colours from _brand.yml
+_C_BG = "rgba(0,0,0,0)"
+_C_GRID = "#E5E5E5"
+_C_TEXT = "#1C2826"  # black
+_C_TITLE = "#0C0A3E"  # primary / blue
+_FONT_BASE = "Nunito Sans"
+_FONT_HEAD = "Montserrat"
+_BASE_LAYOUT = {
+    "paper_bgcolor": _C_BG,
+    "plot_bgcolor": _C_BG,
+    "font": {"family": _FONT_BASE, "color": _C_TEXT, "size": 13},
+    "title_font": {"family": _FONT_HEAD, "color": _C_TITLE, "size": 15},
+    "hoverlabel": {"font": {"family": _FONT_BASE, "size": 12}},
+    "margin": {"l": 20, "r": 20, "t": 45, "b": 20},
+}
+def build_value_boxes(summary: dict, occupation: str) -> ui.Tag:
+    """
+    Build the employment summary value boxes for a given occupation.
+    Returns a div containing a heading, four value boxes (employment, 1/3/5-yr
+    change), and a markdown source note.
+    """
+    def _arrow(v):
+        return "▼" if v < 0 else "▲"
+    def _theme(v):
+        return "danger" if v < 0 else "success"
+    def _fmt_pct(v):
+        return f"{_arrow(v)} {v:.0f}%" if v is not None else "N/A"
+    def _fmt_theme(v):
+        return _theme(v) if v is not None else "secondary"
+    emp = summary["employment"]
+    pct1 = summary["pct_1y"]
+    pct3 = summary["pct_3y"]
+    pct5 = summary["pct_5y"]
+    year = summary["year"]
+    return ui.div(
+        ui.h6(f"National Employment of {occupation}", class_="mt-3 mb-2 fw-semibold"),
+        ui.layout_columns(
+            ui.value_box(
+                title="Employment",
+                showcase=fa.icon_svg("users"),
+                value=f"{emp:,.0f}",
+                theme="primary",
+            ),
+            ui.value_box(
+                title="1-yr change",
+                value=_fmt_pct(pct1),
+                showcase=fa.icon_svg("arrow-trend-up" if pct1 is None or pct1 >= 0 else "arrow-trend-down"),
+                theme=_fmt_theme(pct1),
+            ),
+            ui.value_box(
+                title="3-yr change",
+                value=_fmt_pct(pct3),
+                showcase=fa.icon_svg("arrow-trend-up" if pct3 is None or pct3 >= 0 else "arrow-trend-down"),
+                theme=_fmt_theme(pct3),
+            ),
+            ui.value_box(
+                title="5-yr change",
+                value=_fmt_pct(pct5),
+                showcase=fa.icon_svg("arrow-trend-up" if pct5 is None or pct5 >= 0 else "arrow-trend-down"),
+                theme=_fmt_theme(pct5),
+            ),
+            col_widths=[3, 3, 3, 3],
+        ),
+        ui.markdown(f"Data as at **{year}**.\n\n{SCB_SOURCE_MD}"),
+    )
+def build_age_chart(df: pd.DataFrame, occupation: str) -> go.Figure:
+    """
+    Build a Plotly line chart of 1-yr employment % change by age group over time.
+    Absolute employment count is shown on hover. Returns an empty figure if df is empty.
+    """
+    if df.empty:
+        return go.Figure()
+    fig = px.line(
+        df,
+        x="year",
+        y="pct_chg_1y",
+        color="age_group",
+        markers=True,
+        custom_data=["count"],
+        labels={
+            "year": "Year",
+            "pct_chg_1y": "Employment change (%)",
+            "age_group": "Age Group",
+        },
+    )
+    fig.update_traces(
+        hovertemplate=(
+            "<b>%{fullData.name}</b><br>"
+            "Year: %{x}<br>"
+            "Change: %{y:.1f}%<br>"
+            "Employment: %{customdata[0]:,}<extra></extra>"
+        ),
+    )
+    fig.add_hline(y=0, line_color="grey", line_width=1)
+    fig.update_layout(
+        **_BASE_LAYOUT,
+        title={
+            "text": f"Annual Employment Change of {occupation} in Sweden",
+            "x": 0.01,
+            "xanchor": "left",
+        },
+        legend={"title": None},
+        yaxis={"ticksuffix": "%"},
+    )
+    fig.update_xaxes(gridcolor=_C_GRID, zeroline=False, dtick=1)
+    fig.update_yaxes(gridcolor=_C_GRID, zeroline=False)
+    return fig
+def build_ai_exposure_bar(df: pd.DataFrame, occupation: str, year: int) -> go.Figure:
+    """
+    Build a horizontal bar chart of AI exposure percentile rank per sub-domain.
+    X-axis: percentile rank, with the axis range fixed to 0-100.
+    Y-axis: AI sub-domains, taken from the "domain" column.
+    Bar colour follows the percentile rank on the "Blues" colour scale.
+    Hover shows percentile rank, exposure level label (out of 5), and index score.
+    Returns an empty figure if df is empty.
+    """
+    if df.empty:
+        return go.Figure()
+    fig = go.Figure(
+        go.Bar(
+            x=df["percentile"],
+            y=df["domain"],
+            orientation="h",
+            marker={
+                "color": df["percentile"],
+                "colorscale": "Blues",
+                "colorbar": {"title": "Percentile Rank"},
+                "showscale": True,
+                "cmin": 0,
+                "cmax": 100,
+            },
+            customdata=list(
+                zip(df["level_label"], df["level"], df["score"], strict=False)
+            ),
+            hovertemplate=(
+                "<b>%{y}</b><br>"
+                "Percentile Rank: %{x:.0f}<br>"
+                "Exposure Level: %{customdata[0]} (%{customdata[1]}/5)<br>"
+                "Index Score: %{customdata[2]:.3f}<extra></extra>"
+            ),
+        ),
+    )
+    fig.update_layout(
+        **_BASE_LAYOUT,
+        title={
+            "text": f"{occupation} Level of AI Exposure ({year})",
+            "x": 0.01,
+            "xanchor": "left",
+        },
+        xaxis={"title": "Percentile Rank", "range": [0, 100]},
+        yaxis={"title": None},
+    )
+    fig.update_xaxes(gridcolor=_C_GRID, zeroline=False)
+    fig.update_yaxes(gridcolor=_C_GRID, zeroline=False)
+    return fig