joseph-data committed on
Commit
48e682d
·
verified ·
1 Parent(s): c5e58f4

Sync from GitHub via hub-sync

Browse files
Files changed (14) hide show
  1. .dockerignore +16 -0
  2. .python-version +1 -0
  3. Dockerfile +38 -0
  4. README.md +0 -10
  5. _brand.yml +79 -0
  6. app.py +278 -0
  7. calcs.py +152 -0
  8. data/daioe_scb_years_processed.parquet +3 -0
  9. logos/lab.svg +0 -0
  10. md_files/intro.md +1 -0
  11. pyproject.toml +17 -0
  12. setup.py +257 -0
  13. uv.lock +0 -0
  14. visuals.py +188 -0
.dockerignore ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Virtual environment: large, and rebuilt inside the image
2
+ .venv/
3
+
4
+ # Python cache
5
+ __pycache__/
6
+ *.pyc
7
+ *.pyo
8
+
9
+ # Dev/scratch files
10
+ app_v*.py
11
+ md_files/
12
+
13
+ # Git
14
+ .git/
15
+ .gitignore
16
+
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.14
Dockerfile ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ------------------------------- Builder Stage ------------------------------ #
2
+ FROM python:3.14-bookworm AS builder
3
+
4
+ # Install uv from official image (faster, reproducible, no curl needed)
5
+ COPY --from=ghcr.io/astral-sh/uv:latest /uv /usr/local/bin/uv
6
+
7
+ ENV UV_PROJECT_ENVIRONMENT=/app/.venv
8
+
9
+ WORKDIR /app
10
+
11
+ # Install deps from lockfile (cache uv downloads for faster rebuilds).
12
+ # This is a flat Shiny app, so only install dependencies, not a package.
13
+ COPY pyproject.toml uv.lock ./
14
+ RUN --mount=type=cache,target=/root/.cache/uv \
15
+ uv sync --frozen --no-dev --no-install-project
16
+
17
+
18
+ ## ------------------------------ Production Stage ---------------------------- ##
19
+ FROM python:3.14-slim-bookworm AS production
20
+
21
+ WORKDIR /app
22
+
23
+ # Environment set-up
24
+ COPY --from=builder /app/.venv /app/.venv
25
+ ENV PATH="/app/.venv/bin:$PATH"
26
+
27
+ # Copy only what the app needs at runtime
28
+ COPY app.py ./app.py
29
+ COPY setup.py ./setup.py
30
+ COPY calcs.py ./calcs.py
31
+ COPY visuals.py ./visuals.py
32
+ COPY data ./data
33
+ COPY logos ./logos
34
+ COPY _brand.yml ./_brand.yml
35
+
36
+ # Required for deployment on Hugging Face Spaces
37
+ EXPOSE 7860
38
+ CMD ["shiny", "run", "app.py", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +0,0 @@
1
- ---
2
- title: Yearly Explorer
3
- emoji: 👀
4
- colorFrom: blue
5
- colorTo: blue
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
_brand.yml ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ─────────────────────────────────────────────
2
+ # _brand.yml — joseph-data
3
+ # ─────────────────────────────────────────────
4
+
5
+ meta:
6
+ name: joseph-data
7
+ link: https://github.com/joseph-data
8
+
9
+ logo:
10
+ images:
11
+ lab: logos/lab.svg
12
+ small: lab
13
+ medium: lab
14
+ large: lab
15
+
16
+ # ─── Color ───────────────────────────────────
17
+ color:
18
+ palette:
19
+ black: "#1C2826"
20
+ blue: "#0C0A3E"
21
+ gray: "#2A2E45"
22
+ neutral: "#F9F7F1"
23
+ red: "#BA274A"
24
+ violet: "#4D6CFA"
25
+
26
+ background: neutral
27
+ foreground: black
28
+ primary: blue
29
+ secondary: violet
30
+ tertiary: gray
31
+ danger: red
32
+ light: neutral
33
+ dark: black
34
+
35
+ # ─── Typography ──────────────────────────────
36
+ typography:
37
+ fonts:
38
+ - family: Nunito Sans
39
+ source: google
40
+ weight: [300, 400, 600]
41
+ style: normal
42
+ - family: Montserrat
43
+ source: google
44
+ weight: [600, 700, 800]
45
+ style: normal
46
+ - family: Fira Code
47
+ source: google
48
+ weight: [400, 500]
49
+ style: normal
50
+
51
+ base:
52
+ family: Nunito Sans
53
+ size: 1rem
54
+ weight: 400
55
+ line-height: 1.5
56
+
57
+ headings:
58
+ family: Montserrat
59
+ weight: 700
60
+ color: primary
61
+ line-height: 1.2
62
+
63
+ monospace:
64
+ family: Fira Code
65
+ size: 0.9em
66
+ weight: 400
67
+
68
+ monospace-inline:
69
+ color: primary
70
+ background-color: light
71
+
72
+ monospace-block:
73
+ color: foreground
74
+ background-color: light
75
+ line-height: 1.6
76
+
77
+ link:
78
+ color: secondary
79
+ decoration: underline
app.py ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ import polars as pl
4
+ from shiny import reactive
5
+ from shiny.express import app_opts, input, render, ui
6
+ from shinywidgets import render_plotly
7
+
8
+ app_opts(static_assets={"/logos": Path(__file__).parent / "logos"})
9
+
10
+ import calcs
11
+ import visuals
12
+ from setup import (
13
+ AGES,
14
+ INTRO_MD,
15
+ LEVELS,
16
+ METRICS,
17
+ SEXES,
18
+ YEAR_MAX,
19
+ YEAR_MIN,
20
+ YEARS,
21
+ as_great_table_html,
22
+ build_choices_by_level,
23
+ download_extension,
24
+ download_media_type,
25
+ export_filtered_data,
26
+ lf,
27
+ )
28
+
29
+ LEVEL_LABELS = {
30
+ "SSYK1": "SSYK 1 - Major groups",
31
+ "SSYK2": "SSYK 2 - Minor groups",
32
+ "SSYK3": "SSYK 3 - Unit groups",
33
+ "SSYK4": "SSYK 4 - Detailed units",
34
+ }
35
+ OCCUPATION_CHOICES = build_choices_by_level(lf, LEVELS)
36
+ DEFAULT_LEVEL = "SSYK4" if "SSYK4" in LEVELS else LEVELS[0]
37
+ DEFAULT_OCCUPATION = next(iter(OCCUPATION_CHOICES[DEFAULT_LEVEL]))
38
+
39
+ ui.page_opts(
40
+ title=ui.tags.span(
41
+ ui.tags.img(src="logos/lab.svg", height="32px", style="margin-right:10px;vertical-align:middle;"),
42
+ "Yearly DAIOE Explorer of Swedish Occupations",
43
+ ),
44
+ theme=ui.Theme.from_brand(__file__),
45
+ fillable=True,
46
+ lang="en",
47
+ full_width=True,
48
+ )
49
+
50
+
51
@reactive.calc
def _download_frame():
    """Collect the rows that match the current download-tab filters.

    The level and year-range filters are always applied. The sex, age-group
    and occupation filters are only applied when the user has narrowed them
    (an empty selection, or the "All" age choice, means "no restriction").
    """
    picked_occupations = input.download_occupation()
    year_span = input.download_years()
    picked_age = input.download_age()
    picked_sexes = list(input.download_sex())

    # Mandatory predicates first; optional ones are appended below.
    predicates = [
        pl.col("level") == input.download_level(),
        pl.col("year").is_between(int(year_span[0]), int(year_span[1])),
    ]
    if picked_sexes:
        predicates.append(pl.col("sex").is_in(picked_sexes))
    if picked_age != "All":
        predicates.append(pl.col("age_group") == picked_age)
    if picked_occupations:
        predicates.append(pl.col("occupation").is_in(list(picked_occupations)))

    # Multiple predicates passed to filter() are combined with logical AND.
    return lf.filter(*predicates).collect()
70
+
71
+
72
@reactive.calc
def occ_summary():
    """Reactive summary (or None) for the currently selected occupation and year."""
    selected_occupation = input.occupation()
    selected_year = int(input.occ_year())
    return calcs.get_occ_summary(lf, selected_occupation, selected_year)
76
+
77
+
78
+
79
+
80
@reactive.calc
def occ_employment_by_age():
    """Reactive long-format employment data by age group for the line chart."""
    selected_occupation = input.occupation()
    span = input.chart_year_range()
    year_bounds = (int(span[0]), int(span[1]))
    selected_ages = list(input.chart_age_groups())
    return calcs.get_occ_employment_by_age(
        lf,
        selected_occupation,
        year_bounds,
        selected_ages,
    )
89
+
90
+
91
+ with ui.navset_pill(id="tab"):
92
+ with ui.nav_panel(title="1. Occupation View"):
93
+ with ui.layout_columns(col_widths=[6, 6]):
94
+ with ui.card(full_screen=True):
95
+ ui.markdown(INTRO_MD)
96
+ with ui.div(class_="d-flex gap-3 align-items-end"):
97
+ ui.input_select(
98
+ "occ_level",
99
+ "SSYK level",
100
+ choices={level: LEVEL_LABELS.get(level, level) for level in LEVELS},
101
+ selected=DEFAULT_LEVEL,
102
+ width="200px",
103
+ )
104
+ ui.input_selectize(
105
+ "occupation",
106
+ "Occupation",
107
+ choices=OCCUPATION_CHOICES[DEFAULT_LEVEL],
108
+ selected=DEFAULT_OCCUPATION,
109
+ )
110
+ ui.input_select(
111
+ "occ_year",
112
+ "Year",
113
+ choices={y: str(y) for y in YEARS},
114
+ selected=YEAR_MAX,
115
+ width="120px",
116
+ )
117
+
118
+ @render.ui
119
+ def occ_value_boxes():
120
+ """Render employment and % change value boxes for the selected occupation."""
121
+ summary = occ_summary()
122
+ if summary is None:
123
+ return ui.p("No data available.")
124
+ return visuals.build_value_boxes(summary, input.occupation())
125
+
126
+ with ui.card(full_screen=True):
127
+ ui.card_header("AI Exposure by Sub-domain")
128
+
129
+ @render_plotly
130
+ def ai_exposure_bar():
131
+ """Render bar chart of AI exposure level per sub-domain, coloured by index score."""
132
+ df = calcs.get_occ_ai_exposure(lf, input.occupation(), int(input.occ_year()))
133
+ return visuals.build_ai_exposure_bar(df.to_pandas(), input.occupation(), int(input.occ_year()))
134
+
135
+ ui.markdown(visuals.DAIOE_SOURCE_MD)
136
+
137
+ with ui.card(full_screen=True):
138
+ ui.card_header("Employment by Age Group")
139
+ with ui.layout_sidebar():
140
+ with ui.sidebar(width="220px"):
141
+ ui.input_slider(
142
+ "chart_year_range",
143
+ "Year range",
144
+ min=min(YEARS),
145
+ max=max(YEARS),
146
+ value=(min(YEARS), max(YEARS)),
147
+ step=1,
148
+ sep="",
149
+ )
150
+ ui.input_selectize(
151
+ "chart_age_groups",
152
+ "Age groups",
153
+ choices=AGES,
154
+ selected=AGES[:2],
155
+ multiple=True,
156
+ )
157
+ @render_plotly
158
+ def occ_age_chart():
159
+ """Render a line chart of 1-yr employment % change per age group."""
160
+ df = occ_employment_by_age()
161
+ return visuals.build_age_chart(df.to_pandas(), input.occupation())
162
+
163
+ ui.markdown(visuals.SCB_SOURCE_MD)
164
+
165
+ with ui.card():
166
+ "Card 4"
167
+
168
+ with ui.nav_panel(title="2. Comparison View"):
169
+ "Panel B content"
170
+
171
+ with ui.nav_panel(title="3. Download"):
172
+ ui.p(
173
+ "Export the filtered row-level dataset or inspect a compact preview before downloading.",
174
+ class_="text-muted mb-3",
175
+ )
176
+ with ui.div(class_="d-flex gap-3 align-items-end flex-wrap mb-3"):
177
+ ui.input_select(
178
+ "download_level",
179
+ "SSYK level",
180
+ choices={level: LEVEL_LABELS.get(level, level) for level in LEVELS},
181
+ selected=DEFAULT_LEVEL,
182
+ width="200px",
183
+ )
184
+ ui.input_slider(
185
+ "download_years",
186
+ "Year range",
187
+ min=YEAR_MIN,
188
+ max=YEAR_MAX,
189
+ value=(YEAR_MIN, YEAR_MAX),
190
+ step=1,
191
+ sep="",
192
+ width="220px",
193
+ )
194
+ ui.input_checkbox_group(
195
+ "download_sex",
196
+ "Sex",
197
+ choices={"men": "Men", "women": "Women"},
198
+ selected=SEXES,
199
+ inline=True,
200
+ )
201
+ ui.input_select(
202
+ "download_age",
203
+ "Age group",
204
+ choices={"All": "All ages"} | {a: a for a in AGES},
205
+ selected="All",
206
+ width="200px",
207
+ )
208
+ ui.input_selectize(
209
+ "download_occupation",
210
+ "Occupations",
211
+ choices=OCCUPATION_CHOICES[DEFAULT_LEVEL],
212
+ multiple=True,
213
+ options={"placeholder": "All occupations"},
214
+ )
215
+ ui.input_select(
216
+ "download_format",
217
+ "Format",
218
+ choices={"csv": "CSV", "parquet": "Parquet", "excel": "Excel"},
219
+ selected="csv",
220
+ width="120px",
221
+ )
222
+
223
+ with ui.layout_columns(col_widths=[3, 9]):
224
+ with ui.value_box(theme="primary"):
225
+ "Rows"
226
+
227
+ @render.text
228
+ def download_rows_count():
229
+ """Show count of rows matching current download filters."""
230
+ return f"{_download_frame().height:,}"
231
+
232
+ with ui.card():
233
+ ui.card_header("Export")
234
+
235
@render.download(
    filename=lambda: (
        "daioe_swedish_occupations_"
        f"{__import__('datetime').datetime.now().strftime('%Y-%m-%d')}."
        f"{download_extension(input.download_format())}"
    ),
    media_type=lambda: download_media_type(input.download_format()),
    label="Download filtered data",
)
def download_data():
    """Stream the filtered data in the selected format.

    Shiny interprets a *returned* string as the path of a file to send and
    iterates any other return value, so in-memory content (CSV text or
    Parquet/Excel bytes) has to be *yielded* as a single chunk instead of
    returned.
    """
    yield export_filtered_data(
        _download_frame().to_pandas(),
        input.download_format(),
    )
250
+
251
+ with ui.card(full_screen=True):
252
+ ui.card_header("Preview (first 50 rows)")
253
+
254
+ @render.ui
255
+ def download_preview():
256
+ """Render a preview table of the filtered download data."""
257
+ cols = [
258
+ "level", "ssyk_code", "occupation", "year", "sex",
259
+ "age_group", "count", "daioe_genai_wavg",
260
+ "daioe_allapps_wavg", "pct_chg_1y",
261
+ ]
262
+ data = _download_frame().select(cols).head(50).to_pandas()
263
+ return as_great_table_html(data, METRICS)
264
+
265
+
266
@reactive.effect
def _sync_occupation_choices():
    """Refresh the occupation dropdown whenever the SSYK level changes.

    The first occupation of the new level becomes the selected value.
    """
    level_choices = OCCUPATION_CHOICES[input.occ_level()]
    first_occupation = next(iter(level_choices))
    ui.update_selectize(
        "occupation",
        choices=level_choices,
        selected=first_occupation,
    )
272
+
273
+
274
@reactive.effect
def _sync_download_occupation_choices():
    """Refresh the download-tab occupation list when its SSYK level changes.

    The selection is cleared, which the download filter treats as
    "all occupations".
    """
    level_choices = OCCUPATION_CHOICES[input.download_level()]
    ui.update_selectize(
        "download_occupation",
        choices=level_choices,
        selected=[],
    )
calcs.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import polars as pl
2
+
3
+
4
def get_occ_summary(lf: pl.LazyFrame, occupation: str, year: int) -> dict | None:
    """
    Summarise employment and percentage changes for one occupation in one year.

    Employment is the sum of ``count`` over all matching rows; each percentage
    change is the plain (unweighted) mean of the matching rows, or None when
    that mean is undefined.

    Returns a dict with keys: employment, pct_1y, pct_3y, pct_5y, year.
    Returns None if no rows match the filters.

    NOTE(review): rows are matched by occupation name only, not by SSYK level.
    If the same name exists at more than one level, all of them are aggregated
    together - confirm against the dataset.
    """
    wanted = ["count", "pct_chg_1y", "pct_chg_3y", "pct_chg_5y", "year"]
    is_target = (pl.col("occupation") == occupation) & (pl.col("year") == year)
    rows = lf.filter(is_target).select(wanted).collect()

    if rows.is_empty():
        return None

    summary = {"employment": rows["count"].sum()}
    for horizon in ("1y", "3y", "5y"):
        average = rows[f"pct_chg_{horizon}"].mean()
        summary[f"pct_{horizon}"] = float(average) if average is not None else None
    summary["year"] = int(rows["year"][0])
    return summary
33
+
34
+
35
+ AI_WAVG_COLS = [
36
+ "daioe_genai_wavg",
37
+ "daioe_allapps_wavg",
38
+ "daioe_stratgames_wavg",
39
+ "daioe_videogames_wavg",
40
+ "daioe_imgrec_wavg",
41
+ "daioe_imgcompr_wavg",
42
+ "daioe_imggen_wavg",
43
+ "daioe_readcompr_wavg",
44
+ "daioe_lngmod_wavg",
45
+ "daioe_translat_wavg",
46
+ "daioe_speechrec_wavg",
47
+ ]
48
+
49
+ AI_LABELS = {
50
+ "daioe_genai_wavg": "🧠 Generative AI",
51
+ "daioe_allapps_wavg": "📚 All Applications",
52
+ "daioe_stratgames_wavg": "♟️ Strategy Games",
53
+ "daioe_videogames_wavg": "🎮 Video Games",
54
+ "daioe_imgrec_wavg": "🖼️ Image Recognition",
55
+ "daioe_imgcompr_wavg": "🧩 Image Comprehension",
56
+ "daioe_imggen_wavg": "🎨 Image Generation",
57
+ "daioe_readcompr_wavg": "📖 Reading Comprehension",
58
+ "daioe_lngmod_wavg": "✍️ Language Modeling",
59
+ "daioe_translat_wavg": "🌐 Translation",
60
+ "daioe_speechrec_wavg": "🎙️ Speech Recognition",
61
+ }
62
+
63
+
64
+ AI_LEVEL_COLS = [c.replace("_wavg", "_Level_Exposure") for c in AI_WAVG_COLS]
65
+ AI_PCTL_COLS = [f"pctl_{c}" for c in AI_WAVG_COLS]
66
+
67
+ EXPOSURE_LABELS = {1: "Very Low", 2: "Low", 3: "Medium", 4: "High", 5: "Very High"}
68
+
69
+
70
def get_occ_ai_exposure(
    lf: pl.LazyFrame, occupation: str, year: int,
) -> pl.DataFrame:
    """
    Return mean weighted AI exposure scores, exposure levels, and percentile ranks per sub-domain.

    Returns a long-format DataFrame with columns: domain, score, level, level_label, percentile,
    sorted by score. When no rows match the occupation/year filter an *empty* DataFrame with
    those columns is returned, so callers can rely on an emptiness check instead of receiving
    one all-null row per sub-domain.
    Used to power the ranked horizontal bar chart in Card 2.
    """
    import math

    select_cols = AI_WAVG_COLS + AI_LEVEL_COLS + AI_PCTL_COLS
    df = (
        lf.filter(
            (pl.col("occupation") == occupation) & (pl.col("year") == year),
        )
        .select(select_cols)
        .collect()
    )

    if df.is_empty():
        # Nothing matched: hand back a typed, zero-row frame rather than
        # eleven rows of nulls that would render as a blank chart.
        return pl.DataFrame(
            schema={
                "domain": pl.String,
                "score": pl.Float64,
                "level": pl.Int64,
                "level_label": pl.String,
                "percentile": pl.Float64,
            },
        )

    rows = []
    # The three column lists are derived from AI_WAVG_COLS, so they always
    # have equal length; strict=True makes any future drift fail loudly.
    for wavg_col, level_col, pctl_col in zip(AI_WAVG_COLS, AI_LEVEL_COLS, AI_PCTL_COLS, strict=True):
        raw_level = df[level_col].mean()
        # mean() yields None for all-null data and NaN if the column holds
        # NaN; round() would raise on NaN, so treat both as "no level".
        has_level = raw_level is not None and not math.isnan(raw_level)
        level_val = round(raw_level) if has_level else None
        rows.append({
            "domain": AI_LABELS[wavg_col],
            "score": df[wavg_col].mean(),
            "level": level_val,
            "level_label": EXPOSURE_LABELS.get(level_val, "Unknown"),
            "percentile": df[pctl_col].mean(),
        })
    return pl.DataFrame(rows).sort("score")
100
+
101
+
102
def get_occ_ai_trend(
    lf: pl.LazyFrame, occupation: str, year_range: tuple[int, int],
) -> pl.DataFrame:
    """
    Compute the yearly mean "All Applications" exposure for one occupation.

    ``year_range`` is an inclusive (first_year, last_year) pair.
    Returns a DataFrame with columns: year, daioe_allapps_wavg, sorted by year.
    Used to power the trend line in Card 2.
    """
    first_year, last_year = year_range
    in_scope = (pl.col("occupation") == occupation) & pl.col("year").is_between(
        first_year, last_year,
    )
    yearly_mean = (
        lf.filter(in_scope)
        .group_by("year")
        .agg(pl.col("daioe_allapps_wavg").mean())
        .sort("year")
    )
    return yearly_mean.collect()
123
+
124
+
125
def get_occ_employment_by_age(
    lf: pl.LazyFrame,
    occupation: str,
    year_range: tuple[int, int],
    age_groups: list[str],
) -> pl.DataFrame:
    """
    Aggregate employment per year and age group for one occupation.

    ``year_range`` is an inclusive (first_year, last_year) pair and only the
    age groups listed in ``age_groups`` are kept.

    Used to power the employment change line chart in Card 3.
    Returns a long-format DataFrame with columns: year, age_group, count
    (summed) and pct_chg_1y (mean), sorted by age_group then year.
    """
    first_year, last_year = year_range
    in_scope = (
        (pl.col("occupation") == occupation)
        & pl.col("year").is_between(first_year, last_year)
        & pl.col("age_group").is_in(age_groups)
    )
    aggregations = [
        pl.col("count").sum(),
        pl.col("pct_chg_1y").mean(),
    ]
    per_group = (
        lf.filter(in_scope)
        .group_by(["year", "age_group"])
        .agg(aggregations)
        .sort(["age_group", "year"])
    )
    return per_group.collect()
data/daioe_scb_years_processed.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1ecf3208ed35a24a24d20882e72a9106f3fda8a9dcc52eb0e040661b3ecf18f
3
+ size 3022908
logos/lab.svg ADDED
md_files/intro.md ADDED
@@ -0,0 +1 @@
 
 
1
+ This dashboard provides data on current employment by occupation across the SSYK levels, and also highlights the changes in employment for varied levels of AI exposure.
pyproject.toml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "ai-econ-daioe-years-v2"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.14"
7
+ dependencies = [
8
+ "faicons>=0.2.2",
9
+ "great-tables>=0.21.0",
10
+ "pandas>=3.0.2",
11
+ "pathlib>=1.0.1",
12
+ "plotly[express]>=6.7.0",
13
+ "polars>=1.40.1",
14
+ "pyarrow>=24.0.0",
15
+ "shiny[theme]>=1.6.1",
16
+ "shinywidgets>=0.8.1",
17
+ ]
setup.py ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import importlib.util
2
+ import io
3
+ import re
4
+ from pathlib import Path
5
+
6
+ import pandas as pd
7
+ import plotly.graph_objects as go
8
+ import polars as pl
9
+ from great_tables import GT
10
+ from shiny import ui
11
+
12
+ # ---------------------------------------------------
13
+ # Markdown Files
14
+ # ------------
15
+ # BASE_DIR = Path(__file__).resolve().parent
16
+
17
BASE_DIR = Path.cwd()

# The intro text is optional at runtime: the repository's .dockerignore
# excludes md_files/ and the Dockerfile does not copy it, so inside the
# container the file is absent. Fall back to a built-in text instead of
# crashing the whole app at import time.
_INTRO_PATH = BASE_DIR / "md_files" / "intro.md"
try:
    INTRO_MD = _INTRO_PATH.read_text(encoding="utf-8")
except FileNotFoundError:
    INTRO_MD = (
        "This dashboard provides data on current employment by occupation "
        "across the SSYK levels, and also highlights the changes in "
        "employment for varied levels of AI exposure."
    )
20
+
21
+
22
+ # ---------------------------------------------------
23
+ # Data Preliminaries
24
+ # ---------------------------------------------------
25
+
26
+ DATA_PATH = BASE_DIR / "data" / "daioe_scb_years_processed.parquet"
27
+
28
+ lf = pl.scan_parquet(DATA_PATH)
29
+
30
+ lf.collect_schema()
31
+
32
+
33
+ # ---------------------------------------------------
34
+ # Defining Input Values
35
+ # ---------------------------------------------------
36
+
37
+ # 1. SSYK12 Levels
38
+
39
+ LEVELS = lf.select(pl.col("level").unique().sort()).collect().to_series().to_list()
40
+
41
+
42
def build_choices_by_level(
    lf: pl.LazyFrame,
    levels: list[str],
) -> dict[str, dict[str, str]]:
    """Map each SSYK level to its selectable occupations.

    For every level in ``levels`` the distinct occupation names at that level
    are collected in sorted order and turned into a ``{name: name}`` dict,
    i.e. the value and the label of each choice are the same string.
    """

    def _occupations_at(level_code: str) -> list[str]:
        # One small query per level: distinct, sorted occupation names.
        names = (
            lf.filter(pl.col("level") == level_code)
            .select(pl.col("occupation").unique().sort())
            .collect()
        )
        return names.to_series().to_list()

    return {
        level_code: {name: name for name in _occupations_at(level_code)}
        for level_code in levels
    }
57
+
58
+
59
+ # 2. Men and Women
60
+
61
+ SEXES = lf.select(pl.col("sex").unique().sort()).collect().to_series().to_list()
62
+
63
+ # 3. Age groupings
64
+
65
+ AGE_ORDER = [
66
+ "Early Career 1 (16-24)",
67
+ "Early Career 2 (25-29)",
68
+ "Developing (30-34)",
69
+ "Mid-Career 1 (35-39)",
70
+ "Mid-Career 1 (40-44)",
71
+ "Mid-Career 2 (45-49)",
72
+ "Senior (50+)",
73
+ ]
74
+
75
+ present = lf.select(pl.col("age_group").unique()).collect().to_series().to_list()
76
+
77
+ AGES = [x for x in AGE_ORDER if x in present]
78
+
79
+
80
+ YEARS = lf.select(pl.col("year").unique().sort()).collect().to_series().to_list()
81
+
82
+ # 4. Years from the dataset
83
+
84
+ YEAR_MIN, YEAR_MAX = min(YEARS), max(YEARS)
85
+
86
+ # 5. AI Sub-Indexes
87
+
88
+ METRICS: dict[str, str] = {
89
+ "daioe_genai": "🧠 Generative AI",
90
+ "daioe_allapps": "📚 All Applications",
91
+ "daioe_stratgames": "♟️ Strategy Games",
92
+ "daioe_videogames": "🎮 Video Games (Real-Time)",
93
+ "daioe_imgrec": "🖼️🔎 Image Recognition",
94
+ "daioe_imgcompr": "🧩🖼️ Image Comprehension",
95
+ "daioe_imggen": "🖌️🖼️ Image Generation",
96
+ "daioe_readcompr": "📖 Reading Comprehension",
97
+ "daioe_lngmod": "✍️🤖 Language Modeling",
98
+ "daioe_translat": "🌐🔤 Translation",
99
+ "daioe_speechrec": "🗣️🎙️ Speech Recognition",
100
+ }
101
+
102
+
103
+ first_cols = [
104
+ "level",
105
+ "ssyk_code",
106
+ "occupation",
107
+ "year",
108
+ "sex",
109
+ "age",
110
+ "age_group",
111
+ "count",
112
+ "weight_sum",
113
+ "chg_1y",
114
+ "chg_3y",
115
+ "chg_5y",
116
+ "pct_chg_1y",
117
+ "pct_chg_3y",
118
+ "pct_chg_5y",
119
+ ]
120
+
121
+
122
+ # ---------------------------------------------------
123
+ # Shared UI Helpers
124
+ # ---------------------------------------------------
125
def apply_plot_style(fig: go.Figure, brand: dict[str, str]) -> go.Figure:
    """Give *fig* the shared chart look and return it.

    Sets transparent backgrounds, the base font (coloured with
    ``brand["text"]``), hover-label styling and margins, then applies the
    same light grid without a zero line to both axes. The figure is modified
    in place.
    """
    transparent = "rgba(0,0,0,0)"
    layout_settings = {
        "paper_bgcolor": transparent,
        "plot_bgcolor": transparent,
        "font": {"family": "Nunito Sans", "color": brand["text"]},
        "hoverlabel": {"bgcolor": "white", "font_size": 12},
        "margin": {"l": 20, "r": 20, "t": 40, "b": 20},
    }
    fig.update_layout(**layout_settings)

    for update_axis in (fig.update_xaxes, fig.update_yaxes):
        update_axis(gridcolor="#E5E5E5", zeroline=False)
    return fig
137
+
138
+
139
def empty_figure(message: str, brand: dict[str, str]) -> go.Figure:
    """Build a styled placeholder figure that only shows *message*.

    Both axes are hidden and the shared plot style is applied before the
    figure is returned.
    """
    placeholder = go.Figure()
    placeholder.add_annotation(text=message, showarrow=False, font_size=16)
    for hide_axis in (placeholder.update_xaxes, placeholder.update_yaxes):
        hide_axis(visible=False)
    return apply_plot_style(placeholder, brand)
146
+
147
+
148
+ # ---------------------------------------------------
149
+ # Shared Table/Label Helpers
150
+ # ---------------------------------------------------
151
def metric_display_name(metric_key: str, metrics: dict[str, str]) -> str:
    """Return the label for *metric_key* with any leading icon removed.

    The label is looked up in ``metrics``; unknown keys fall back to the key
    itself with underscores turned into spaces and title-cased. Everything in
    front of the first ASCII letter or digit (emoji, symbols, spaces) is then
    dropped and surrounding whitespace is trimmed.
    """
    if metric_key in metrics:
        raw_label = metrics[metric_key]
    else:
        raw_label = metric_key.replace("_", " ").title()
    without_icon = re.sub(r"^[^A-Za-z0-9]+\s*", "", raw_label)
    return without_icon.strip()
155
+
156
+
157
def readable_column_name(col: str, metrics: dict[str, str]) -> str:
    """Convert a raw dataset column name into a readable table header.

    Resolution order:
    1. a fixed mapping for the well-known identifier columns;
    2. suffix/prefix patterns for metric columns (``pctl_*_wavg``, ``*_wavg``,
       ``*_avg``, ``*_level_exposure``, matched case-insensitively), whose
       metric part is rendered via ``metric_display_name``;
    3. a generic fallback: underscores to spaces, title case, and the
       acronyms SSYK, AI and DAIOE upper-cased.
    """
    exact = {
        "ssyk_code": "SSYK Code",
        "age_group": "Age Group",
        "count": "Employees",
        "year": "Year",
        "sex": "Sex",
        "level": "SSYK Level",
        "occupation": "Occupation",
        "chg_1y": "1-year Change",
        "chg_3y": "3-year Change",
        "chg_5y": "5-year Change",
    }
    if col in exact:
        return exact[col]

    col_l = col.lower()
    # The percentile pattern must be tested before the plain "_wavg" one,
    # and "_wavg" before "_avg", because the later patterns also match.
    if col_l.startswith("pctl_") and col_l.endswith("_wavg"):
        metric_key = col[5:-5]
        return f"{metric_display_name(metric_key, metrics)} Percentile (Weighted Avg)"
    if col_l.endswith("_wavg"):
        metric_key = col[:-5]
        return f"{metric_display_name(metric_key, metrics)} (Weighted Avg)"
    if col_l.endswith("_avg"):
        metric_key = col[:-4]
        return f"{metric_display_name(metric_key, metrics)} (Average)"
    if col_l.endswith("_level_exposure"):
        metric_key = col[: -len("_level_exposure")]
        return f"{metric_display_name(metric_key, metrics)} Exposure Level"

    fallback = col.replace("_", " ").title()
    fallback = fallback.replace("Ssyk", "SSYK").replace("Daioe", "DAIOE")
    # Upper-case "Ai" only when it is the acronym: a blanket replace would
    # also rewrite ordinary words such as "Air" or "Aid" to "AIr" / "AId".
    return re.sub(r"Ai(?![a-z])", "AI", fallback)
192
+
193
+
194
def as_great_table_html(df, metrics: dict[str, str]) -> ui.TagChild:
    """Render a pandas DataFrame as Great Tables HTML with readable headers.

    An empty frame yields a short "no data" paragraph instead of a table.
    Otherwise every column is renamed through ``readable_column_name`` and
    float columns (except "Year") are formatted with two decimals.
    """
    if df.empty:
        return ui.p("No data available for the selected filters.")

    header_map = {raw: readable_column_name(raw, metrics) for raw in df.columns}
    renamed = df.rename(columns=header_map)

    numeric_headers = []
    for header in renamed.columns:
        if header != "Year" and pd.api.types.is_float_dtype(renamed[header]):
            numeric_headers.append(header)

    table = GT(renamed).opt_row_striping()
    table = table.tab_options(table_font_names=["Nunito Sans", "Arial", "sans-serif"])
    table = table.opt_stylize(style=2, color="blue")
    if numeric_headers:
        table = table.fmt_number(columns=numeric_headers, decimals=2)

    return ui.HTML(table.as_raw_html())
220
+
221
+
222
+ # ---------------------------------------------------
223
+ # Shared Download Helpers
224
+ # ---------------------------------------------------
225
def download_extension(fmt: str) -> str:
    """Return the file extension for a download format (``csv`` if unknown)."""
    if fmt == "excel":
        return "xlsx"
    if fmt == "parquet":
        return "parquet"
    return "csv"
228
+
229
+
230
def download_media_type(fmt: str) -> str:
    """Return the media type sent to the browser for a download format.

    Anything other than ``parquet`` or ``excel`` is served as ``text/csv``.
    """
    media_types = {
        "parquet": "application/octet-stream",
        "excel": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    }
    return media_types.get(fmt, "text/csv")
237
+
238
+
239
+ def export_filtered_data(df, fmt: str) -> str | bytes:
240
+ """Export a pandas DataFrame to csv/parquet/excel payload for Shiny download."""
241
+ if fmt == "parquet":
242
+ return df.to_parquet(index=False)
243
+
244
+ if fmt == "excel":
245
+ engine = None
246
+ if importlib.util.find_spec("openpyxl") is not None:
247
+ engine = "openpyxl"
248
+ elif importlib.util.find_spec("xlsxwriter") is not None:
249
+ engine = "xlsxwriter"
250
+ else:
251
+ raise RuntimeError("Excel export requires openpyxl or xlsxwriter.")
252
+
253
+ buffer = io.BytesIO()
254
+ df.to_excel(buffer, index=False, engine=engine)
255
+ return buffer.getvalue()
256
+
257
+ return df.to_csv(index=False)
uv.lock ADDED
The diff for this file is too large to render. See raw diff
 
visuals.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import faicons as fa
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ import plotly.graph_objects as go
5
+ from shiny import ui
6
+
7
+ SCB_SOURCE_MD = (
8
+ "Source: [Swedish Occupational Register, SCB]"
9
+ "(https://www.scb.se/en/finding-statistics/statistics-by-subject-area/"
10
+ "labour-market/labour-force-supply/"
11
+ "the-swedish-occupational-register-with-statistics/)"
12
+ )
13
+
14
+ DAIOE_SOURCE_MD = "Source: [DAIOEs](https://www.ai-econlab.com/ai-exposure-daioe)"
15
+
16
+ # Brand colours from _brand.yml
17
+ _C_BG = "rgba(0,0,0,0)"
18
+ _C_GRID = "#E5E5E5"
19
+ _C_TEXT = "#1C2826" # black
20
+ _C_TITLE = "#0C0A3E" # primary / blue
21
+
22
+ _FONT_BASE = "Nunito Sans"
23
+ _FONT_HEAD = "Montserrat"
24
+
25
+ _BASE_LAYOUT = {
26
+ "paper_bgcolor": _C_BG,
27
+ "plot_bgcolor": _C_BG,
28
+ "font": {"family": _FONT_BASE, "color": _C_TEXT, "size": 13},
29
+ "title_font": {"family": _FONT_HEAD, "color": _C_TITLE, "size": 15},
30
+ "hoverlabel": {"font": {"family": _FONT_BASE, "size": 12}},
31
+ "margin": {"l": 20, "r": 20, "t": 45, "b": 20},
32
+ }
33
+
34
+
35
def build_value_boxes(summary: dict, occupation: str) -> ui.Tag:
    """
    Build the employment summary value boxes for a given occupation.

    ``summary`` must provide the keys employment, pct_1y, pct_3y, pct_5y and
    year. Returns a div containing a heading, four value boxes (employment
    and the 1/3/5-yr change) and a markdown source note. A missing (None)
    change is shown as "N/A" in a neutral box.
    """
    employment = summary["employment"]
    changes = [
        ("1-yr change", summary["pct_1y"]),
        ("3-yr change", summary["pct_3y"]),
        ("5-yr change", summary["pct_5y"]),
    ]
    as_of_year = summary["year"]

    def _change_box(title, pct):
        # Text and colour: neutral when unknown, red when falling, else green.
        if pct is None:
            shown, box_theme = "N/A", "secondary"
        else:
            falling = pct < 0
            shown = f"{'▼' if falling else '▲'} {pct:.0f}%"
            box_theme = "danger" if falling else "success"
        icon_name = "arrow-trend-up" if pct is None or pct >= 0 else "arrow-trend-down"
        return ui.value_box(
            title=title,
            value=shown,
            showcase=fa.icon_svg(icon_name),
            theme=box_theme,
        )

    employment_box = ui.value_box(
        title="Employment",
        showcase=fa.icon_svg("users"),
        value=f"{employment:,.0f}",
        theme="primary",
    )
    change_boxes = [_change_box(title, pct) for title, pct in changes]

    return ui.div(
        ui.h6(f"National Employment of {occupation}", class_="mt-3 mb-2 fw-semibold"),
        ui.layout_columns(
            employment_box,
            *change_boxes,
            col_widths=[3, 3, 3, 3],
        ),
        ui.markdown(f"Data as at **{as_of_year}**.\n\n{SCB_SOURCE_MD}"),
    )
92
+
93
+
94
def build_age_chart(df: pd.DataFrame, occupation: str) -> go.Figure:
    """
    Draw the 1-yr employment % change per age group as a line chart over time.

    ``df`` needs the columns year, pct_chg_1y, age_group and count; the
    absolute count is only shown in the hover text. An empty ``df`` yields a
    bare, empty figure.
    """
    if df.empty:
        return go.Figure()

    axis_labels = {
        "year": "Year",
        "pct_chg_1y": "Employment change (%)",
        "age_group": "Age Group",
    }
    hover_text = (
        "<b>%{fullData.name}</b><br>"
        "Year: %{x}<br>"
        "Change: %{y:.1f}%<br>"
        "Employment: %{customdata[0]:,}<extra></extra>"
    )
    chart_title = {
        "text": f"Annual Employment Change of {occupation} in Sweden",
        "x": 0.01,
        "xanchor": "left",
    }

    fig = px.line(
        df,
        x="year",
        y="pct_chg_1y",
        color="age_group",
        markers=True,
        custom_data=["count"],
        labels=axis_labels,
    )
    fig.update_traces(hovertemplate=hover_text)
    # Reference line separating growth from decline.
    fig.add_hline(y=0, line_color="grey", line_width=1)
    fig.update_layout(
        **_BASE_LAYOUT,
        title=chart_title,
        legend={"title": None},
        yaxis={"ticksuffix": "%"},
    )
    # dtick=1 keeps the x axis on whole years.
    fig.update_xaxes(gridcolor=_C_GRID, zeroline=False, dtick=1)
    fig.update_yaxes(gridcolor=_C_GRID, zeroline=False)
    return fig
138
+
139
+
140
def build_ai_exposure_bar(df: pd.DataFrame, occupation: str, year: int) -> go.Figure:
    """
    Build a horizontal bar chart of AI exposure percentile rank per sub-domain.

    X-axis: percentile rank, fixed to the 0-100 range.
    Y-axis: AI sub-domains (the ``domain`` labels).
    Bar colour follows the percentile rank on the "Blues" scale.
    Hover shows percentile rank, exposure level label with its value out of 5,
    and the index score.

    ``df`` needs the columns domain, score, level, level_label and percentile.
    Returns an empty figure if df is empty.
    """
    if df.empty:
        return go.Figure()

    # Per-bar extras for the hover text; all three come from the same frame,
    # so strict=True can only fail if that invariant is ever broken.
    hover_extras = list(
        zip(df["level_label"], df["level"], df["score"], strict=True)
    )

    fig = go.Figure(
        go.Bar(
            x=df["percentile"],
            y=df["domain"],
            orientation="h",
            marker={
                "color": df["percentile"],
                "colorscale": "Blues",
                "colorbar": {"title": "Percentile Rank"},
                "showscale": True,
                "cmin": 0,
                "cmax": 100,
            },
            customdata=hover_extras,
            hovertemplate=(
                "<b>%{y}</b><br>"
                "Percentile Rank: %{x:.0f}<br>"
                "Exposure Level: %{customdata[0]} (%{customdata[1]}/5)<br>"
                "Index Score: %{customdata[2]:.3f}<extra></extra>"
            ),
        ),
    )
    fig.update_layout(
        **_BASE_LAYOUT,
        title={
            "text": f"{occupation} Level of AI Exposure ({year})",
            "x": 0.01,
            "xanchor": "left",
        },
        xaxis={"title": "Percentile Rank", "range": [0, 100]},
        yaxis={"title": None},
    )
    fig.update_xaxes(gridcolor=_C_GRID, zeroline=False)
    fig.update_yaxes(gridcolor=_C_GRID, zeroline=False)
    return fig