Spaces:

joseph-data
/

app_months

Running

App Files Files Community

joseph-data committed 15 days ago

Commit

e4ffd46

verified ·

1 Parent(s): d7ca74c

Sync from GitHub via hub-sync

Browse files

Files changed (4) hide show

.dockerignore +3 -0
Dockerfile +0 -2
app.py +164 -42
data/scb_months_lvl1.parquet +2 -2

.dockerignore CHANGED Viewed

@@ -56,6 +56,9 @@ Thumbs.db
 # Frontend dependency caches (if present)
 node_modules/
 # Local cache / output files
 data/*
 !data/.gitkeep

 # Frontend dependency caches (if present)
 node_modules/
+# Project-specific cruft
+_brand.yml
 # Local cache / output files
 data/*
 !data/.gitkeep

Dockerfile CHANGED Viewed

@@ -34,9 +34,7 @@ ENV PATH="/app/.venv/bin:$PATH"
 # Copy only what the app needs at runtime
 COPY app.py ./app.py
-COPY _brand.yml ./_brand.yml
 COPY data ./data
-COPY logos ./logos
 # Requirement for deployment at hf
 EXPOSE 7860

 # Copy only what the app needs at runtime
 COPY app.py ./app.py
 COPY data ./data
 # Requirement for deployment at hf
 EXPOSE 7860

app.py CHANGED Viewed

@@ -12,6 +12,48 @@ MIN_POINTS_FOR_TRENDLINE = 2
 DATA_PATH = Path(__file__).parent / "data" / "scb_months_lvl1.parquet"
 LOGOS_PATH = Path(__file__).parent / "logos"
 # --- Data Loading ---
 def load_data():
@@ -22,11 +64,9 @@ def load_data():
 df_full = load_data()
-# Identify metric columns
 daioe_metrics = [
     col for col in df_full.columns if col.startswith("daioe_") and col.endswith("_wavg")
 ]
-change_metrics = ["pct_chg_1m", "pct_chg_3m", "pct_chg_6m"]
 sexes = df_full["sex"].unique().to_list() if not df_full.is_empty() else []
 years = sorted(df_full["year"].unique().to_list()) if not df_full.is_empty() else []
 occupations = (
@@ -35,7 +75,22 @@ occupations = (
     else []
 )
-# --- Page Options ---
 app_opts(static_assets={"/logos": LOGOS_PATH})
 ui.page_opts(
@@ -50,7 +105,6 @@ ui.tags.style("""
     justify-content: center;
     margin: 0.25rem 0 1rem;
 }
 .app-logo {
     width: min(180px, 80%);
     height: auto;
@@ -58,33 +112,28 @@ ui.tags.style("""
 }
 """)
 # --- Sidebar ---
-with ui.sidebar():
     ui.div(
         ui.img(src="/logos/lab.svg", alt="AI-Econ Lab logo", class_="app-logo"),
         class_="app-logo-wrap",
     )
     ui.input_select(
         "ai_metric",
-        "Select AI Exposure Metric (Weighted Avg)",
-        choices={
-            m: m.replace("daioe_", "").replace("_wavg", "").title()
-            for m in daioe_metrics
-        },
-        selected=daioe_metrics[-1] if daioe_metrics else None,
     )
     ui.input_select(
         "change_horizon",
-        "Select Employment Change Horizon",
-        choices={
-            m: m.replace("pct_chg_", "").replace("m", " Month").title()
-            for m in change_metrics
-        },
         selected="pct_chg_3m",
     )
     ui.input_slider(
         "year_filter",
-        "Filter by Year",
         min=min(years) if years else 2015,
         max=max(years) if years else 2026,
         value=[min(years), max(years)] if years else [2015, 2026],
@@ -92,21 +141,25 @@ with ui.sidebar():
     )
     ui.input_checkbox_group(
         "sex_filter",
-        "Filter by Sex",
         choices=sexes,
         selected=sexes,
     )
     ui.input_selectize(
         "occ_filter",
-        "Filter by Occupation (Leave blank for all)",
         choices=occupations,
         multiple=True,
     )
     ui.hr()
     ui.markdown("""
-    **About this Dashboard**
-    This app visualizes the relationship between AI Occupational Exposure (DAIOE)
-    and monthly employment changes in Sweden.
     """)
@@ -128,70 +181,139 @@ def filtered_df():
     return df
-# --- Main Layout ---
 with ui.layout_columns(fill=False):
     with ui.value_box(theme="primary"):
-        "Avg Exposure"
         @render.text
         def avg_exposure():
             df = filtered_df()
             if df.is_empty():
-                return "0.0"
             val = df[app_input.ai_metric()].mean()
-            return f"{val:.2f}"
     with ui.value_box(theme="secondary"):
-        "Median % Change"
         @render.text
         def median_change():
             df = filtered_df()
             if df.is_empty():
-                return "0.0%"
             val = df[app_input.change_horizon()].median()
             return f"{val:+.2f}%"
     with ui.value_box(theme="info"):
-        "Observation Count"
         @render.text
         def obs_count():
             return f"{len(filtered_df()):,}"
 with ui.card(full_screen=True):
-    ui.card_header("AI Exposure vs. Employment Change")
     @render_widget
     def scatter_plot():
         df = filtered_df().to_pandas()
         if df.empty:
-            return px.scatter(title="No data available for selected filters")
         fig = px.scatter(
             df,
-            x=app_input.ai_metric(),
-            y=app_input.change_horizon(),
             color="occupation",
             size="emp_count" if "emp_count" in df.columns else None,
-            hover_data=["month", "sex", "emp_count"],
             labels={
-                app_input.ai_metric(): "AI Exposure Score",
-                app_input.change_horizon(): "% Change in Employment",
             },
             template="plotly_white",
-            opacity=0.7,
-            trendline="ols" if len(df) > MIN_POINTS_FOR_TRENDLINE else None,
-            trendline_scope="overall" if len(df) > MIN_POINTS_FOR_TRENDLINE else None,
         )
-        fig.update_layout(legend_title_text="Occupation")
         return fig
 with ui.card(full_screen=True):
-    ui.card_header("Filtered Data Table")
     @render.data_frame
     def data_table():
-        return render.DataGrid(filtered_df().to_pandas())

 DATA_PATH = Path(__file__).parent / "data" / "scb_months_lvl1.parquet"
 LOGOS_PATH = Path(__file__).parent / "logos"
+# Brand-aligned color sequence for occupation dots
+BRAND_COLORS = [
+    "#4D6CFA",  # violet (primary accent)
+    "#BA274A",  # red
+    "#5BC0BE",  # teal
+    "#F9A03F",  # amber
+    "#8B5CF6",  # purple
+    "#0C0A3E",  # deep blue
+    "#E8A838",  # gold
+    "#6B9BC3",  # steel blue
+    "#2A2E45",  # gray-blue
+]
+# Human-readable labels for DAIOE weighted-average metrics
+METRIC_LABELS = {
+    "daioe_allapps_wavg":    "All AI Applications",
+    "daioe_stratgames_wavg": "Strategic Games",
+    "daioe_videogames_wavg": "Video Games",
+    "daioe_imgrec_wavg":     "Image Recognition",
+    "daioe_imgcompr_wavg":   "Image Compression",
+    "daioe_imggen_wavg":     "Image Generation",
+    "daioe_readcompr_wavg":  "Reading Comprehension",
+    "daioe_lngmod_wavg":     "Language Models",
+    "daioe_translat_wavg":   "Translation",
+    "daioe_speechrec_wavg":  "Speech Recognition",
+    "daioe_genai_wavg":      "Generative AI",
+}
+HORIZON_LABELS = {
+    "pct_chg_1m": "1 Month",
+    "pct_chg_3m": "3 Months",
+    "pct_chg_6m": "6 Months",
+}
+# Columns shown in the data table — prioritise the selected metric & horizon,
+# then a curated set of DAIOE weighted averages (avoids dumping all 68 cols).
+TABLE_BASE_COLS = [
+    "year", "month", "sex", "occupation", "emp_count",
+    "pct_chg_1m", "pct_chg_3m", "pct_chg_6m",
+]
+TABLE_DAIOE_COLS = list(METRIC_LABELS.keys())
 # --- Data Loading ---
 def load_data():
 df_full = load_data()
 daioe_metrics = [
     col for col in df_full.columns if col.startswith("daioe_") and col.endswith("_wavg")
 ]
 sexes = df_full["sex"].unique().to_list() if not df_full.is_empty() else []
 years = sorted(df_full["year"].unique().to_list()) if not df_full.is_empty() else []
 occupations = (
     else []
 )
+# Build metric choice dict — fall back gracefully for any unmapped columns
+metric_choices = {
+    m: METRIC_LABELS.get(
+        m,
+        m.replace("daioe_", "").replace("_wavg", "").replace("_", " ").title(),
+    )
+    for m in daioe_metrics
+}
+default_metric = (
+    "daioe_allapps_wavg" if "daioe_allapps_wavg" in daioe_metrics
+    else (daioe_metrics[-1] if daioe_metrics else None)
+)
+# --- Page Setup ---
 app_opts(static_assets={"/logos": LOGOS_PATH})
 ui.page_opts(
     justify-content: center;
     margin: 0.25rem 0 1rem;
 }
 .app-logo {
     width: min(180px, 80%);
     height: auto;
 }
 """)
 # --- Sidebar ---
+with ui.sidebar(title="Filters"):
     ui.div(
         ui.img(src="/logos/lab.svg", alt="AI-Econ Lab logo", class_="app-logo"),
         class_="app-logo-wrap",
     )
     ui.input_select(
         "ai_metric",
+        "AI Exposure Metric",
+        choices=metric_choices,
+        selected=default_metric,
     )
     ui.input_select(
         "change_horizon",
+        "Employment Change Horizon",
+        choices=HORIZON_LABELS,
         selected="pct_chg_3m",
     )
     ui.input_slider(
         "year_filter",
+        "Year Range",
         min=min(years) if years else 2015,
         max=max(years) if years else 2026,
         value=[min(years), max(years)] if years else [2015, 2026],
     )
     ui.input_checkbox_group(
         "sex_filter",
+        "Sex",
         choices=sexes,
         selected=sexes,
     )
     ui.input_selectize(
         "occ_filter",
+        "Occupation (blank = all)",
         choices=occupations,
         multiple=True,
     )
     ui.hr()
     ui.markdown("""
+    **About**
+    This dashboard visualizes the relationship between AI Occupational Exposure
+    (DAIOE) and employment changes across Swedish occupational categories.
+    Data: [Statistics Sweden (SCB)](https://www.scb.se) &
+    DAIOE scores via the AI-Econ Lab.
     """)
     return df
+# --- KPI Cards ---
 with ui.layout_columns(fill=False):
     with ui.value_box(theme="primary"):
+        "Avg AI Exposure"
         @render.text
         def avg_exposure():
             df = filtered_df()
             if df.is_empty():
+                return "—"
             val = df[app_input.ai_metric()].mean()
+            return f"{val:.3f}"
+        ui.p(
+            "Weighted average DAIOE score",
+            style="font-size:0.8rem; opacity:0.85; margin:0;",
+        )
     with ui.value_box(theme="secondary"):
+        "Median Employment Change"
         @render.text
         def median_change():
             df = filtered_df()
             if df.is_empty():
+                return "—"
             val = df[app_input.change_horizon()].median()
             return f"{val:+.2f}%"
+        @render.ui
+        def median_change_label():
+            return ui.p(
+                f"Over {HORIZON_LABELS.get(app_input.change_horizon(), '')}",
+                style="font-size:0.8rem; opacity:0.85; margin:0;",
+            )
     with ui.value_box(theme="info"):
+        "Observations"
         @render.text
         def obs_count():
             return f"{len(filtered_df()):,}"
+        ui.p(
+            "Data points after filtering",
+            style="font-size:0.8rem; opacity:0.85; margin:0;",
+        )
+# --- Scatter Plot ---
 with ui.card(full_screen=True):
+    @render.ui
+    def scatter_header():
+        metric_label = metric_choices.get(app_input.ai_metric(), app_input.ai_metric())
+        horizon_label = HORIZON_LABELS.get(app_input.change_horizon(), app_input.change_horizon())
+        return ui.card_header(f"{metric_label} vs. {horizon_label} Employment Change")
     @render_widget
     def scatter_plot():
         df = filtered_df().to_pandas()
+        metric = app_input.ai_metric()
+        horizon = app_input.change_horizon()
+        metric_label = metric_choices.get(metric, metric)
+        horizon_label = HORIZON_LABELS.get(horizon, horizon)
         if df.empty:
+            return px.scatter(title="No data available for the selected filters.")
+        use_trendline = len(df) > MIN_POINTS_FOR_TRENDLINE
         fig = px.scatter(
             df,
+            x=metric,
+            y=horizon,
             color="occupation",
             size="emp_count" if "emp_count" in df.columns else None,
+            hover_data=["month", "year", "sex", "emp_count"],
             labels={
+                metric:       f"AI Exposure Score — {metric_label}",
+                horizon:      f"% Employment Change ({horizon_label})",
+                "occupation": "Occupation",
+                "emp_count":  "Employment",
+                "month":      "Month",
+                "year":       "Year",
+                "sex":        "Sex",
             },
+            color_discrete_sequence=BRAND_COLORS,
             template="plotly_white",
+            opacity=0.72,
+            trendline="ols" if use_trendline else None,
+            trendline_scope="overall" if use_trendline else None,
         )
+        fig.update_layout(
+            legend_title_text="Occupation",
+            font_family="Nunito Sans",
+            title_font_family="Montserrat",
+            plot_bgcolor="#FFFFFF",
+            paper_bgcolor="#FFFFFF",
+            legend={
+                "bgcolor": "rgba(249,247,241,0.9)",
+                "bordercolor": "#E0DDD6",
+                "borderwidth": 1,
+            },
+            margin={"l": 60, "r": 30, "t": 40, "b": 60},
+        )
+        if use_trendline:
+            fig.update_traces(
+                selector={"mode": "lines"},
+                line={"color": "#0C0A3E", "width": 2, "dash": "dot"},
+            )
         return fig
+# --- Data Table ---
 with ui.card(full_screen=True):
+    ui.card_header("Filtered Data")
     @render.data_frame
     def data_table():
+        df = filtered_df()
+        if df.is_empty():
+            return render.DataGrid(df.to_pandas())
+        metric = app_input.ai_metric()
+        horizon = app_input.change_horizon()
+        # Core columns (year, month, sex, occupation, emp_count) come first, then the selected metric + horizon, then remaining base cols, then other DAIOE wavgs
+        priority = ["year", "month", "sex", "occupation", "emp_count", metric, horizon]
+        rest_daioe = [c for c in TABLE_DAIOE_COLS if c not in priority and c in df.columns]
+        rest_base = [c for c in TABLE_BASE_COLS if c not in priority and c in df.columns]
+        display_cols = [c for c in priority + rest_base + rest_daioe if c in df.columns]
+        return render.DataGrid(df.select(display_cols).to_pandas(), filters=True)

data/scb_months_lvl1.parquet CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b44c3ea17b685243d1d7af5bdbac1a82e1414938fbdfe222d7a3024d3332808e
-size 168138

 version https://git-lfs.github.com/spec/v1
+oid sha256:6e022b2f932438566e431649210e8838551a400f10b3c109610bb3d83dcf7c0a
+size 167890