File size: 6,513 Bytes
966f06e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
import polars as pl


# Weighted-average AI exposure score columns, one per AI sub-domain.
# Every name has the form "daioe_<domain>_wavg"; the order here is the order
# used wherever these columns are iterated.
AI_WAVG_COLS = [
    f"daioe_{domain}_wavg"
    for domain in (
        "genai",
        "allapps",
        "stratgames",
        "videogames",
        "imgrec",
        "imgcompr",
        "imggen",
        "readcompr",
        "lngmod",
        "translat",
        "speechrec",
    )
]

# Human-readable chart label for each weighted-average score column.
# The emoji prefixes are written as Unicode escapes so the source stays pure
# ASCII and cannot be garbled by a wrong file-encoding round trip (the labels
# had previously been corrupted into mojibake).
AI_LABELS = {
    "daioe_genai_wavg": "\U0001F9E0 Generative AI",  # brain
    "daioe_allapps_wavg": "\U0001F4DA All Applications",  # books
    "daioe_stratgames_wavg": "\u265F\uFE0F Strategy Games",  # chess pawn
    "daioe_videogames_wavg": "\U0001F3AE Video Games",  # video game controller
    "daioe_imgrec_wavg": "\U0001F5BC\uFE0F Image Recognition",  # framed picture
    "daioe_imgcompr_wavg": "\U0001F9E9 Image Comprehension",  # puzzle piece
    "daioe_imggen_wavg": "\U0001F3A8 Image Generation",  # artist palette
    "daioe_readcompr_wavg": "\U0001F4D6 Reading Comprehension",  # open book
    "daioe_lngmod_wavg": "\u270D\uFE0F Language Modeling",  # writing hand
    # NOTE(review): the last byte of this glyph was lost in the corruption, so
    # the exact globe variant is an assumption (globe with meridians) - confirm.
    "daioe_translat_wavg": "\U0001F310 Translation",
    "daioe_speechrec_wavg": "\U0001F399\uFE0F Speech Recognition",  # studio microphone
}

# Exposure-level column paired with each score column:
# "<...>_wavg" becomes "<...>_Level_Exposure".
AI_LEVEL_COLS = [
    score_col.replace("_wavg", "_Level_Exposure") for score_col in AI_WAVG_COLS
]
# Percentile-rank column paired with each score column: "pctl_<score column>".
AI_PCTL_COLS = ["pctl_" + score_col for score_col in AI_WAVG_COLS]

# Display label for each integer exposure level, 1 (lowest) through 5 (highest).
EXPOSURE_LABELS = dict(
    enumerate(("Very Low", "Low", "Medium", "High", "Very High"), start=1),
)


def get_occ_summary(lf: pl.LazyFrame, occupation: str, year: int) -> dict | None:
    """
    Summarise employment and percentage changes for one occupation and year.

    Rows matching the occupation and year are grouped by month: emp_count is
    summed within each month (presumably one row per sex - confirm against
    the data) and each pct_chg_* column is averaged. The monthly figures are
    then averaged over all months found.

    Returns a dict with keys: employment, pct_1m, pct_3m, pct_6m, year.
    A percentage entry is None when its mean is undefined.
    Returns None if no row matches the filters.
    """
    # Output key -> source column for the percentage-change figures.
    pct_sources = {
        "pct_1m": "pct_chg_1m",
        "pct_3m": "pct_chg_3m",
        "pct_6m": "pct_chg_6m",
    }
    is_match = (pl.col("occupation") == occupation) & (pl.col("year") == year)
    per_month_aggs = [
        pl.col("emp_count").sum(),
        *(pl.col(source).mean() for source in pct_sources.values()),
        pl.col("year").first(),
    ]
    monthly = lf.filter(is_match).group_by("month").agg(per_month_aggs).collect()

    if monthly.is_empty():
        return None

    summary = {"employment": float(monthly["emp_count"].mean())}
    for key, source in pct_sources.items():
        average = monthly[source].mean()
        summary[key] = float(average) if average is not None else None
    summary["year"] = int(monthly["year"][0])
    return summary


def get_occ_ai_exposure(
    lf: pl.LazyFrame, occupation: str, year: int,
) -> pl.DataFrame:
    """
    Build the per-domain AI exposure table for one occupation and year.

    For each AI sub-domain the matching rows are averaged to give the
    weighted score, the exposure level and the percentile. The mean level is
    rounded to an integer and translated through EXPOSURE_LABELS; a missing,
    zero or unmapped level is labelled "Unknown".

    Returns a long-format DataFrame with columns: domain, score, level,
    level_label, percentile, sorted by ascending score.
    Used to power the ranked horizontal bar chart.
    """
    wanted_cols = AI_WAVG_COLS + AI_LEVEL_COLS + AI_PCTL_COLS
    is_match = (pl.col("occupation") == occupation) & (pl.col("year") == year)
    matched = lf.filter(is_match).select(wanted_cols).collect()

    def _domain_row(score_col: str, level_col: str, pctl_col: str) -> dict:
        # Collapse one sub-domain's three columns into a single output record.
        mean_level = matched[level_col].mean()
        level = None if mean_level is None else round(mean_level)
        if level:
            label = EXPOSURE_LABELS.get(level, "Unknown")
        else:
            label = "Unknown"
        return {
            "domain": AI_LABELS[score_col],
            "score": matched[score_col].mean(),
            "level": level,
            "level_label": label,
            "percentile": matched[pctl_col].mean(),
        }

    records = [
        _domain_row(*column_trio)
        for column_trio in zip(AI_WAVG_COLS, AI_LEVEL_COLS, AI_PCTL_COLS, strict=False)
    ]
    return pl.DataFrame(records).sort("score")


def get_occ_employment_by_sex(
    lf: pl.LazyFrame,
    occupation: str,
    year_range: tuple[int, int],
    sexes: list[str],
) -> pl.DataFrame:
    """
    Return monthly employment per sex for one occupation over a year range.

    year_range is an inclusive (first, last) pair. Within every
    (year, month, sex) group emp_count is summed and pct_chg_1m is averaged.

    Returns a DataFrame with columns: year, month, sex, emp_count, pct_chg_1m,
    ordered by sex, then year, then month.
    Used to power the employment trend line chart in the Occupation View.
    """
    first_year, last_year = year_range
    year_col = pl.col("year")
    in_scope = (
        (pl.col("occupation") == occupation)
        & (year_col >= first_year)
        & (year_col <= last_year)
        & (pl.col("sex").is_in(sexes))
    )
    per_group_aggs = [
        pl.col("emp_count").sum(),
        pl.col("pct_chg_1m").mean(),
    ]
    query = (
        lf.filter(in_scope)
        .group_by(["year", "month", "sex"])
        .agg(per_group_aggs)
        .sort(["sex", "year", "month"])
    )
    return query.collect()


def get_comparison_employment(
    lf: pl.LazyFrame,
    occupations: list[str],
    sexes: list[str],
) -> pl.DataFrame:
    """
    Return employment per year, month and occupation for the comparison view.

    Only rows whose occupation is in occupations and whose sex is in sexes
    are kept. Within every (year, month, occupation) group emp_count is
    summed and pct_chg_1m is averaged, so the selected sexes are combined.

    Returns a DataFrame with columns: year, month, occupation, emp_count,
    pct_chg_1m, ordered by occupation, then year, then month.
    """
    is_selected = pl.col("occupation").is_in(occupations) & pl.col("sex").is_in(sexes)
    per_group_aggs = [
        pl.col("emp_count").sum(),
        pl.col("pct_chg_1m").mean(),
    ]
    query = (
        lf.filter(is_selected)
        .group_by(["year", "month", "occupation"])
        .agg(per_group_aggs)
        .sort(["occupation", "year", "month"])
    )
    return query.collect()


def get_comp_summary(
    lf: pl.LazyFrame,
    occupations: list[str],
    sexes: list[str],
    year: int,
) -> pl.DataFrame:
    """
    Return a per-occupation employment summary for the selected year.

    Aggregation runs in two stages so the figures agree with get_occ_summary
    and get_comparison_employment:

    1. Per occupation and month, emp_count is summed over the selected sexes
       and each pct_chg_* column is averaged.
    2. Per occupation, those monthly figures are averaged over the year.

    Averaging the raw rows in a single step would report the mean of the
    individual sex rows rather than total employment whenever more than one
    sex is selected.

    Only rows whose occupation is in occupations, whose sex is in sexes and
    whose year equals year are considered.

    Returns a DataFrame with columns: occupation, emp_count, pct_chg_1m,
    pct_chg_3m, pct_chg_6m, sorted by occupation.
    Used to populate the summary table in the Comparison View.
    """
    pct_cols = ["pct_chg_1m", "pct_chg_3m", "pct_chg_6m"]

    # Stage 1: one row per (occupation, month) with employment totalled
    # across the selected sexes.
    monthly = (
        lf.filter(
            pl.col("occupation").is_in(occupations)
            & pl.col("sex").is_in(sexes)
            & (pl.col("year") == year),
        )
        .group_by(["occupation", "month"])
        .agg([
            pl.col("emp_count").sum().alias("emp_count"),
            *[pl.col(c).mean().alias(c) for c in pct_cols],
        ])
    )

    # Stage 2: average the monthly figures over the year.
    return (
        monthly.group_by("occupation")
        .agg([
            pl.col("emp_count").mean().alias("emp_count"),
            *[pl.col(c).mean().alias(c) for c in pct_cols],
        ])
        .sort("occupation")
        .collect()
    )


def get_comp_radar(
    lf: pl.LazyFrame,
    occupations: list[str],
    year: int,
) -> pl.DataFrame:
    """
    Return mean AI percentile scores per occupation for the radar chart.

    Only rows whose occupation is in occupations and whose year equals year
    are considered; every column in AI_PCTL_COLS is averaged per occupation.

    Returns a DataFrame with columns: occupation, pctl_<metric>_wavg for each
    metric, sorted by occupation.
    """
    return (
        lf.filter(
            pl.col("occupation").is_in(occupations)
            & (pl.col("year") == year),
        )
        .group_by("occupation")
        .agg([pl.col(c).mean() for c in AI_PCTL_COLS])
        # group_by gives no ordering guarantee; sort so the row order (and
        # anything keyed on it, such as trace order) is stable between runs,
        # matching the other comparison queries in this module.
        .sort("occupation")
        .collect()
    )