yujinahn committed on
Commit
b7bc560
·
verified ·
1 Parent(s): c8f1473

Upload datacreation.ipynb

Browse files
Files changed (1) hide show
  1. datacreation.ipynb +1615 -0
datacreation.ipynb ADDED
@@ -0,0 +1,1615 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": []
7
+ },
8
+ "kernelspec": {
9
+ "name": "python3",
10
+ "display_name": "Python 3"
11
+ },
12
+ "language_info": {
13
+ "name": "python"
14
+ }
15
+ },
16
+ "cells": [
17
+ {
18
+ "cell_type": "code",
19
+ "execution_count": 24,
20
+ "metadata": {
21
+ "colab": {
22
+ "base_uri": "https://localhost:8080/"
23
+ },
24
+ "id": "XQZJSSopDet9",
25
+ "outputId": "d7fe7f2e-b030-4b8d-e574-0a6e896e824d"
26
+ },
27
+ "outputs": [
28
+ {
29
+ "output_type": "stream",
30
+ "name": "stdout",
31
+ "text": [
32
+ "Requirement already satisfied: pandas in /usr/local/lib/python3.12/dist-packages (2.2.2)\n",
33
+ "Requirement already satisfied: numpy in /usr/local/lib/python3.12/dist-packages (2.0.2)\n",
34
+ "Requirement already satisfied: matplotlib in /usr/local/lib/python3.12/dist-packages (3.10.0)\n",
35
+ "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.12/dist-packages (1.6.1)\n",
36
+ "Requirement already satisfied: statsmodels in /usr/local/lib/python3.12/dist-packages (0.14.6)\n",
37
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.12/dist-packages (from pandas) (2.9.0.post0)\n",
38
+ "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas) (2025.2)\n",
39
+ "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas) (2026.1)\n",
40
+ "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (1.3.3)\n",
41
+ "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (0.12.1)\n",
42
+ "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (4.62.1)\n",
43
+ "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (1.5.0)\n",
44
+ "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (26.1)\n",
45
+ "Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (11.3.0)\n",
46
+ "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib) (3.3.2)\n",
47
+ "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn) (1.16.3)\n",
48
+ "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn) (1.5.3)\n",
49
+ "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn) (3.6.0)\n",
50
+ "Requirement already satisfied: patsy>=0.5.6 in /usr/local/lib/python3.12/dist-packages (from statsmodels) (1.0.2)\n",
51
+ "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)\n"
52
+ ]
53
+ }
54
+ ],
55
+ "source": [
56
+ "!pip install pandas numpy matplotlib scikit-learn statsmodels"
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "source": [
62
+ "import random\n",
63
+ "import numpy as np\n",
64
+ "import pandas as pd\n",
65
+ "\n",
66
+ "random.seed(42)\n",
67
+ "np.random.seed(42)"
68
+ ],
69
+ "metadata": {
70
+ "id": "yzXlo3jYDi0f"
71
+ },
72
+ "execution_count": 25,
73
+ "outputs": []
74
+ },
75
+ {
76
+ "cell_type": "code",
77
+ "source": [
78
+ "print(\"=\" * 50)\n",
79
+ "print(\"STEP 1/2: Data Creation (real data + synthetic enrichment)\")\n",
80
+ "print(\"=\" * 50)"
81
+ ],
82
+ "metadata": {
83
+ "colab": {
84
+ "base_uri": "https://localhost:8080/"
85
+ },
86
+ "id": "EL_CQaWsDkGw",
87
+ "outputId": "105e7fb3-54f6-4717-f474-86c61bdaf485"
88
+ },
89
+ "execution_count": 26,
90
+ "outputs": [
91
+ {
92
+ "output_type": "stream",
93
+ "name": "stdout",
94
+ "text": [
95
+ "==================================================\n",
96
+ "STEP 1/2: Data Creation (real data + synthetic enrichment)\n",
97
+ "==================================================\n"
98
+ ]
99
+ }
100
+ ]
101
+ },
102
+ {
103
+ "cell_type": "code",
104
+ "source": [
105
+ "# ====================================================\n",
106
+ "# GENERATE SYNTHETIC EU JOBS DATA\n",
107
+ "# ====================================================\n",
108
+ "import random\n",
109
+ "from datetime import datetime, timedelta\n",
110
+ "\n",
111
+ "random.seed(42)\n",
112
+ "\n",
113
+ "DOMAINS = [\n",
114
+ " \"Budget and Finances\", \"European Policy\", \"Crisis management and Internal security\",\n",
115
+ " \"Law\", \"Defence\", \"Human Resources\", \"Information Technology\",\n",
116
+ " \"Justice and human rights\", \"Economics, Finance and Statistics\",\n",
117
+ " \"Transport\", \"Agriculture\", \"Environment, Climate change\", \"Public Health\",\n",
118
+ " \"Education and Culture\", \"External Relations\", \"Trade\", \"Audit\",\n",
119
+ " \"Communication\", \"Research and Innovation\", \"Statistics\", \"Energy\",\n",
120
+ " \"Migration\", \"Digital Affairs\", \"Tax and Customs\",\n",
121
+ "]\n",
122
+ "TITLES = [\n",
123
+ " \"Project Assistant\", \"Policy Officer\", \"Legal Officer\", \"Finance Agent\",\n",
124
+ " \"Communications Officer\", \"Cybersecurity Expert\", \"IT Officer\",\n",
125
+ " \"Senior Advisor\", \"Head of Unit\", \"Junior Analyst\", \"Senior Analyst\",\n",
126
+ " \"Programme Manager\", \"Auditor\", \"HR Officer\", \"Translator\",\n",
127
+ " \"Data Analyst\", \"Compliance Officer\", \"Procurement Officer\",\n",
128
+ " \"Research Officer\", \"Liaison Officer\", \"Statistician\", \"Economist\",\n",
129
+ " \"Risk Manager\", \"Operations Officer\", \"Coordinator\",\n",
130
+ "]\n",
131
+ "GRADES = [\"FG II\", \"FG III\", \"FG IV\", \"FG III, FG IV\", \"AD 5\", \"AD 6\", \"AD 7\", \"AD 8\", \"AD 10\", \"AD 12\", \"AD 14\"]\n",
132
+ "GRADE_WEIGHTS = [3, 11, 17, 9, 4, 7, 5, 2, 2, 2, 3]\n",
133
+ "CONTRACTS = [\"Contract staff\", \"Temporary staff\", \"Seconded national expert (SNE)\", \"Permanent official\"]\n",
134
+ "CONTRACT_WEIGHTS = [46, 28, 24, 5]\n",
135
+ "INSTITUTIONS = [\n",
136
+ " \"EU institutions\", \"(EDA) European Defence Agency\", \"(EUSPA) European Union Agency for the Space Programme\",\n",
137
+ " \"(FRONTEX) European Border and Coast Guard Agency\", \"(FRA) European Union Agency for Fundamental Rights\",\n",
138
+ " \"(EASA) European Union Aviation Safety Agency\", \"(Europol) European Union Agency for Law Enforcement Cooperation\",\n",
139
+ " \"(EFSA) European Food Safety Authority\", \"(ECB) European Central Bank\", \"(EMA) European Medicines Agency\",\n",
140
+ " \"(EUAA) European Union Agency for Asylum\", \"(EIB) European Investment Bank\", \"(EEA) European Environment Agency\",\n",
141
+ "]\n",
142
+ "LOCATIONS = [\n",
143
+ " \"Brussels (Belgium)\", \"Luxembourg (Luxembourg)\", \"The Hague (The Netherlands)\",\n",
144
+ " \"Warsaw (Poland)\", \"Frankfurt (Germany)\", \"Cologne (Germany)\", \"Parma (Italy)\",\n",
145
+ " \"Lisbon (Portugal)\", \"Paris (France)\", \"Vienna (Austria)\", \"Helsinki (Finland)\",\n",
146
+ " \"Madrid (Spain)\", \"Dublin (Ireland)\", \"Stockholm (Sweden)\",\n",
147
+ "]\n",
148
+ "\n",
149
+ "synthetic_rows = []\n",
150
+ "for i in range(200):\n",
151
+ " base = datetime(2026, 5, 1)\n",
152
+ " deadline = base + timedelta(days=random.randint(1, 180))\n",
153
+ " deadline_str = f\"{deadline.strftime('%d/%m/%Y')} - {random.choice(['13:00', '17:00', '23:59'])}\"\n",
154
+ " title = random.choice(TITLES)\n",
155
+ " inst = random.choice(INSTITUTIONS)\n",
156
+ " inst_short = inst.split(\")\")[0].replace(\"(\", \"\").lower() if \"(\" in inst else \"eu\"\n",
157
+ "\n",
158
+ " synthetic_rows.append({\n",
159
+ " \"ID\": 19800 + i,\n",
160
+ " \"title\": title,\n",
161
+ " \"Domain(s)\": random.choice(DOMAINS),\n",
162
+ " \"Grade\": random.choices(GRADES, weights=GRADE_WEIGHTS, k=1)[0],\n",
163
+ " \"Type of contract\": random.choices(CONTRACTS, weights=CONTRACT_WEIGHTS, k=1)[0],\n",
164
+ " \"Institution(s) \": inst,\n",
165
+ " \"Location(s)\": random.choice(LOCATIONS),\n",
166
+ " \"Deadline \": deadline_str,\n",
167
+ " \"Link to Content\": f\"https://eu-careers.europa.eu/en/job-opportunities/{title.lower().replace(' ', '-')}/{inst_short}-{19800+i}\",\n",
168
+ " })\n",
169
+ "\n",
170
+ "synthetic_df = pd.DataFrame(synthetic_rows)\n",
171
+ "print(f\"Generated {len(synthetic_df)} synthetic job postings\")\n",
172
+ "synthetic_df.head()"
173
+ ],
174
+ "metadata": {
175
+ "colab": {
176
+ "base_uri": "https://localhost:8080/",
177
+ "height": 435
178
+ },
179
+ "id": "BQf2iEmtum_U",
180
+ "outputId": "f01811d0-c4f5-435c-f86f-85c4f1c84bab"
181
+ },
182
+ "execution_count": 27,
183
+ "outputs": [
184
+ {
185
+ "output_type": "stream",
186
+ "name": "stdout",
187
+ "text": [
188
+ "Generated 200 synthetic job postings\n"
189
+ ]
190
+ },
191
+ {
192
+ "output_type": "execute_result",
193
+ "data": {
194
+ "text/plain": [
195
+ " ID title Domain(s) Grade \\\n",
196
+ "0 19800 Project Assistant Economics, Finance and Statistics FG IV \n",
197
+ "1 19801 Procurement Officer Research and Innovation FG IV \n",
198
+ "2 19802 Liaison Officer Communication FG III \n",
199
+ "3 19803 Translator Economics, Finance and Statistics AD 7 \n",
200
+ "4 19804 HR Officer Economics, Finance and Statistics FG III \n",
201
+ "\n",
202
+ " Type of contract Institution(s) \\\n",
203
+ "0 Contract staff (EIB) European Investment Bank \n",
204
+ "1 Contract staff (EDA) European Defence Agency \n",
205
+ "2 Temporary staff EU institutions \n",
206
+ "3 Contract staff (EMA) European Medicines Agency \n",
207
+ "4 Permanent official (EASA) European Union Aviation Safety Agency \n",
208
+ "\n",
209
+ " Location(s) Deadline \\\n",
210
+ "0 Luxembourg (Luxembourg) 12/10/2026 - 13:00 \n",
211
+ "1 Warsaw (Poland) 22/10/2026 - 23:59 \n",
212
+ "2 Paris (France) 30/06/2026 - 23:59 \n",
213
+ "3 Dublin (Ireland) 17/08/2026 - 13:00 \n",
214
+ "4 Cologne (Germany) 11/06/2026 - 23:59 \n",
215
+ "\n",
216
+ " Link to Content \n",
217
+ "0 https://eu-careers.europa.eu/en/job-opportunit... \n",
218
+ "1 https://eu-careers.europa.eu/en/job-opportunit... \n",
219
+ "2 https://eu-careers.europa.eu/en/job-opportunit... \n",
220
+ "3 https://eu-careers.europa.eu/en/job-opportunit... \n",
221
+ "4 https://eu-careers.europa.eu/en/job-opportunit... "
222
+ ],
223
+ "text/html": [
224
+ "\n",
225
+ " <div id=\"df-d8fbda0f-8ab7-47b8-839b-6774c3523b85\" class=\"colab-df-container\">\n",
226
+ " <div>\n",
227
+ "<style scoped>\n",
228
+ " .dataframe tbody tr th:only-of-type {\n",
229
+ " vertical-align: middle;\n",
230
+ " }\n",
231
+ "\n",
232
+ " .dataframe tbody tr th {\n",
233
+ " vertical-align: top;\n",
234
+ " }\n",
235
+ "\n",
236
+ " .dataframe thead th {\n",
237
+ " text-align: right;\n",
238
+ " }\n",
239
+ "</style>\n",
240
+ "<table border=\"1\" class=\"dataframe\">\n",
241
+ " <thead>\n",
242
+ " <tr style=\"text-align: right;\">\n",
243
+ " <th></th>\n",
244
+ " <th>ID</th>\n",
245
+ " <th>title</th>\n",
246
+ " <th>Domain(s)</th>\n",
247
+ " <th>Grade</th>\n",
248
+ " <th>Type of contract</th>\n",
249
+ " <th>Institution(s)</th>\n",
250
+ " <th>Location(s)</th>\n",
251
+ " <th>Deadline</th>\n",
252
+ " <th>Link to Content</th>\n",
253
+ " </tr>\n",
254
+ " </thead>\n",
255
+ " <tbody>\n",
256
+ " <tr>\n",
257
+ " <th>0</th>\n",
258
+ " <td>19800</td>\n",
259
+ " <td>Project Assistant</td>\n",
260
+ " <td>Economics, Finance and Statistics</td>\n",
261
+ " <td>FG IV</td>\n",
262
+ " <td>Contract staff</td>\n",
263
+ " <td>(EIB) European Investment Bank</td>\n",
264
+ " <td>Luxembourg (Luxembourg)</td>\n",
265
+ " <td>12/10/2026 - 13:00</td>\n",
266
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
267
+ " </tr>\n",
268
+ " <tr>\n",
269
+ " <th>1</th>\n",
270
+ " <td>19801</td>\n",
271
+ " <td>Procurement Officer</td>\n",
272
+ " <td>Research and Innovation</td>\n",
273
+ " <td>FG IV</td>\n",
274
+ " <td>Contract staff</td>\n",
275
+ " <td>(EDA) European Defence Agency</td>\n",
276
+ " <td>Warsaw (Poland)</td>\n",
277
+ " <td>22/10/2026 - 23:59</td>\n",
278
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
279
+ " </tr>\n",
280
+ " <tr>\n",
281
+ " <th>2</th>\n",
282
+ " <td>19802</td>\n",
283
+ " <td>Liaison Officer</td>\n",
284
+ " <td>Communication</td>\n",
285
+ " <td>FG III</td>\n",
286
+ " <td>Temporary staff</td>\n",
287
+ " <td>EU institutions</td>\n",
288
+ " <td>Paris (France)</td>\n",
289
+ " <td>30/06/2026 - 23:59</td>\n",
290
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
291
+ " </tr>\n",
292
+ " <tr>\n",
293
+ " <th>3</th>\n",
294
+ " <td>19803</td>\n",
295
+ " <td>Translator</td>\n",
296
+ " <td>Economics, Finance and Statistics</td>\n",
297
+ " <td>AD 7</td>\n",
298
+ " <td>Contract staff</td>\n",
299
+ " <td>(EMA) European Medicines Agency</td>\n",
300
+ " <td>Dublin (Ireland)</td>\n",
301
+ " <td>17/08/2026 - 13:00</td>\n",
302
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
303
+ " </tr>\n",
304
+ " <tr>\n",
305
+ " <th>4</th>\n",
306
+ " <td>19804</td>\n",
307
+ " <td>HR Officer</td>\n",
308
+ " <td>Economics, Finance and Statistics</td>\n",
309
+ " <td>FG III</td>\n",
310
+ " <td>Permanent official</td>\n",
311
+ " <td>(EASA) European Union Aviation Safety Agency</td>\n",
312
+ " <td>Cologne (Germany)</td>\n",
313
+ " <td>11/06/2026 - 23:59</td>\n",
314
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
315
+ " </tr>\n",
316
+ " </tbody>\n",
317
+ "</table>\n",
318
+ "</div>\n",
319
+ " <div class=\"colab-df-buttons\">\n",
320
+ "\n",
321
+ " <div class=\"colab-df-container\">\n",
322
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-d8fbda0f-8ab7-47b8-839b-6774c3523b85')\"\n",
323
+ " title=\"Convert this dataframe to an interactive table.\"\n",
324
+ " style=\"display:none;\">\n",
325
+ "\n",
326
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
327
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
328
+ " </svg>\n",
329
+ " </button>\n",
330
+ "\n",
331
+ " <style>\n",
332
+ " .colab-df-container {\n",
333
+ " display:flex;\n",
334
+ " gap: 12px;\n",
335
+ " }\n",
336
+ "\n",
337
+ " .colab-df-convert {\n",
338
+ " background-color: #E8F0FE;\n",
339
+ " border: none;\n",
340
+ " border-radius: 50%;\n",
341
+ " cursor: pointer;\n",
342
+ " display: none;\n",
343
+ " fill: #1967D2;\n",
344
+ " height: 32px;\n",
345
+ " padding: 0 0 0 0;\n",
346
+ " width: 32px;\n",
347
+ " }\n",
348
+ "\n",
349
+ " .colab-df-convert:hover {\n",
350
+ " background-color: #E2EBFA;\n",
351
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
352
+ " fill: #174EA6;\n",
353
+ " }\n",
354
+ "\n",
355
+ " .colab-df-buttons div {\n",
356
+ " margin-bottom: 4px;\n",
357
+ " }\n",
358
+ "\n",
359
+ " [theme=dark] .colab-df-convert {\n",
360
+ " background-color: #3B4455;\n",
361
+ " fill: #D2E3FC;\n",
362
+ " }\n",
363
+ "\n",
364
+ " [theme=dark] .colab-df-convert:hover {\n",
365
+ " background-color: #434B5C;\n",
366
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
367
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
368
+ " fill: #FFFFFF;\n",
369
+ " }\n",
370
+ " </style>\n",
371
+ "\n",
372
+ " <script>\n",
373
+ " const buttonEl =\n",
374
+ " document.querySelector('#df-d8fbda0f-8ab7-47b8-839b-6774c3523b85 button.colab-df-convert');\n",
375
+ " buttonEl.style.display =\n",
376
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
377
+ "\n",
378
+ " async function convertToInteractive(key) {\n",
379
+ " const element = document.querySelector('#df-d8fbda0f-8ab7-47b8-839b-6774c3523b85');\n",
380
+ " const dataTable =\n",
381
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
382
+ " [key], {});\n",
383
+ " if (!dataTable) return;\n",
384
+ "\n",
385
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
386
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
387
+ " + ' to learn more about interactive tables.';\n",
388
+ " element.innerHTML = '';\n",
389
+ " dataTable['output_type'] = 'display_data';\n",
390
+ " await google.colab.output.renderOutput(dataTable, element);\n",
391
+ " const docLink = document.createElement('div');\n",
392
+ " docLink.innerHTML = docLinkHtml;\n",
393
+ " element.appendChild(docLink);\n",
394
+ " }\n",
395
+ " </script>\n",
396
+ " </div>\n",
397
+ "\n",
398
+ "\n",
399
+ " </div>\n",
400
+ " </div>\n"
401
+ ],
402
+ "application/vnd.google.colaboratory.intrinsic+json": {
403
+ "type": "dataframe",
404
+ "variable_name": "synthetic_df",
405
+ "summary": "{\n \"name\": \"synthetic_df\",\n \"rows\": 200,\n \"fields\": [\n {\n \"column\": \"ID\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 57,\n \"min\": 19800,\n \"max\": 19999,\n \"num_unique_values\": 200,\n \"samples\": [\n 19895,\n 19815,\n 19830\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"title\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 25,\n \"samples\": [\n \"Statistician\",\n \"IT Officer\",\n \"Project Assistant\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Domain(s)\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 24,\n \"samples\": [\n \"Public Health\",\n \"Education and Culture\",\n \"Economics, Finance and Statistics\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Grade\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 11,\n \"samples\": [\n \"FG III, FG IV\",\n \"FG IV\",\n \"FG II\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Type of contract\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Temporary staff\",\n \"Seconded national expert (SNE)\",\n \"Contract staff\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Institution(s) \",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 13,\n \"samples\": [\n \"(FRA) European Union Agency for Fundamental Rights\",\n \"(EFSA) European Food Safety Authority\",\n \"(EIB) European Investment Bank\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Location(s)\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 14,\n \"samples\": [\n \"The Hague (The Netherlands)\",\n \"Brussels (Belgium)\",\n \"Luxembourg (Luxembourg)\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n 
},\n {\n \"column\": \"Deadline \",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 169,\n \"samples\": [\n \"31/07/2026 - 13:00\",\n \"25/09/2026 - 23:59\",\n \"15/07/2026 - 23:59\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Link to Content\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 200,\n \"samples\": [\n \"https://eu-careers.europa.eu/en/job-opportunities/policy-officer/easa-19895\",\n \"https://eu-careers.europa.eu/en/job-opportunities/programme-manager/frontex-19815\",\n \"https://eu-careers.europa.eu/en/job-opportunities/data-analyst/frontex-19830\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
406
+ }
407
+ },
408
+ "metadata": {},
409
+ "execution_count": 27
410
+ }
411
+ ]
412
+ },
413
+ {
414
+ "cell_type": "code",
415
+ "source": [
416
+ "# Load the real data\n",
417
+ "real_df = pd.read_csv(\"CSV-JOBS.csv\")\n",
418
+ "print(f\"Real jobs loaded: {len(real_df)}\")\n",
419
+ "\n",
420
+ "# Combine real + synthetic into one dataset\n",
421
+ "jobs_df = pd.concat([real_df, synthetic_df], ignore_index=True)\n",
422
+ "\n",
423
+ "print(f\"Total combined dataset: {len(jobs_df)} rows\")\n",
424
+ "print(\"Columns:\", jobs_df.columns.tolist())\n",
425
+ "\n",
426
+ "display(jobs_df.head())"
427
+ ],
428
+ "metadata": {
429
+ "colab": {
430
+ "base_uri": "https://localhost:8080/",
431
+ "height": 505
432
+ },
433
+ "id": "jyIW1KljDlXP",
434
+ "outputId": "394d684c-df7c-45d8-bb3b-83d5b83f0c0e"
435
+ },
436
+ "execution_count": 28,
437
+ "outputs": [
438
+ {
439
+ "output_type": "stream",
440
+ "name": "stdout",
441
+ "text": [
442
+ "Real jobs loaded: 103\n",
443
+ "Total combined dataset: 303 rows\n",
444
+ "Columns: ['ID', 'title', 'Domain(s)', 'Grade', 'Type of contract', 'Institution(s) ', 'Location(s)', 'Deadline ', 'Link to Content']\n"
445
+ ]
446
+ },
447
+ {
448
+ "output_type": "display_data",
449
+ "data": {
450
+ "text/plain": [
451
+ " ID title \\\n",
452
+ "0 19702 Project Assistant \n",
453
+ "1 19703 Policy Officer \n",
454
+ "2 19664 Senior Military Advisor to the Executive Director \n",
455
+ "3 19665 Structures Expert \n",
456
+ "4 19666 Certification Expert - Hydromechanical and Fli... \n",
457
+ "\n",
458
+ " Domain(s) Grade Type of contract \\\n",
459
+ "0 Justice and human rights FG III Contract staff \n",
460
+ "1 Justice and human rights FG IV Contract staff \n",
461
+ "2 Transport AD 10 Temporary staff \n",
462
+ "3 Transport AD 7 Temporary staff \n",
463
+ "4 Transport AD 7 Temporary staff \n",
464
+ "\n",
465
+ " Institution(s) Location(s) \\\n",
466
+ "0 (FRA) European Union Agency for Fundamental Ri... Vienna (Austria) \n",
467
+ "1 (FRA) European Union Agency for Fundamental Ri... Vienna (Austria) \n",
468
+ "2 (EASA) European Union Aviation Safety Agency Cologne (Germany) \n",
469
+ "3 (EASA) European Union Aviation Safety Agency Cologne (Germany) \n",
470
+ "4 (EASA) European Union Aviation Safety Agency Cologne (Germany) \n",
471
+ "\n",
472
+ " Deadline Link to Content \n",
473
+ "0 30/04/2026 - 13:00 https://eu-careers.europa.eu/en/job-opportunit... \n",
474
+ "1 30/04/2026 - 13:00 https://eu-careers.europa.eu/en/job-opportunit... \n",
475
+ "2 30/04/2026 - 23:59 https://eu-careers.europa.eu/en/job-opportunit... \n",
476
+ "3 30/04/2026 - 23:59 https://eu-careers.europa.eu/en/job-opportunit... \n",
477
+ "4 30/04/2026 - 23:59 https://eu-careers.europa.eu/en/job-opportunit... "
478
+ ],
479
+ "text/html": [
480
+ "\n",
481
+ " <div id=\"df-2d5843f3-5a2e-43a9-9098-8c9ee1215ecc\" class=\"colab-df-container\">\n",
482
+ " <div>\n",
483
+ "<style scoped>\n",
484
+ " .dataframe tbody tr th:only-of-type {\n",
485
+ " vertical-align: middle;\n",
486
+ " }\n",
487
+ "\n",
488
+ " .dataframe tbody tr th {\n",
489
+ " vertical-align: top;\n",
490
+ " }\n",
491
+ "\n",
492
+ " .dataframe thead th {\n",
493
+ " text-align: right;\n",
494
+ " }\n",
495
+ "</style>\n",
496
+ "<table border=\"1\" class=\"dataframe\">\n",
497
+ " <thead>\n",
498
+ " <tr style=\"text-align: right;\">\n",
499
+ " <th></th>\n",
500
+ " <th>ID</th>\n",
501
+ " <th>title</th>\n",
502
+ " <th>Domain(s)</th>\n",
503
+ " <th>Grade</th>\n",
504
+ " <th>Type of contract</th>\n",
505
+ " <th>Institution(s)</th>\n",
506
+ " <th>Location(s)</th>\n",
507
+ " <th>Deadline</th>\n",
508
+ " <th>Link to Content</th>\n",
509
+ " </tr>\n",
510
+ " </thead>\n",
511
+ " <tbody>\n",
512
+ " <tr>\n",
513
+ " <th>0</th>\n",
514
+ " <td>19702</td>\n",
515
+ " <td>Project Assistant</td>\n",
516
+ " <td>Justice and human rights</td>\n",
517
+ " <td>FG III</td>\n",
518
+ " <td>Contract staff</td>\n",
519
+ " <td>(FRA) European Union Agency for Fundamental Ri...</td>\n",
520
+ " <td>Vienna (Austria)</td>\n",
521
+ " <td>30/04/2026 - 13:00</td>\n",
522
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
523
+ " </tr>\n",
524
+ " <tr>\n",
525
+ " <th>1</th>\n",
526
+ " <td>19703</td>\n",
527
+ " <td>Policy Officer</td>\n",
528
+ " <td>Justice and human rights</td>\n",
529
+ " <td>FG IV</td>\n",
530
+ " <td>Contract staff</td>\n",
531
+ " <td>(FRA) European Union Agency for Fundamental Ri...</td>\n",
532
+ " <td>Vienna (Austria)</td>\n",
533
+ " <td>30/04/2026 - 13:00</td>\n",
534
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
535
+ " </tr>\n",
536
+ " <tr>\n",
537
+ " <th>2</th>\n",
538
+ " <td>19664</td>\n",
539
+ " <td>Senior Military Advisor to the Executive Director</td>\n",
540
+ " <td>Transport</td>\n",
541
+ " <td>AD 10</td>\n",
542
+ " <td>Temporary staff</td>\n",
543
+ " <td>(EASA) European Union Aviation Safety Agency</td>\n",
544
+ " <td>Cologne (Germany)</td>\n",
545
+ " <td>30/04/2026 - 23:59</td>\n",
546
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
547
+ " </tr>\n",
548
+ " <tr>\n",
549
+ " <th>3</th>\n",
550
+ " <td>19665</td>\n",
551
+ " <td>Structures Expert</td>\n",
552
+ " <td>Transport</td>\n",
553
+ " <td>AD 7</td>\n",
554
+ " <td>Temporary staff</td>\n",
555
+ " <td>(EASA) European Union Aviation Safety Agency</td>\n",
556
+ " <td>Cologne (Germany)</td>\n",
557
+ " <td>30/04/2026 - 23:59</td>\n",
558
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
559
+ " </tr>\n",
560
+ " <tr>\n",
561
+ " <th>4</th>\n",
562
+ " <td>19666</td>\n",
563
+ " <td>Certification Expert - Hydromechanical and Fli...</td>\n",
564
+ " <td>Transport</td>\n",
565
+ " <td>AD 7</td>\n",
566
+ " <td>Temporary staff</td>\n",
567
+ " <td>(EASA) European Union Aviation Safety Agency</td>\n",
568
+ " <td>Cologne (Germany)</td>\n",
569
+ " <td>30/04/2026 - 23:59</td>\n",
570
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
571
+ " </tr>\n",
572
+ " </tbody>\n",
573
+ "</table>\n",
574
+ "</div>\n",
575
+ " <div class=\"colab-df-buttons\">\n",
576
+ "\n",
577
+ " <div class=\"colab-df-container\">\n",
578
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-2d5843f3-5a2e-43a9-9098-8c9ee1215ecc')\"\n",
579
+ " title=\"Convert this dataframe to an interactive table.\"\n",
580
+ " style=\"display:none;\">\n",
581
+ "\n",
582
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
583
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
584
+ " </svg>\n",
585
+ " </button>\n",
586
+ "\n",
587
+ " <style>\n",
588
+ " .colab-df-container {\n",
589
+ " display:flex;\n",
590
+ " gap: 12px;\n",
591
+ " }\n",
592
+ "\n",
593
+ " .colab-df-convert {\n",
594
+ " background-color: #E8F0FE;\n",
595
+ " border: none;\n",
596
+ " border-radius: 50%;\n",
597
+ " cursor: pointer;\n",
598
+ " display: none;\n",
599
+ " fill: #1967D2;\n",
600
+ " height: 32px;\n",
601
+ " padding: 0 0 0 0;\n",
602
+ " width: 32px;\n",
603
+ " }\n",
604
+ "\n",
605
+ " .colab-df-convert:hover {\n",
606
+ " background-color: #E2EBFA;\n",
607
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
608
+ " fill: #174EA6;\n",
609
+ " }\n",
610
+ "\n",
611
+ " .colab-df-buttons div {\n",
612
+ " margin-bottom: 4px;\n",
613
+ " }\n",
614
+ "\n",
615
+ " [theme=dark] .colab-df-convert {\n",
616
+ " background-color: #3B4455;\n",
617
+ " fill: #D2E3FC;\n",
618
+ " }\n",
619
+ "\n",
620
+ " [theme=dark] .colab-df-convert:hover {\n",
621
+ " background-color: #434B5C;\n",
622
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
623
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
624
+ " fill: #FFFFFF;\n",
625
+ " }\n",
626
+ " </style>\n",
627
+ "\n",
628
+ " <script>\n",
629
+ " const buttonEl =\n",
630
+ " document.querySelector('#df-2d5843f3-5a2e-43a9-9098-8c9ee1215ecc button.colab-df-convert');\n",
631
+ " buttonEl.style.display =\n",
632
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
633
+ "\n",
634
+ " async function convertToInteractive(key) {\n",
635
+ " const element = document.querySelector('#df-2d5843f3-5a2e-43a9-9098-8c9ee1215ecc');\n",
636
+ " const dataTable =\n",
637
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
638
+ " [key], {});\n",
639
+ " if (!dataTable) return;\n",
640
+ "\n",
641
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
642
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
643
+ " + ' to learn more about interactive tables.';\n",
644
+ " element.innerHTML = '';\n",
645
+ " dataTable['output_type'] = 'display_data';\n",
646
+ " await google.colab.output.renderOutput(dataTable, element);\n",
647
+ " const docLink = document.createElement('div');\n",
648
+ " docLink.innerHTML = docLinkHtml;\n",
649
+ " element.appendChild(docLink);\n",
650
+ " }\n",
651
+ " </script>\n",
652
+ " </div>\n",
653
+ "\n",
654
+ "\n",
655
+ " </div>\n",
656
+ " </div>\n"
657
+ ],
658
+ "application/vnd.google.colaboratory.intrinsic+json": {
659
+ "type": "dataframe",
660
+ "summary": "{\n \"name\": \"display(jobs_df\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"ID\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 20,\n \"min\": 19664,\n \"max\": 19703,\n \"num_unique_values\": 5,\n \"samples\": [\n 19703,\n 19666,\n 19664\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"title\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Policy Officer\",\n \"Certification Expert - Hydromechanical and Flight Control Systems\",\n \"Senior Military Advisor to the Executive Director\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Domain(s)\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Transport\",\n \"Justice and human rights\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Grade\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"FG IV\",\n \"AD 7\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Type of contract\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Temporary staff\",\n \"Contract staff\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Institution(s) \",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"(EASA) European Union Aviation Safety Agency\",\n \"(FRA) European Union Agency for Fundamental Rights\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Location(s)\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Cologne (Germany)\",\n \"Vienna (Austria)\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Deadline \",\n \"properties\": {\n \"dtype\": \"object\",\n 
\"num_unique_values\": 2,\n \"samples\": [\n \"30/04/2026 - 23:59\",\n \"30/04/2026 - 13:00\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Link to Content\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"https://eu-careers.europa.eu/en/job-opportunities/policy-officer/fra-ca-polof-fgiv-2026\",\n \"https://eu-careers.europa.eu/en/job-opportunities/certification-expert-hydromechanical-and-flight-control-systems/easa-ad-2026-997\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
661
+ }
662
+ },
663
+ "metadata": {}
664
+ }
665
+ ]
666
+ },
667
+ {
668
+ "cell_type": "code",
669
+ "source": [
670
+ "jobs_clean = jobs_df.copy()\n",
671
+ "# Raw headers carry trailing spaces (e.g. \"Deadline \"), so strip them before any column lookup.\n",
672
+ "jobs_clean.columns = jobs_clean.columns.str.strip()\n",
673
+ "# Deadlines are day-first strings such as \"30/04/2026 - 23:59\"; an explicit format avoids day/month guessing, and non-matching values become NaT (dropped below).\n",
674
+ "jobs_clean[\"Deadline\"] = pd.to_datetime(jobs_clean[\"Deadline\"], format=\"%d/%m/%Y - %H:%M\", errors=\"coerce\")\n",
675
+ "jobs_clean = jobs_clean.dropna(subset=[\"title\", \"Domain(s)\", \"Type of contract\", \"Deadline\"])\n",
676
+ "\n",
677
+ "print(\"Cleaned shape:\", jobs_clean.shape)\n",
678
+ "display(jobs_clean.head())"
679
+ ],
680
+ "metadata": {
681
+ "colab": {
682
+ "base_uri": "https://localhost:8080/",
683
+ "height": 505
684
+ },
685
+ "id": "UgMfW50wDm1g",
686
+ "outputId": "a288151f-da02-4fdf-d3ae-8ecf1c6be725"
687
+ },
688
+ "execution_count": 29,
689
+ "outputs": [
690
+ {
691
+ "output_type": "stream",
692
+ "name": "stdout",
693
+ "text": [
694
+ "Cleaned shape: (299, 9)\n"
695
+ ]
696
+ },
697
+ {
698
+ "output_type": "stream",
699
+ "name": "stderr",
700
+ "text": [
701
+ "/tmp/ipykernel_12223/2873856445.py:4: UserWarning: Parsing dates in %d/%m/%Y - %H:%M format when dayfirst=False (the default) was specified. Pass `dayfirst=True` or specify a format to silence this warning.\n",
702
+ " jobs_clean[\"Deadline\"] = pd.to_datetime(jobs_clean[\"Deadline\"], errors=\"coerce\")\n"
703
+ ]
704
+ },
705
+ {
706
+ "output_type": "display_data",
707
+ "data": {
708
+ "text/plain": [
709
+ " ID title \\\n",
710
+ "0 19702 Project Assistant \n",
711
+ "1 19703 Policy Officer \n",
712
+ "2 19664 Senior Military Advisor to the Executive Director \n",
713
+ "3 19665 Structures Expert \n",
714
+ "4 19666 Certification Expert - Hydromechanical and Fli... \n",
715
+ "\n",
716
+ " Domain(s) Grade Type of contract \\\n",
717
+ "0 Justice and human rights FG III Contract staff \n",
718
+ "1 Justice and human rights FG IV Contract staff \n",
719
+ "2 Transport AD 10 Temporary staff \n",
720
+ "3 Transport AD 7 Temporary staff \n",
721
+ "4 Transport AD 7 Temporary staff \n",
722
+ "\n",
723
+ " Institution(s) Location(s) \\\n",
724
+ "0 (FRA) European Union Agency for Fundamental Ri... Vienna (Austria) \n",
725
+ "1 (FRA) European Union Agency for Fundamental Ri... Vienna (Austria) \n",
726
+ "2 (EASA) European Union Aviation Safety Agency Cologne (Germany) \n",
727
+ "3 (EASA) European Union Aviation Safety Agency Cologne (Germany) \n",
728
+ "4 (EASA) European Union Aviation Safety Agency Cologne (Germany) \n",
729
+ "\n",
730
+ " Deadline Link to Content \n",
731
+ "0 2026-04-30 13:00:00 https://eu-careers.europa.eu/en/job-opportunit... \n",
732
+ "1 2026-04-30 13:00:00 https://eu-careers.europa.eu/en/job-opportunit... \n",
733
+ "2 2026-04-30 23:59:00 https://eu-careers.europa.eu/en/job-opportunit... \n",
734
+ "3 2026-04-30 23:59:00 https://eu-careers.europa.eu/en/job-opportunit... \n",
735
+ "4 2026-04-30 23:59:00 https://eu-careers.europa.eu/en/job-opportunit... "
736
+ ],
737
+ "text/html": [
738
+ "\n",
739
+ " <div id=\"df-fbd49b0f-f509-4c22-8bc4-f5737d95fee2\" class=\"colab-df-container\">\n",
740
+ " <div>\n",
741
+ "<style scoped>\n",
742
+ " .dataframe tbody tr th:only-of-type {\n",
743
+ " vertical-align: middle;\n",
744
+ " }\n",
745
+ "\n",
746
+ " .dataframe tbody tr th {\n",
747
+ " vertical-align: top;\n",
748
+ " }\n",
749
+ "\n",
750
+ " .dataframe thead th {\n",
751
+ " text-align: right;\n",
752
+ " }\n",
753
+ "</style>\n",
754
+ "<table border=\"1\" class=\"dataframe\">\n",
755
+ " <thead>\n",
756
+ " <tr style=\"text-align: right;\">\n",
757
+ " <th></th>\n",
758
+ " <th>ID</th>\n",
759
+ " <th>title</th>\n",
760
+ " <th>Domain(s)</th>\n",
761
+ " <th>Grade</th>\n",
762
+ " <th>Type of contract</th>\n",
763
+ " <th>Institution(s)</th>\n",
764
+ " <th>Location(s)</th>\n",
765
+ " <th>Deadline</th>\n",
766
+ " <th>Link to Content</th>\n",
767
+ " </tr>\n",
768
+ " </thead>\n",
769
+ " <tbody>\n",
770
+ " <tr>\n",
771
+ " <th>0</th>\n",
772
+ " <td>19702</td>\n",
773
+ " <td>Project Assistant</td>\n",
774
+ " <td>Justice and human rights</td>\n",
775
+ " <td>FG III</td>\n",
776
+ " <td>Contract staff</td>\n",
777
+ " <td>(FRA) European Union Agency for Fundamental Ri...</td>\n",
778
+ " <td>Vienna (Austria)</td>\n",
779
+ " <td>2026-04-30 13:00:00</td>\n",
780
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
781
+ " </tr>\n",
782
+ " <tr>\n",
783
+ " <th>1</th>\n",
784
+ " <td>19703</td>\n",
785
+ " <td>Policy Officer</td>\n",
786
+ " <td>Justice and human rights</td>\n",
787
+ " <td>FG IV</td>\n",
788
+ " <td>Contract staff</td>\n",
789
+ " <td>(FRA) European Union Agency for Fundamental Ri...</td>\n",
790
+ " <td>Vienna (Austria)</td>\n",
791
+ " <td>2026-04-30 13:00:00</td>\n",
792
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
793
+ " </tr>\n",
794
+ " <tr>\n",
795
+ " <th>2</th>\n",
796
+ " <td>19664</td>\n",
797
+ " <td>Senior Military Advisor to the Executive Director</td>\n",
798
+ " <td>Transport</td>\n",
799
+ " <td>AD 10</td>\n",
800
+ " <td>Temporary staff</td>\n",
801
+ " <td>(EASA) European Union Aviation Safety Agency</td>\n",
802
+ " <td>Cologne (Germany)</td>\n",
803
+ " <td>2026-04-30 23:59:00</td>\n",
804
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
805
+ " </tr>\n",
806
+ " <tr>\n",
807
+ " <th>3</th>\n",
808
+ " <td>19665</td>\n",
809
+ " <td>Structures Expert</td>\n",
810
+ " <td>Transport</td>\n",
811
+ " <td>AD 7</td>\n",
812
+ " <td>Temporary staff</td>\n",
813
+ " <td>(EASA) European Union Aviation Safety Agency</td>\n",
814
+ " <td>Cologne (Germany)</td>\n",
815
+ " <td>2026-04-30 23:59:00</td>\n",
816
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
817
+ " </tr>\n",
818
+ " <tr>\n",
819
+ " <th>4</th>\n",
820
+ " <td>19666</td>\n",
821
+ " <td>Certification Expert - Hydromechanical and Fli...</td>\n",
822
+ " <td>Transport</td>\n",
823
+ " <td>AD 7</td>\n",
824
+ " <td>Temporary staff</td>\n",
825
+ " <td>(EASA) European Union Aviation Safety Agency</td>\n",
826
+ " <td>Cologne (Germany)</td>\n",
827
+ " <td>2026-04-30 23:59:00</td>\n",
828
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
829
+ " </tr>\n",
830
+ " </tbody>\n",
831
+ "</table>\n",
832
+ "</div>\n",
833
+ " <div class=\"colab-df-buttons\">\n",
834
+ "\n",
835
+ " <div class=\"colab-df-container\">\n",
836
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-fbd49b0f-f509-4c22-8bc4-f5737d95fee2')\"\n",
837
+ " title=\"Convert this dataframe to an interactive table.\"\n",
838
+ " style=\"display:none;\">\n",
839
+ "\n",
840
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
841
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
842
+ " </svg>\n",
843
+ " </button>\n",
844
+ "\n",
845
+ " <style>\n",
846
+ " .colab-df-container {\n",
847
+ " display:flex;\n",
848
+ " gap: 12px;\n",
849
+ " }\n",
850
+ "\n",
851
+ " .colab-df-convert {\n",
852
+ " background-color: #E8F0FE;\n",
853
+ " border: none;\n",
854
+ " border-radius: 50%;\n",
855
+ " cursor: pointer;\n",
856
+ " display: none;\n",
857
+ " fill: #1967D2;\n",
858
+ " height: 32px;\n",
859
+ " padding: 0 0 0 0;\n",
860
+ " width: 32px;\n",
861
+ " }\n",
862
+ "\n",
863
+ " .colab-df-convert:hover {\n",
864
+ " background-color: #E2EBFA;\n",
865
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
866
+ " fill: #174EA6;\n",
867
+ " }\n",
868
+ "\n",
869
+ " .colab-df-buttons div {\n",
870
+ " margin-bottom: 4px;\n",
871
+ " }\n",
872
+ "\n",
873
+ " [theme=dark] .colab-df-convert {\n",
874
+ " background-color: #3B4455;\n",
875
+ " fill: #D2E3FC;\n",
876
+ " }\n",
877
+ "\n",
878
+ " [theme=dark] .colab-df-convert:hover {\n",
879
+ " background-color: #434B5C;\n",
880
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
881
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
882
+ " fill: #FFFFFF;\n",
883
+ " }\n",
884
+ " </style>\n",
885
+ "\n",
886
+ " <script>\n",
887
+ " const buttonEl =\n",
888
+ " document.querySelector('#df-fbd49b0f-f509-4c22-8bc4-f5737d95fee2 button.colab-df-convert');\n",
889
+ " buttonEl.style.display =\n",
890
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
891
+ "\n",
892
+ " async function convertToInteractive(key) {\n",
893
+ " const element = document.querySelector('#df-fbd49b0f-f509-4c22-8bc4-f5737d95fee2');\n",
894
+ " const dataTable =\n",
895
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
896
+ " [key], {});\n",
897
+ " if (!dataTable) return;\n",
898
+ "\n",
899
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
900
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
901
+ " + ' to learn more about interactive tables.';\n",
902
+ " element.innerHTML = '';\n",
903
+ " dataTable['output_type'] = 'display_data';\n",
904
+ " await google.colab.output.renderOutput(dataTable, element);\n",
905
+ " const docLink = document.createElement('div');\n",
906
+ " docLink.innerHTML = docLinkHtml;\n",
907
+ " element.appendChild(docLink);\n",
908
+ " }\n",
909
+ " </script>\n",
910
+ " </div>\n",
911
+ "\n",
912
+ "\n",
913
+ " </div>\n",
914
+ " </div>\n"
915
+ ],
916
+ "application/vnd.google.colaboratory.intrinsic+json": {
917
+ "type": "dataframe",
918
+ "summary": "{\n \"name\": \"display(jobs_clean\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"ID\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 20,\n \"min\": 19664,\n \"max\": 19703,\n \"num_unique_values\": 5,\n \"samples\": [\n 19703,\n 19666,\n 19664\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"title\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Policy Officer\",\n \"Certification Expert - Hydromechanical and Flight Control Systems\",\n \"Senior Military Advisor to the Executive Director\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Domain(s)\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Transport\",\n \"Justice and human rights\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Grade\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"FG IV\",\n \"AD 7\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Type of contract\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Temporary staff\",\n \"Contract staff\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Institution(s)\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"(EASA) European Union Aviation Safety Agency\",\n \"(FRA) European Union Agency for Fundamental Rights\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Location(s)\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Cologne (Germany)\",\n \"Vienna (Austria)\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Deadline\",\n \"properties\": {\n \"dtype\": \"date\",\n \"min\": 
\"2026-04-30 13:00:00\",\n \"max\": \"2026-04-30 23:59:00\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"2026-04-30 23:59:00\",\n \"2026-04-30 13:00:00\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Link to Content\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"https://eu-careers.europa.eu/en/job-opportunities/policy-officer/fra-ca-polof-fgiv-2026\",\n \"https://eu-careers.europa.eu/en/job-opportunities/certification-expert-hydromechanical-and-flight-control-systems/easa-ad-2026-997\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
919
+ }
920
+ },
921
+ "metadata": {}
922
+ }
923
+ ]
924
+ },
925
+ {
926
+ "cell_type": "code",
927
+ "source": [
928
+ "# Print the number of distinct values held by each of these columns.\n",
929
+ "_unique_reports = [(\"Unique domains:\", \"Domain(s)\"), (\"Unique contract types:\", \"Type of contract\"), (\"Unique institutions:\", \"Institution(s)\"), (\"Unique locations:\", \"Location(s)\")]\n",
930
+ "for _heading, _column in _unique_reports:\n",
931
+ "    print(_heading, jobs_clean[_column].nunique())"
932
+ ],
933
+ "metadata": {
934
+ "colab": {
935
+ "base_uri": "https://localhost:8080/"
936
+ },
937
+ "id": "wN8LLnbqDo5H",
938
+ "outputId": "157bd26d-0dee-44ed-9b65-bbdf4547d43a"
939
+ },
940
+ "execution_count": 30,
941
+ "outputs": [
942
+ {
943
+ "output_type": "stream",
944
+ "name": "stdout",
945
+ "text": [
946
+ "Unique domains: 44\n",
947
+ "Unique contract types: 5\n",
948
+ "Unique institutions: 40\n",
949
+ "Unique locations: 33\n"
950
+ ]
951
+ }
952
+ ]
953
+ },
954
+ {
955
+ "cell_type": "code",
956
+ "source": [
957
+ "comments_by_demand = {\n",
958
+ " \"high\": [\n",
959
+ " \"This role appears to be in strong demand across institutions.\",\n",
960
+ " \"The labor market for this profile looks highly competitive.\",\n",
961
+ " \"This posting suggests strong employment opportunities.\",\n",
962
+ " \"Demand for this skill set appears to be growing.\",\n",
963
+ " \"This category seems to attract significant hiring activity.\"\n",
964
+ " ],\n",
965
+ " \"stable\": [\n",
966
+ " \"This role appears to have steady demand.\",\n",
967
+ " \"The market for this profile looks relatively balanced.\",\n",
968
+ " \"This posting suggests moderate but stable opportunities.\",\n",
969
+ " \"Demand appears consistent across institutions.\",\n",
970
+ " \"This category seems to have regular hiring activity.\"\n",
971
+ " ],\n",
972
+ " \"low\": [\n",
973
+ " \"This role appears to have weaker demand.\",\n",
974
+ " \"The market for this profile looks more limited.\",\n",
975
+ " \"This posting suggests fewer employment opportunities.\",\n",
976
+ " \"Demand for this skill set appears relatively low.\",\n",
977
+ " \"This category seems to have less hiring activity.\"\n",
978
+ " ]\n",
979
+ "}"
980
+ ],
981
+ "metadata": {
982
+ "id": "HHqueIqZDqVe"
983
+ },
984
+ "execution_count": 31,
985
+ "outputs": []
986
+ },
987
+ {
988
+ "cell_type": "code",
989
+ "source": [
990
+ "jobs_enriched = jobs_clean.copy()\n",
991
+ "# days_to_deadline is the day component of (Deadline - today's midnight).\n",
992
+ "today = pd.Timestamp.today().normalize()\n",
993
+ "jobs_enriched[\"days_to_deadline\"] = jobs_enriched[\"Deadline\"].sub(today).dt.days\n",
994
+ "\n",
995
+ "def urgency_from_days(days):\n",
996
+ "    # Random urgency draw: fewer remaining days map to a higher range; missing values get 3-6.\n",
997
+ "    if pd.isna(days):\n",
998
+ "        return random.randint(3, 6)\n",
999
+ "    if days <= 7:\n",
1000
+ "        return random.randint(8, 10)\n",
1001
+ "    if days <= 21:\n",
1002
+ "        return random.randint(5, 8)\n",
1003
+ "    return random.randint(2, 6)\n",
1004
+ "\n",
1005
+ "jobs_enriched[\"urgency_score\"] = jobs_enriched[\"days_to_deadline\"].apply(urgency_from_days)\n",
1006
+ "\n",
1007
+ "def demand_from_urgency(score):\n",
1008
+ "    # Random demand draw whose range rises with the urgency score.\n",
1009
+ "    if score >= 8:\n",
1010
+ "        return random.randint(7, 10)\n",
1011
+ "    if score >= 5:\n",
1012
+ "        return random.randint(4, 7)\n",
1013
+ "    return random.randint(2, 5)\n",
1014
+ "\n",
1015
+ "jobs_enriched[\"job_demand_score\"] = jobs_enriched[\"urgency_score\"].apply(demand_from_urgency)\n",
1016
+ "\n",
1017
+ "def demand_label(score):\n",
1018
+ "    # Bucket a demand score: up to 3 is \"low\", up to 6 is \"stable\", anything above is \"high\".\n",
1019
+ "    if score <= 3:\n",
1020
+ "        return \"low\"\n",
1021
+ "    if score <= 6:\n",
1022
+ "        return \"stable\"\n",
1023
+ "    return \"high\"\n",
1024
+ "\n",
1025
+ "jobs_enriched[\"demand_label\"] = jobs_enriched[\"job_demand_score\"].apply(demand_label)\n",
1026
+ "# Salary, automation risk and application counts are random draws that do not use the other columns.\n",
1027
+ "jobs_enriched[\"estimated_salary\"] = np.random.randint(low=35000, high=95000, size=len(jobs_enriched))\n",
1028
+ "\n",
1029
+ "jobs_enriched[\"automation_risk\"] = np.random.choice(\n",
1030
+ "    [\"low\", \"medium\", \"high\"],\n",
1031
+ "    size=len(jobs_enriched),\n",
1032
+ "    p=[0.35, 0.45, 0.20],\n",
1033
+ ")\n",
1034
+ "\n",
1035
+ "jobs_enriched[\"estimated_applications\"] = np.random.randint(low=20, high=250, size=len(jobs_enriched))\n",
1036
+ "# Pick one canned sentence matching each row's demand label.\n",
1037
+ "jobs_enriched[\"job_comment\"] = jobs_enriched[\"demand_label\"].apply(\n",
1038
+ "    lambda label: random.choice(comments_by_demand[label])\n",
1039
+ ")\n",
1040
+ "\n",
1041
+ "display(jobs_enriched.head())"
1042
+ ],
1043
+ "metadata": {
1044
+ "colab": {
1045
+ "base_uri": "https://localhost:8080/",
1046
+ "height": 521
1047
+ },
1048
+ "id": "l6PBz502Dr3a",
1049
+ "outputId": "401257dc-e7ea-4481-eb1b-15688fbcc6b2"
1050
+ },
1051
+ "execution_count": 32,
1052
+ "outputs": [
1053
+ {
1054
+ "output_type": "display_data",
1055
+ "data": {
1056
+ "text/plain": [
1057
+ " ID title \\\n",
1058
+ "0 19702 Project Assistant \n",
1059
+ "1 19703 Policy Officer \n",
1060
+ "2 19664 Senior Military Advisor to the Executive Director \n",
1061
+ "3 19665 Structures Expert \n",
1062
+ "4 19666 Certification Expert - Hydromechanical and Fli... \n",
1063
+ "\n",
1064
+ " Domain(s) Grade Type of contract \\\n",
1065
+ "0 Justice and human rights FG III Contract staff \n",
1066
+ "1 Justice and human rights FG IV Contract staff \n",
1067
+ "2 Transport AD 10 Temporary staff \n",
1068
+ "3 Transport AD 7 Temporary staff \n",
1069
+ "4 Transport AD 7 Temporary staff \n",
1070
+ "\n",
1071
+ " Institution(s) Location(s) \\\n",
1072
+ "0 (FRA) European Union Agency for Fundamental Ri... Vienna (Austria) \n",
1073
+ "1 (FRA) European Union Agency for Fundamental Ri... Vienna (Austria) \n",
1074
+ "2 (EASA) European Union Aviation Safety Agency Cologne (Germany) \n",
1075
+ "3 (EASA) European Union Aviation Safety Agency Cologne (Germany) \n",
1076
+ "4 (EASA) European Union Aviation Safety Agency Cologne (Germany) \n",
1077
+ "\n",
1078
+ " Deadline Link to Content \\\n",
1079
+ "0 2026-04-30 13:00:00 https://eu-careers.europa.eu/en/job-opportunit... \n",
1080
+ "1 2026-04-30 13:00:00 https://eu-careers.europa.eu/en/job-opportunit... \n",
1081
+ "2 2026-04-30 23:59:00 https://eu-careers.europa.eu/en/job-opportunit... \n",
1082
+ "3 2026-04-30 23:59:00 https://eu-careers.europa.eu/en/job-opportunit... \n",
1083
+ "4 2026-04-30 23:59:00 https://eu-careers.europa.eu/en/job-opportunit... \n",
1084
+ "\n",
1085
+ " days_to_deadline urgency_score job_demand_score demand_label \\\n",
1086
+ "0 0 9 9 high \n",
1087
+ "1 0 8 8 high \n",
1088
+ "2 0 8 10 high \n",
1089
+ "3 0 9 8 high \n",
1090
+ "4 0 9 7 high \n",
1091
+ "\n",
1092
+ " estimated_salary automation_risk estimated_applications \\\n",
1093
+ "0 91422 low 75 \n",
1094
+ "1 50795 medium 180 \n",
1095
+ "2 35860 medium 185 \n",
1096
+ "3 73158 low 136 \n",
1097
+ "4 89343 medium 153 \n",
1098
+ "\n",
1099
+ " job_comment \n",
1100
+ "0 The labor market for this profile looks highly... \n",
1101
+ "1 Demand for this skill set appears to be growing. \n",
1102
+ "2 This category seems to attract significant hir... \n",
1103
+ "3 This posting suggests strong employment opport... \n",
1104
+ "4 This category seems to attract significant hir... "
1105
+ ],
1106
+ "text/html": [
1107
+ "\n",
1108
+ " <div id=\"df-11976e12-8290-4b3f-bb3c-7b01b06b0d36\" class=\"colab-df-container\">\n",
1109
+ " <div>\n",
1110
+ "<style scoped>\n",
1111
+ " .dataframe tbody tr th:only-of-type {\n",
1112
+ " vertical-align: middle;\n",
1113
+ " }\n",
1114
+ "\n",
1115
+ " .dataframe tbody tr th {\n",
1116
+ " vertical-align: top;\n",
1117
+ " }\n",
1118
+ "\n",
1119
+ " .dataframe thead th {\n",
1120
+ " text-align: right;\n",
1121
+ " }\n",
1122
+ "</style>\n",
1123
+ "<table border=\"1\" class=\"dataframe\">\n",
1124
+ " <thead>\n",
1125
+ " <tr style=\"text-align: right;\">\n",
1126
+ " <th></th>\n",
1127
+ " <th>ID</th>\n",
1128
+ " <th>title</th>\n",
1129
+ " <th>Domain(s)</th>\n",
1130
+ " <th>Grade</th>\n",
1131
+ " <th>Type of contract</th>\n",
1132
+ " <th>Institution(s)</th>\n",
1133
+ " <th>Location(s)</th>\n",
1134
+ " <th>Deadline</th>\n",
1135
+ " <th>Link to Content</th>\n",
1136
+ " <th>days_to_deadline</th>\n",
1137
+ " <th>urgency_score</th>\n",
1138
+ " <th>job_demand_score</th>\n",
1139
+ " <th>demand_label</th>\n",
1140
+ " <th>estimated_salary</th>\n",
1141
+ " <th>automation_risk</th>\n",
1142
+ " <th>estimated_applications</th>\n",
1143
+ " <th>job_comment</th>\n",
1144
+ " </tr>\n",
1145
+ " </thead>\n",
1146
+ " <tbody>\n",
1147
+ " <tr>\n",
1148
+ " <th>0</th>\n",
1149
+ " <td>19702</td>\n",
1150
+ " <td>Project Assistant</td>\n",
1151
+ " <td>Justice and human rights</td>\n",
1152
+ " <td>FG III</td>\n",
1153
+ " <td>Contract staff</td>\n",
1154
+ " <td>(FRA) European Union Agency for Fundamental Ri...</td>\n",
1155
+ " <td>Vienna (Austria)</td>\n",
1156
+ " <td>2026-04-30 13:00:00</td>\n",
1157
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
1158
+ " <td>0</td>\n",
1159
+ " <td>9</td>\n",
1160
+ " <td>9</td>\n",
1161
+ " <td>high</td>\n",
1162
+ " <td>91422</td>\n",
1163
+ " <td>low</td>\n",
1164
+ " <td>75</td>\n",
1165
+ " <td>The labor market for this profile looks highly...</td>\n",
1166
+ " </tr>\n",
1167
+ " <tr>\n",
1168
+ " <th>1</th>\n",
1169
+ " <td>19703</td>\n",
1170
+ " <td>Policy Officer</td>\n",
1171
+ " <td>Justice and human rights</td>\n",
1172
+ " <td>FG IV</td>\n",
1173
+ " <td>Contract staff</td>\n",
1174
+ " <td>(FRA) European Union Agency for Fundamental Ri...</td>\n",
1175
+ " <td>Vienna (Austria)</td>\n",
1176
+ " <td>2026-04-30 13:00:00</td>\n",
1177
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
1178
+ " <td>0</td>\n",
1179
+ " <td>8</td>\n",
1180
+ " <td>8</td>\n",
1181
+ " <td>high</td>\n",
1182
+ " <td>50795</td>\n",
1183
+ " <td>medium</td>\n",
1184
+ " <td>180</td>\n",
1185
+ " <td>Demand for this skill set appears to be growing.</td>\n",
1186
+ " </tr>\n",
1187
+ " <tr>\n",
1188
+ " <th>2</th>\n",
1189
+ " <td>19664</td>\n",
1190
+ " <td>Senior Military Advisor to the Executive Director</td>\n",
1191
+ " <td>Transport</td>\n",
1192
+ " <td>AD 10</td>\n",
1193
+ " <td>Temporary staff</td>\n",
1194
+ " <td>(EASA) European Union Aviation Safety Agency</td>\n",
1195
+ " <td>Cologne (Germany)</td>\n",
1196
+ " <td>2026-04-30 23:59:00</td>\n",
1197
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
1198
+ " <td>0</td>\n",
1199
+ " <td>8</td>\n",
1200
+ " <td>10</td>\n",
1201
+ " <td>high</td>\n",
1202
+ " <td>35860</td>\n",
1203
+ " <td>medium</td>\n",
1204
+ " <td>185</td>\n",
1205
+ " <td>This category seems to attract significant hir...</td>\n",
1206
+ " </tr>\n",
1207
+ " <tr>\n",
1208
+ " <th>3</th>\n",
1209
+ " <td>19665</td>\n",
1210
+ " <td>Structures Expert</td>\n",
1211
+ " <td>Transport</td>\n",
1212
+ " <td>AD 7</td>\n",
1213
+ " <td>Temporary staff</td>\n",
1214
+ " <td>(EASA) European Union Aviation Safety Agency</td>\n",
1215
+ " <td>Cologne (Germany)</td>\n",
1216
+ " <td>2026-04-30 23:59:00</td>\n",
1217
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
1218
+ " <td>0</td>\n",
1219
+ " <td>9</td>\n",
1220
+ " <td>8</td>\n",
1221
+ " <td>high</td>\n",
1222
+ " <td>73158</td>\n",
1223
+ " <td>low</td>\n",
1224
+ " <td>136</td>\n",
1225
+ " <td>This posting suggests strong employment opport...</td>\n",
1226
+ " </tr>\n",
1227
+ " <tr>\n",
1228
+ " <th>4</th>\n",
1229
+ " <td>19666</td>\n",
1230
+ " <td>Certification Expert - Hydromechanical and Fli...</td>\n",
1231
+ " <td>Transport</td>\n",
1232
+ " <td>AD 7</td>\n",
1233
+ " <td>Temporary staff</td>\n",
1234
+ " <td>(EASA) European Union Aviation Safety Agency</td>\n",
1235
+ " <td>Cologne (Germany)</td>\n",
1236
+ " <td>2026-04-30 23:59:00</td>\n",
1237
+ " <td>https://eu-careers.europa.eu/en/job-opportunit...</td>\n",
1238
+ " <td>0</td>\n",
1239
+ " <td>9</td>\n",
1240
+ " <td>7</td>\n",
1241
+ " <td>high</td>\n",
1242
+ " <td>89343</td>\n",
1243
+ " <td>medium</td>\n",
1244
+ " <td>153</td>\n",
1245
+ " <td>This category seems to attract significant hir...</td>\n",
1246
+ " </tr>\n",
1247
+ " </tbody>\n",
1248
+ "</table>\n",
1249
+ "</div>\n",
1250
+ " <div class=\"colab-df-buttons\">\n",
1251
+ "\n",
1252
+ " <div class=\"colab-df-container\">\n",
1253
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-11976e12-8290-4b3f-bb3c-7b01b06b0d36')\"\n",
1254
+ " title=\"Convert this dataframe to an interactive table.\"\n",
1255
+ " style=\"display:none;\">\n",
1256
+ "\n",
1257
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
1258
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
1259
+ " </svg>\n",
1260
+ " </button>\n",
1261
+ "\n",
1262
+ " <style>\n",
1263
+ " .colab-df-container {\n",
1264
+ " display:flex;\n",
1265
+ " gap: 12px;\n",
1266
+ " }\n",
1267
+ "\n",
1268
+ " .colab-df-convert {\n",
1269
+ " background-color: #E8F0FE;\n",
1270
+ " border: none;\n",
1271
+ " border-radius: 50%;\n",
1272
+ " cursor: pointer;\n",
1273
+ " display: none;\n",
1274
+ " fill: #1967D2;\n",
1275
+ " height: 32px;\n",
1276
+ " padding: 0 0 0 0;\n",
1277
+ " width: 32px;\n",
1278
+ " }\n",
1279
+ "\n",
1280
+ " .colab-df-convert:hover {\n",
1281
+ " background-color: #E2EBFA;\n",
1282
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
1283
+ " fill: #174EA6;\n",
1284
+ " }\n",
1285
+ "\n",
1286
+ " .colab-df-buttons div {\n",
1287
+ " margin-bottom: 4px;\n",
1288
+ " }\n",
1289
+ "\n",
1290
+ " [theme=dark] .colab-df-convert {\n",
1291
+ " background-color: #3B4455;\n",
1292
+ " fill: #D2E3FC;\n",
1293
+ " }\n",
1294
+ "\n",
1295
+ " [theme=dark] .colab-df-convert:hover {\n",
1296
+ " background-color: #434B5C;\n",
1297
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
1298
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
1299
+ " fill: #FFFFFF;\n",
1300
+ " }\n",
1301
+ " </style>\n",
1302
+ "\n",
1303
+ " <script>\n",
1304
+ " const buttonEl =\n",
1305
+ " document.querySelector('#df-11976e12-8290-4b3f-bb3c-7b01b06b0d36 button.colab-df-convert');\n",
1306
+ " buttonEl.style.display =\n",
1307
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
1308
+ "\n",
1309
+ " async function convertToInteractive(key) {\n",
1310
+ " const element = document.querySelector('#df-11976e12-8290-4b3f-bb3c-7b01b06b0d36');\n",
1311
+ " const dataTable =\n",
1312
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
1313
+ " [key], {});\n",
1314
+ " if (!dataTable) return;\n",
1315
+ "\n",
1316
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
1317
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
1318
+ " + ' to learn more about interactive tables.';\n",
1319
+ " element.innerHTML = '';\n",
1320
+ " dataTable['output_type'] = 'display_data';\n",
1321
+ " await google.colab.output.renderOutput(dataTable, element);\n",
1322
+ " const docLink = document.createElement('div');\n",
1323
+ " docLink.innerHTML = docLinkHtml;\n",
1324
+ " element.appendChild(docLink);\n",
1325
+ " }\n",
1326
+ " </script>\n",
1327
+ " </div>\n",
1328
+ "\n",
1329
+ "\n",
1330
+ " </div>\n",
1331
+ " </div>\n"
1332
+ ],
1333
+ "application/vnd.google.colaboratory.intrinsic+json": {
1334
+ "type": "dataframe",
1335
+ "summary": "{\n \"name\": \"display(jobs_enriched\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"ID\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 20,\n \"min\": 19664,\n \"max\": 19703,\n \"num_unique_values\": 5,\n \"samples\": [\n 19703,\n 19666,\n 19664\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"title\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Policy Officer\",\n \"Certification Expert - Hydromechanical and Flight Control Systems\",\n \"Senior Military Advisor to the Executive Director\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Domain(s)\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Transport\",\n \"Justice and human rights\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Grade\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"FG IV\",\n \"AD 7\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Type of contract\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Temporary staff\",\n \"Contract staff\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Institution(s)\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"(EASA) European Union Aviation Safety Agency\",\n \"(FRA) European Union Agency for Fundamental Rights\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Location(s)\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"Cologne (Germany)\",\n \"Vienna (Austria)\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Deadline\",\n \"properties\": {\n \"dtype\": \"date\",\n \"min\": 
\"2026-04-30 13:00:00\",\n \"max\": \"2026-04-30 23:59:00\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"2026-04-30 23:59:00\",\n \"2026-04-30 13:00:00\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Link to Content\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"https://eu-careers.europa.eu/en/job-opportunities/policy-officer/fra-ca-polof-fgiv-2026\",\n \"https://eu-careers.europa.eu/en/job-opportunities/certification-expert-hydromechanical-and-flight-control-systems/easa-ad-2026-997\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"days_to_deadline\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 0,\n \"num_unique_values\": 1,\n \"samples\": [\n 0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"urgency_score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 8,\n \"max\": 9,\n \"num_unique_values\": 2,\n \"samples\": [\n 8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"job_demand_score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 1,\n \"min\": 7,\n \"max\": 10,\n \"num_unique_values\": 4,\n \"samples\": [\n 8\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"demand_label\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"high\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"estimated_salary\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 24288,\n \"min\": 35860,\n \"max\": 91422,\n \"num_unique_values\": 5,\n \"samples\": [\n 50795\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"automation_risk\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 2,\n \"samples\": [\n \"medium\"\n ],\n 
\"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"estimated_applications\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 44,\n \"min\": 75,\n \"max\": 185,\n \"num_unique_values\": 5,\n \"samples\": [\n 180\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"job_comment\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 4,\n \"samples\": [\n \"Demand for this skill set appears to be growing.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
1336
+ }
1337
+ },
1338
+ "metadata": {}
1339
+ }
1340
+ ]
1341
+ },
1342
+ {
1343
+ "cell_type": "code",
1344
+ "source": [
1345
+ "jobs_enriched.to_csv(\"jobs_enriched.csv\", index=False)\n",
1346
+ "print(\"Saved: jobs_enriched.csv\")"
1347
+ ],
1348
+ "metadata": {
1349
+ "colab": {
1350
+ "base_uri": "https://localhost:8080/"
1351
+ },
1352
+ "id": "s00SIZBsDtd-",
1353
+ "outputId": "e87d8cc4-4a9c-4263-ddd1-9ddc8d02cbc9"
1354
+ },
1355
+ "execution_count": 33,
1356
+ "outputs": [
1357
+ {
1358
+ "output_type": "stream",
1359
+ "name": "stdout",
1360
+ "text": [
1361
+ "Saved: jobs_enriched.csv\n"
1362
+ ]
1363
+ }
1364
+ ]
1365
+ },
1366
+ {
1367
+ "cell_type": "code",
1368
+ "source": [
1369
+ "jobs_enriched[\"deadline_month\"] = jobs_enriched[\"Deadline\"].dt.to_period(\"M\").astype(str)\n",
1370
+ "\n",
1371
+ "monthly_openings = (\n",
1372
+ " jobs_enriched\n",
1373
+ " .groupby([\"deadline_month\", \"Domain(s)\"], as_index=False)\n",
1374
+ " .agg(\n",
1375
+ " postings=(\"title\", \"count\"),\n",
1376
+ " avg_salary=(\"estimated_salary\", \"mean\"),\n",
1377
+ " avg_demand_score=(\"job_demand_score\", \"mean\"),\n",
1378
+ " avg_applications=(\"estimated_applications\", \"mean\")\n",
1379
+ " )\n",
1380
+ ")\n",
1381
+ "\n",
1382
+ "display(monthly_openings.head())\n",
1383
+ "print(\"Shape:\", monthly_openings.shape)"
1384
+ ],
1385
+ "metadata": {
1386
+ "colab": {
1387
+ "base_uri": "https://localhost:8080/",
1388
+ "height": 241
1389
+ },
1390
+ "id": "3NtBDh8ODvLW",
1391
+ "outputId": "aad6702c-076b-47cd-b398-1cf215d10268"
1392
+ },
1393
+ "execution_count": 34,
1394
+ "outputs": [
1395
+ {
1396
+ "output_type": "display_data",
1397
+ "data": {
1398
+ "text/plain": [
1399
+ " deadline_month Domain(s) postings avg_salary \\\n",
1400
+ "0 2026-04 Data protection 1 79732.000000 \n",
1401
+ "1 2026-04 Economics, Finance and Statistics 1 46284.000000 \n",
1402
+ "2 2026-04 Human Resources 1 89886.000000 \n",
1403
+ "3 2026-04 Justice and human rights 2 71108.500000 \n",
1404
+ "4 2026-04 Transport 3 66120.333333 \n",
1405
+ "\n",
1406
+ " avg_demand_score avg_applications \n",
1407
+ "0 9.000000 77.0 \n",
1408
+ "1 7.000000 63.0 \n",
1409
+ "2 7.000000 192.0 \n",
1410
+ "3 8.500000 127.5 \n",
1411
+ "4 8.333333 158.0 "
1412
+ ],
1413
+ "text/html": [
1414
+ "\n",
1415
+ " <div id=\"df-d89ad582-a695-47da-9deb-fe333bb72f92\" class=\"colab-df-container\">\n",
1416
+ " <div>\n",
1417
+ "<style scoped>\n",
1418
+ " .dataframe tbody tr th:only-of-type {\n",
1419
+ " vertical-align: middle;\n",
1420
+ " }\n",
1421
+ "\n",
1422
+ " .dataframe tbody tr th {\n",
1423
+ " vertical-align: top;\n",
1424
+ " }\n",
1425
+ "\n",
1426
+ " .dataframe thead th {\n",
1427
+ " text-align: right;\n",
1428
+ " }\n",
1429
+ "</style>\n",
1430
+ "<table border=\"1\" class=\"dataframe\">\n",
1431
+ " <thead>\n",
1432
+ " <tr style=\"text-align: right;\">\n",
1433
+ " <th></th>\n",
1434
+ " <th>deadline_month</th>\n",
1435
+ " <th>Domain(s)</th>\n",
1436
+ " <th>postings</th>\n",
1437
+ " <th>avg_salary</th>\n",
1438
+ " <th>avg_demand_score</th>\n",
1439
+ " <th>avg_applications</th>\n",
1440
+ " </tr>\n",
1441
+ " </thead>\n",
1442
+ " <tbody>\n",
1443
+ " <tr>\n",
1444
+ " <th>0</th>\n",
1445
+ " <td>2026-04</td>\n",
1446
+ " <td>Data protection</td>\n",
1447
+ " <td>1</td>\n",
1448
+ " <td>79732.000000</td>\n",
1449
+ " <td>9.000000</td>\n",
1450
+ " <td>77.0</td>\n",
1451
+ " </tr>\n",
1452
+ " <tr>\n",
1453
+ " <th>1</th>\n",
1454
+ " <td>2026-04</td>\n",
1455
+ " <td>Economics, Finance and Statistics</td>\n",
1456
+ " <td>1</td>\n",
1457
+ " <td>46284.000000</td>\n",
1458
+ " <td>7.000000</td>\n",
1459
+ " <td>63.0</td>\n",
1460
+ " </tr>\n",
1461
+ " <tr>\n",
1462
+ " <th>2</th>\n",
1463
+ " <td>2026-04</td>\n",
1464
+ " <td>Human Resources</td>\n",
1465
+ " <td>1</td>\n",
1466
+ " <td>89886.000000</td>\n",
1467
+ " <td>7.000000</td>\n",
1468
+ " <td>192.0</td>\n",
1469
+ " </tr>\n",
1470
+ " <tr>\n",
1471
+ " <th>3</th>\n",
1472
+ " <td>2026-04</td>\n",
1473
+ " <td>Justice and human rights</td>\n",
1474
+ " <td>2</td>\n",
1475
+ " <td>71108.500000</td>\n",
1476
+ " <td>8.500000</td>\n",
1477
+ " <td>127.5</td>\n",
1478
+ " </tr>\n",
1479
+ " <tr>\n",
1480
+ " <th>4</th>\n",
1481
+ " <td>2026-04</td>\n",
1482
+ " <td>Transport</td>\n",
1483
+ " <td>3</td>\n",
1484
+ " <td>66120.333333</td>\n",
1485
+ " <td>8.333333</td>\n",
1486
+ " <td>158.0</td>\n",
1487
+ " </tr>\n",
1488
+ " </tbody>\n",
1489
+ "</table>\n",
1490
+ "</div>\n",
1491
+ " <div class=\"colab-df-buttons\">\n",
1492
+ "\n",
1493
+ " <div class=\"colab-df-container\">\n",
1494
+ " <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-d89ad582-a695-47da-9deb-fe333bb72f92')\"\n",
1495
+ " title=\"Convert this dataframe to an interactive table.\"\n",
1496
+ " style=\"display:none;\">\n",
1497
+ "\n",
1498
+ " <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
1499
+ " <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
1500
+ " </svg>\n",
1501
+ " </button>\n",
1502
+ "\n",
1503
+ " <style>\n",
1504
+ " .colab-df-container {\n",
1505
+ " display:flex;\n",
1506
+ " gap: 12px;\n",
1507
+ " }\n",
1508
+ "\n",
1509
+ " .colab-df-convert {\n",
1510
+ " background-color: #E8F0FE;\n",
1511
+ " border: none;\n",
1512
+ " border-radius: 50%;\n",
1513
+ " cursor: pointer;\n",
1514
+ " display: none;\n",
1515
+ " fill: #1967D2;\n",
1516
+ " height: 32px;\n",
1517
+ " padding: 0 0 0 0;\n",
1518
+ " width: 32px;\n",
1519
+ " }\n",
1520
+ "\n",
1521
+ " .colab-df-convert:hover {\n",
1522
+ " background-color: #E2EBFA;\n",
1523
+ " box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
1524
+ " fill: #174EA6;\n",
1525
+ " }\n",
1526
+ "\n",
1527
+ " .colab-df-buttons div {\n",
1528
+ " margin-bottom: 4px;\n",
1529
+ " }\n",
1530
+ "\n",
1531
+ " [theme=dark] .colab-df-convert {\n",
1532
+ " background-color: #3B4455;\n",
1533
+ " fill: #D2E3FC;\n",
1534
+ " }\n",
1535
+ "\n",
1536
+ " [theme=dark] .colab-df-convert:hover {\n",
1537
+ " background-color: #434B5C;\n",
1538
+ " box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
1539
+ " filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
1540
+ " fill: #FFFFFF;\n",
1541
+ " }\n",
1542
+ " </style>\n",
1543
+ "\n",
1544
+ " <script>\n",
1545
+ " const buttonEl =\n",
1546
+ " document.querySelector('#df-d89ad582-a695-47da-9deb-fe333bb72f92 button.colab-df-convert');\n",
1547
+ " buttonEl.style.display =\n",
1548
+ " google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
1549
+ "\n",
1550
+ " async function convertToInteractive(key) {\n",
1551
+ " const element = document.querySelector('#df-d89ad582-a695-47da-9deb-fe333bb72f92');\n",
1552
+ " const dataTable =\n",
1553
+ " await google.colab.kernel.invokeFunction('convertToInteractive',\n",
1554
+ " [key], {});\n",
1555
+ " if (!dataTable) return;\n",
1556
+ "\n",
1557
+ " const docLinkHtml = 'Like what you see? Visit the ' +\n",
1558
+ " '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
1559
+ " + ' to learn more about interactive tables.';\n",
1560
+ " element.innerHTML = '';\n",
1561
+ " dataTable['output_type'] = 'display_data';\n",
1562
+ " await google.colab.output.renderOutput(dataTable, element);\n",
1563
+ " const docLink = document.createElement('div');\n",
1564
+ " docLink.innerHTML = docLinkHtml;\n",
1565
+ " element.appendChild(docLink);\n",
1566
+ " }\n",
1567
+ " </script>\n",
1568
+ " </div>\n",
1569
+ "\n",
1570
+ "\n",
1571
+ " </div>\n",
1572
+ " </div>\n"
1573
+ ],
1574
+ "application/vnd.google.colaboratory.intrinsic+json": {
1575
+ "type": "dataframe",
1576
+ "summary": "{\n \"name\": \"print(\\\"Shape:\\\", monthly_openings\",\n \"rows\": 5,\n \"fields\": [\n {\n \"column\": \"deadline_month\",\n \"properties\": {\n \"dtype\": \"object\",\n \"num_unique_values\": 1,\n \"samples\": [\n \"2026-04\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"Domain(s)\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 5,\n \"samples\": [\n \"Economics, Finance and Statistics\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"postings\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 1,\n \"max\": 3,\n \"num_unique_values\": 3,\n \"samples\": [\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"avg_salary\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 16331.975065972749,\n \"min\": 46284.0,\n \"max\": 89886.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 46284.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"avg_demand_score\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0.9159087776022724,\n \"min\": 7.0,\n \"max\": 9.0,\n \"num_unique_values\": 4,\n \"samples\": [\n 7.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"avg_applications\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 54.13178363955875,\n \"min\": 63.0,\n \"max\": 192.0,\n \"num_unique_values\": 5,\n \"samples\": [\n 63.0\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
1577
+ }
1578
+ },
1579
+ "metadata": {}
1580
+ },
1581
+ {
1582
+ "output_type": "stream",
1583
+ "name": "stdout",
1584
+ "text": [
1585
+ "Shape: (138, 6)\n"
1586
+ ]
1587
+ }
1588
+ ]
1589
+ },
1590
+ {
1591
+ "cell_type": "code",
1592
+ "source": [
1593
+ "monthly_openings.to_csv(\"jobs_monthly_openings.csv\", index=False)\n",
1594
+ "print(\"Saved: jobs_monthly_openings.csv\")"
1595
+ ],
1596
+ "metadata": {
1597
+ "colab": {
1598
+ "base_uri": "https://localhost:8080/"
1599
+ },
1600
+ "id": "_WpGUgWSDxDr",
1601
+ "outputId": "569af645-b25b-40f8-f70b-522e5c8be3de"
1602
+ },
1603
+ "execution_count": 35,
1604
+ "outputs": [
1605
+ {
1606
+ "output_type": "stream",
1607
+ "name": "stdout",
1608
+ "text": [
1609
+ "Saved: jobs_monthly_openings.csv\n"
1610
+ ]
1611
+ }
1612
+ ]
1613
+ }
1614
+ ]
1615
+ }