Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -160,6 +160,207 @@ def classify_occupational_group_by_level(responsibilities: List[str]) -> dict:
|
|
| 160 |
result['error'] = str(e)
|
| 161 |
return result
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
def get_skills_info_esco(Level_5_code):
|
| 164 |
|
| 165 |
try:
|
|
|
|
| 160 |
result['error'] = str(e)
|
| 161 |
return result
|
| 162 |
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def _esco_option_lines(groups_df):
    """Render each row of *groups_df* as a '- code: label - description' line."""
    return "\n".join(
        f"- {row['code']}: {row['preferredLabel']} - {row['description']}"
        for _, row in groups_df.iterrows()
    )


def _esco_child_groups(esco_df, parent_code):
    """Return rows of *esco_df* whose code extends *parent_code* by one character."""
    codes = esco_df['code']
    # na=False keeps the mask strictly boolean; pandas refuses to index with
    # a mask that contains NaN.
    return esco_df[
        codes.str.startswith(parent_code, na=False)
        & (codes.str.len() == len(parent_code) + 1)
    ]


def classify_esco_by_hierarchical_level(responsibilities: List[str]) -> dict:
    """
    Classify job responsibilities into occupational groups at five levels of
    [European Skills, Competences, Qualifications, and Occupations (ESCO)](https://esco.ec.europa.eu/en),
    returning codes, names, and descriptions for each level.

    Levels 1-4 are picked from the groups in ``ISCOGroups_en.csv`` (each level's
    code is its parent's code plus one character). Level 5 is picked from the
    rows of ``occupations_en.csv`` whose ``iscoGroup`` starts with the level 4
    code. At every level the candidates are shown to the model via ``gpt_call``
    and the reply is passed through ``code_sanitize`` together with the list of
    allowed codes (both helpers are defined elsewhere in this file).

    The walk is strictly top-down: as soon as a level yields no code, all
    deeper levels are skipped.

    Args:
        responsibilities: List of job responsibility strings

    Returns:
        Dictionary merged from ``get_level_ESCO_info`` for every level that was
        classified, i.e. keys of the form ``Level_<n>_ESCO_code``,
        ``Level_<n>_ESCO_name`` and ``Level_<n>_ESCO_desc``. Empty when the
        groups file contains no single-character codes.
    """
    # Codes must stay strings: they are compared by prefix and by length.
    esco_df = pd.read_csv(
        "ISCOGroups_en.csv",
        dtype={'code': str},
    )
    esco_level5_df = pd.read_csv(
        "occupations_en.csv",
        dtype={'code': str, 'iscoGroup': str},
    )

    result = {}

    ######################## Level 1 ###################
    # Get all top-level codes (single character/digit)
    top_level_codes = sorted({
        code for code in esco_df['code']
        if len(code) == 1 and code.isalnum()
    })

    level1_code = None
    if top_level_codes:
        level1_df = esco_df[esco_df['code'].isin(top_level_codes)]
        job_occupation_list = _esco_option_lines(level1_df)
        list1_output = level1_df["code"].tolist()
        list1 = ", ".join(map(str, list1_output))

        user_prompt1 = f"""
        Here is a list of job responsibilities:
        {responsibilities}

        Select the most relevant top-level code from these options:
        {job_occupation_list}

        Based on the responsibilities, suggest the most relevant level 1 Occupation code from within this list: {list1}.
        **Important:**
        - Return ONLY the code, nothing else.
        - The code should be exactly as shown in the list.
        - Do not include any additional text or explanation.
        """
        level1_code = gpt_call("Identify top-level occupational group", user_prompt1).strip()
        level1_code = code_sanitize(level1_code, list1_output)
        result.update(get_level_ESCO_info(level1_df, level1_code, 'Level_1'))

    ######################## Level 2 ###################
    level2_code = None
    if level1_code:
        level2_df = _esco_child_groups(esco_df, level1_code)
        if not level2_df.empty:
            level2_options = _esco_option_lines(level2_df)
            list2_output = level2_df["code"].tolist()
            list2 = ", ".join(map(str, list2_output))

            user_prompt2 = f"""
            Here is a list of job responsibilities:
            {responsibilities}

            Here is a list of level 2 Occupation classifications within {level1_code}:
            {level2_options}

            Based on the responsibilities, suggest the most relevant level 2 Occupation code from within this list: {list2}.
            **Important:**
            - Return ONLY the code, nothing else.
            - The code should be exactly as shown in the list.
            - Do not include any additional text or explanation.
            """
            level2_code = gpt_call("Identify second-level occupational group", user_prompt2).strip()
            level2_code = code_sanitize(level2_code, list2_output)
            result.update(get_level_ESCO_info(level2_df, level2_code, 'Level_2'))

    ######################## Level 3 ###################
    level3_code = None
    if level2_code:
        level3_df = _esco_child_groups(esco_df, level2_code)
        if not level3_df.empty:
            level3_options = _esco_option_lines(level3_df)
            list3_output = level3_df["code"].tolist()
            list3 = ", ".join(map(str, list3_output))

            user_prompt3 = f"""
            Here is a list of job responsibilities:
            {responsibilities}

            Here is a list of level 3 Occupation classifications within {level2_code}:
            {level3_options}

            Based on the responsibilities, suggest the most relevant level 3 Occupation code from within this list: {list3}.

            **Important:**
            - Return ONLY the code, nothing else.
            - The code should be exactly as shown in the list.
            - Do not include any additional text or explanation.

            """
            level3_code = gpt_call("Identify third-level occupational group", user_prompt3).strip()
            level3_code = code_sanitize(level3_code, list3_output)
            result.update(get_level_ESCO_info(level3_df, level3_code, 'Level_3'))

    ######################## Level 4 ###################
    level4_code = None
    if level3_code:
        level4_df = _esco_child_groups(esco_df, level3_code)
        if not level4_df.empty:
            level4_options = _esco_option_lines(level4_df)
            list4_output = level4_df["code"].tolist()
            list4 = ", ".join(map(str, list4_output))
            user_prompt4 = f"""
            Here is a list of job responsibilities:
            {responsibilities}

            Here is a list of level 4 Occupation classifications within {level3_code}:
            {level4_options}

            Based on the responsibilities, suggest the most relevant level 4 Occupation code from within this list: {list4}.
            **Important:**
            - Return ONLY the code, nothing else.
            - The code should be exactly as shown in the list.
            - Do not include any additional text or explanation.
            """

            level4_code = gpt_call("Identify fourth-level occupational group", user_prompt4).strip()
            level4_code = code_sanitize(level4_code, list4_output)
            result.update(get_level_ESCO_info(level4_df, level4_code, 'Level_4'))

    ######################## Level 5 ###################
    # Level 5 entries are individual occupations from a separate file, linked
    # to their parent group through the 'iscoGroup' column rather than through
    # the length of their own code.
    level5_code = None
    if level4_code:
        # na=False: occupations with a missing iscoGroup are simply excluded
        # instead of putting NaN into the mask.
        level5_df = esco_level5_df[
            esco_level5_df['iscoGroup'].str.startswith(level4_code, na=False)
        ]
        if not level5_df.empty:
            level5_options = _esco_option_lines(level5_df)

            list5_output = level5_df["code"].tolist()
            list5 = ", ".join(map(str, list5_output))
            # The prompt previously said "level 4" here (copy-paste slip),
            # which contradicted the level-5 candidates being listed.
            user_prompt5 = f"""
            Here is a list of job responsibilities:
            {responsibilities}

            Here is a list of level 5 Occupation classifications within {level4_code}:
            {level5_options}

            Based on the responsibilities, suggest the most relevant level 5 Occupation code from within this list: {list5}.
            **Important:**
            - Return ONLY the code as stated in the provided list, nothing else.
            - The code should be exactly as shown in the list.
            - Do not include any additional text, occupation code or explanation.
            """

            level5_code = gpt_call("Identify fifth-level occupational group", user_prompt5).strip()
            # Handle the case where the LLM might return just the code part
            level5_code = code_sanitize(level5_code, list5_output)
            result.update(get_level_ESCO_info(level5_df, level5_code, 'Level_5'))

    return result
|
| 343 |
+
|
| 344 |
+
|
| 345 |
+
|
| 346 |
+
def get_level_ESCO_info(df, code, level_name):
    """Look up one ESCO entry by code and return it under level-prefixed keys.

    Args:
        df: DataFrame with at least 'code' and 'preferredLabel' columns; a
            'description' column is used when present.
        code: Code to search for in the 'code' column.
        level_name: Prefix for the returned keys (e.g. 'Level_1').

    Returns:
        Dict with '<level_name>_ESCO_code', '<level_name>_ESCO_name' and
        '<level_name>_ESCO_desc'. When no row matches, a warning is printed and
        the name/description carry placeholder values.
    """
    code_key = f'{level_name}_ESCO_code'
    name_key = f'{level_name}_ESCO_name'
    desc_key = f'{level_name}_ESCO_desc'

    hits = df.loc[df['code'] == code]
    if hits.empty:
        print(f"Warning: No {level_name} found for ESCO code {code}")
        return {
            code_key: code,
            name_key: 'UNKNOWN',
            desc_key: 'No matching occupation found',
        }

    # Only the first matching row is reported, even if the code is duplicated.
    first = hits.iloc[0]
    return {
        code_key: code,
        name_key: first['preferredLabel'],
        desc_key: first.get('description', ''),
    }
|
| 362 |
+
|
| 363 |
+
|
| 364 |
def get_skills_info_esco(Level_5_code):
|
| 365 |
|
| 366 |
try:
|