Spaces:
Running
Running
| import gradio as gr | |
| import pdfplumber | |
| import pandas as pd | |
| import re | |
| import warnings | |
| import logging | |
| import os | |
| from dotenv import load_dotenv | |
| import json | |
| from concurrent.futures import ThreadPoolExecutor | |
| from typing import List, Dict, Optional | |
| import traceback | |
| import time | |
| import openai | |
# Debugging setup
DEBUG = True
MAX_DEBUG_MESSAGES = 20
debug_messages = []


def log_debug(message):
    """Record a timestamped debug message and return the retained log.

    When ``DEBUG`` is true the message is prefixed with the local time,
    appended to ``debug_messages`` and printed to stdout. Only the newest
    ``MAX_DEBUG_MESSAGES`` entries are kept.

    Args:
        message: Value to record (formatted with an f-string).

    Returns:
        The retained messages joined by newlines, or "" when ``DEBUG`` is off.
    """
    if not DEBUG:
        return ""
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    full_message = f"[{timestamp}] {message}"
    debug_messages.append(full_message)
    print(full_message)  # Print to console
    # Trim in place so every reference to the list sees the same capped
    # history, even if the list somehow grew past the cap by more than one.
    del debug_messages[:-MAX_DEBUG_MESSAGES]
    return "\n".join(debug_messages)
# Initialize debug logging
log_debug("Application starting...")
# Load environment variables
load_dotenv()
# Configure logging for pdfminer
logging.getLogger('pdfminer').setLevel(logging.ERROR)
# Suppress specific warnings
warnings.filterwarnings("ignore", category=UserWarning, message="CropBox.*")


# ================= DataFrame initializations =================
def _load_csv(path, label, **read_kwargs):
    """Read a reference CSV, falling back to an empty DataFrame on failure.

    Args:
        path: CSV file path passed to ``pd.read_csv``.
        label: Short name used in the "Reading N <label>" debug message.
        **read_kwargs: Extra keyword arguments forwarded to ``pd.read_csv``.

    Returns:
        The loaded DataFrame, or an empty DataFrame when reading raised.
    """
    try:
        frame = pd.read_csv(path, on_bad_lines='skip', **read_kwargs)
    except Exception as e:
        # Best effort: the app still starts when a reference table is missing.
        log_debug(f"Error reading {path}: {e}")
        return pd.DataFrame()
    log_debug(f"Reading {len(frame)} {label}")
    return frame


job_families_df = _load_csv("job_families1.csv", "job_families")
occupational_groups_df = _load_csv("occupational_groups.csv", "occupational_groups")
# Force 'code' (and 'iscoGroup') to be read as strings.
esco_df = _load_csv("ISCOGroups_en.csv", "esco groups", dtype={'code': str})
esco_level5_df = _load_csv(
    "occupations_en.csv", "esco_level5", dtype={'code': str, 'iscoGroup': str}
)
esco_skill_df = _load_csv("skills_en.csv", "esco_skill")
esco_skill_map_df = _load_csv("occupationSkillRelations_en.csv", "esco_skill_map")
# ================= LLM API =================
def initialize_openai_client():
    """Create the Azure OpenAI client from environment variables.

    Reads ``AZURE_OPENAI_API_KEY``, ``AZURE_OPENAI_ENDPOINT`` and
    ``OPENAI_API_VERSION`` from the environment.

    Returns:
        The constructed ``openai.AzureOpenAI`` client.

    Raises:
        Exception: If the client cannot be constructed; the original error is
            chained as ``__cause__``.
    """
    try:
        return openai.AzureOpenAI(
            api_key=os.getenv("AZURE_OPENAI_API_KEY"),
            azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
            api_version=os.getenv("OPENAI_API_VERSION"),
        )
    except Exception as e:
        raise Exception(f"Failed to initialize OpenAI client: {e}") from e


client = initialize_openai_client()
def gpt_call(system_prompt: str, user_prompt: str) -> str:
    """Send one system + user prompt pair to the chat model.

    The deployment name is read from ``AZURE_DEPLOYMENT_NAME``.

    Args:
        system_prompt: Content of the system message.
        user_prompt: Content of the user message.

    Returns:
        The stripped text of the first choice. On any failure the function
        does not raise; it returns a string starting with ``"ERROR: "``.
    """
    try:
        response = client.chat.completions.create(
            model=os.getenv("AZURE_DEPLOYMENT_NAME"),
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.3,
        )
        content = response.choices[0].message.content
        if content is None:
            # Report an empty completion explicitly instead of an AttributeError.
            log_debug("LLM call returned no content")
            return "ERROR: model returned no content"
        return content.strip()
    except Exception as e:
        log_debug(f"LLM call failed: {e}")
        return f"ERROR: {e}"
# ================= Extract text =================
def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract page text and table cell text from a PDF.

    For each page, the page text (when non-empty) is added followed by a
    newline, then every string cell of every extracted table is added
    followed by a space.

    Args:
        pdf_path: Path of the PDF opened with ``pdfplumber``.

    Returns:
        The concatenated text.
    """
    chunks = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                chunks.append(page_text + "\n")
            for table in page.extract_tables():
                for row in table:
                    # Non-string cells (e.g. None for empty cells) are skipped.
                    chunks.extend(cell + " " for cell in row if isinstance(cell, str))
                    # NOTE(review): source indentation was lost; the newline is
                    # assumed to terminate each table row - confirm.
                    chunks.append("\n")
    return "".join(chunks)
# ================= AI Functions =================
def extract_section_from_pdf(full_text: str, section_title: str) -> str:
    """Ask the LLM to return the content of one titled section.

    Args:
        full_text: Full text of the position description.
        section_title: Title of the section to extract.

    Returns:
        Whatever ``gpt_call`` returns for the request; the prompt asks the
        model to answer "N/A" when the section is not present.
    """
    system_role = "You are an HR expert working for IOM."
    request = f"""
    Carefully evaluate the provided position description (PD) document and extract the content of the section titled "{section_title}" from the following text.
    Return only the content of the section, without the title.
    If the section cannot be found or explicitly mentioned in the text, use "N/A" as the default value.
    Do not repeat in the extracted text the name of the section.
    Extract precisely all the related text.
    Text of the position description:
    {full_text}
    Section to identify: "{section_title}":
    """
    return gpt_call(system_role, request)
def classify_job_family(responsibilities: List[str]) -> str:
    """Ask the LLM for the job family that best matches the responsibilities.

    The candidate list is built from the ``Job_family`` and ``Job_subfamily``
    columns of ``job_families_df``.

    Args:
        responsibilities: Responsibilities text interpolated into the prompt.

    Returns:
        The string returned by ``gpt_call``.
    """
    family_lines = [
        f"- {row['Job_family']}: {row['Job_subfamily']}"
        for _, row in job_families_df.iterrows()
    ]
    job_family_list = "\n".join(family_lines)
    system_role = "Suggest job family and subfamily based on responsibilities."
    request = f"""
    Here is a list of job responsibilities:
    {responsibilities}
    Here is a list of Job families:
    {job_family_list}
    Based on the responsibilities, suggest the most relevant job family and subfamily from the list above.
    **Important:**
    - Return ONLY the job family, nothing else.
    - The job family should be exactly as shown in the list.
    - Do not include any additional text or explanation.
    """
    return gpt_call(system_role, request)
def get_level_CCOG_info(df, code, level_name):
    """Look up the name and description of a CCOG code.

    Args:
        df: DataFrame with ``code`` and ``occupation`` columns and an optional
            ``occupation_description`` column.
        code: Code to look up.
        level_name: Prefix for the returned keys (e.g. ``"Level_1"``).

    Returns:
        A dict with ``<level_name>_CCOG_code``, ``_CCOG_name`` and
        ``_CCOG_desc``. When nothing matches, the name is ``"UNKNOWN"``.
    """
    # An empty fallback DataFrame has no 'code' column; treat it as "no match".
    matches = df[df['code'] == code] if 'code' in df.columns else df.iloc[0:0]
    if matches.empty:
        log_debug(f"Warning: No {level_name} found for CCOG code {code}")
        return {
            f'{level_name}_CCOG_code': code,
            f'{level_name}_CCOG_name': 'UNKNOWN',
            f'{level_name}_CCOG_desc': 'No matching occupation found',
        }
    info = matches.iloc[0]
    description = info.get('occupation_description', '')
    if pd.isna(description):
        # A blank CSV cell is read as NaN; expose it as an empty string.
        description = ''
    return {
        f'{level_name}_CCOG_code': code,
        f'{level_name}_CCOG_name': info['occupation'],
        f'{level_name}_CCOG_desc': description,
    }
def code_sanitize(input_string, valid_codes):
    """Map a free-text LLM answer onto one of the allowed codes.

    An exact match (after stripping whitespace) wins. Otherwise the longest
    allowed code contained in the answer is returned, so that an answer of
    "11" is not mistaken for the shorter code "1".

    Args:
        input_string: Text returned by the model; may be empty or None.
        valid_codes: Iterable of allowed codes (compared via ``str``).

    Returns:
        The matching element of ``valid_codes``, or None when nothing matches.
    """
    if not input_string:
        return None
    text = str(input_string).strip()
    # Empty codes would match any text, so they are ignored.
    candidates = [code for code in valid_codes if str(code) != ""]
    for code in candidates:
        if str(code) == text:
            return code
    # sorted() is stable: codes of equal length keep their original order.
    for code in sorted(candidates, key=lambda c: len(str(c)), reverse=True):
        if str(code) in text:
            return code
    return None
def classify_occupational_group_by_level(responsibilities: List[str]) -> dict:
    """Classify responsibilities into CCOG levels 1 to 4, one level at a time.

    Each level's candidates are the rows of ``occupational_groups_df`` for
    that level whose code starts with the code chosen at the previous level.
    Descent stops as soon as a level has no candidates or the model's answer
    cannot be matched to a candidate code.

    Args:
        responsibilities: Responsibilities text interpolated into the prompt.

    Returns:
        A dict of ``Level_<n>_CCOG_code/name/desc`` entries for the levels
        reached, plus an ``error`` entry if an exception was raised.
    """
    result = {}
    try:
        for level in range(1, 5):
            level_df = occupational_groups_df[occupational_groups_df['level'] == f"Level {level}"]
            if level > 1:
                prev_level_code = result[f'Level_{level-1}_CCOG_code']
                level_df = level_df[level_df['code'].str.startswith(prev_level_code, na=False)]
            if level_df.empty:
                log_debug(f"No level {level} CCOG candidates available; stopping classification")
                break
            job_occupation_list = "\n".join(
                f"- {row['code']}: {row['occupation']} - {row.get('occupation_description', '')}"
                for _, row in level_df.iterrows()
            )
            list_output = level_df["code"].tolist()
            user_prompt = f"""
            Here is a list of job responsibilities:
            {responsibilities}
            Here is a list of level {level} Occupation classifications:
            {job_occupation_list}
            Based on the responsibilities, suggest the most relevant level {level} Occupation code from within this list: {', '.join(map(str, list_output))}.
            **Important:**
            - Return ONLY the code, nothing else.
            - The code should be exactly as shown in the list.
            - Do not include any additional text or explanation.
            """
            level_code = gpt_call(f"Identify level {level} occupational group", user_prompt).strip()
            level_code = code_sanitize(level_code, list_output)
            result.update(get_level_CCOG_info(level_df, level_code, f'Level_{level}'))
            if level_code is None:
                # Without a valid parent code the next level cannot be filtered.
                log_debug(f"No valid level {level} CCOG code returned; stopping classification")
                break
    except Exception as e:
        log_debug(f"Error during classification: {str(e)}")
        result['error'] = str(e)
    return result
def _ask_esco_level(responsibilities, level_df, level, heading, system_prompt, strict=False):
    """Ask the LLM to pick one code from ``level_df`` and validate the answer.

    Returns the chosen element of ``level_df['code']`` or None when the answer
    does not match any candidate.
    """
    options = "\n".join(
        f"- {row['code']}: {row['preferredLabel']} - {row['description']}"
        for _, row in level_df.iterrows()
    )
    codes = level_df["code"].tolist()
    code_list = ", ".join(map(str, codes))
    # The level-5 prompt uses slightly stricter wording for two of the rules.
    if strict:
        first_rule = "Return ONLY the code as stated in the provided list, nothing else."
        last_rule = "Do not include any additional text, occupation code or explanation."
    else:
        first_rule = "Return ONLY the code, nothing else."
        last_rule = "Do not include any additional text or explanation."
    user_prompt = f"""
    Here is a list of job responsibilities:
    {responsibilities}
    {heading}
    {options}
    Based on the responsibilities, suggest the most relevant level {level} Occupation code from within this list: {code_list}.
    **Important:**
    - {first_rule}
    - The code should be exactly as shown in the list.
    - {last_rule}
    """
    answer = gpt_call(system_prompt, user_prompt).strip()
    return code_sanitize(answer, codes)


def classify_esco_by_hierarchical_level(responsibilities: List[str]) -> dict:
    """Classify responsibilities down the ESCO hierarchy (levels 1 to 5).

    Levels 1 to 4 come from ``esco_df``: level 1 is every single-character
    alphanumeric code, and each further level is the set of codes one
    character longer that start with the code chosen above. Level 5 is taken
    from ``esco_level5_df`` rows whose ``iscoGroup`` starts with the level-4
    code. Descent stops when a level has no candidates or no valid code was
    returned.
    See [ESCO](https://esco.ec.europa.eu/en).

    Args:
        responsibilities: Responsibilities text interpolated into the prompts.

    Returns:
        A dict of ``Level_<n>_ESCO_code/name/desc`` entries for the levels
        reached (empty when the ESCO tables are unavailable).
    """
    result = {}
    if 'code' not in esco_df.columns:
        log_debug("ESCO groups table is unavailable; skipping ESCO classification")
        return result
    group_codes = esco_df['code']

    # -------- Level 1: single-character top-level codes --------
    top_level_codes = sorted({
        code for code in group_codes
        if isinstance(code, str) and len(code) == 1 and code.isalnum()
    })
    if not top_level_codes:
        return result
    level_df = esco_df[group_codes.isin(top_level_codes)]
    code = _ask_esco_level(
        responsibilities, level_df, 1,
        "Select the most relevant top-level code from these options:",
        "Identify top-level occupational group",
    )
    result.update(get_level_ESCO_info(level_df, code, 'Level_1'))

    # -------- Levels 2-4: children are exactly one character longer --------
    for level, ordinal in ((2, "second"), (3, "third"), (4, "fourth")):
        if not code:
            return result
        level_df = esco_df[
            group_codes.str.startswith(code, na=False)
            & (group_codes.str.len() == len(code) + 1)
        ]
        if level_df.empty:
            return result
        heading = f"Here is a list of level {level} Occupation classifications within {code}:"
        code = _ask_esco_level(
            responsibilities, level_df, level, heading,
            f"Identify {ordinal}-level occupational group",
        )
        result.update(get_level_ESCO_info(level_df, code, f'Level_{level}'))

    # -------- Level 5: occupations attached to the level-4 ISCO group --------
    if not code or 'iscoGroup' not in esco_level5_df.columns:
        return result
    level_df = esco_level5_df[esco_level5_df['iscoGroup'].str.startswith(code, na=False)]
    if level_df.empty:
        return result
    level5_code = _ask_esco_level(
        responsibilities, level_df, 5,
        f"Here is a list of level 5 Occupation classifications within {code}:",
        "Identify fifth-level occupational group",
        strict=True,
    )
    result.update(get_level_ESCO_info(level_df, level5_code, 'Level_5'))
    return result
def get_level_ESCO_info(df, code, level_name):
    """Look up the label and description of an ESCO code.

    Args:
        df: DataFrame with ``code`` and ``preferredLabel`` columns and an
            optional ``description`` column.
        code: Code to look up.
        level_name: Prefix for the returned keys (e.g. ``"Level_1"``).

    Returns:
        A dict with ``<level_name>_ESCO_code``, ``_ESCO_name`` and
        ``_ESCO_desc``. When nothing matches, the name is ``"UNKNOWN"``.
    """
    # An empty fallback DataFrame has no 'code' column; treat it as "no match".
    matches = df[df['code'] == code] if 'code' in df.columns else df.iloc[0:0]
    if matches.empty:
        log_debug(f"Warning: No {level_name} found for ESCO code {code}")
        return {
            f'{level_name}_ESCO_code': code,
            f'{level_name}_ESCO_name': 'UNKNOWN',
            f'{level_name}_ESCO_desc': 'No matching occupation found',
        }
    info = matches.iloc[0]
    description = info.get('description', '')
    if pd.isna(description):
        # A blank CSV cell is read as NaN; expose it as an empty string.
        description = ''
    return {
        f'{level_name}_ESCO_code': code,
        f'{level_name}_ESCO_name': info['preferredLabel'],
        f'{level_name}_ESCO_desc': description,
    }
def get_skills_info_esco(Level_5_code):
    """Return the ESCO skills linked to an occupation code.

    Resolves the occupation's ``conceptUri`` in ``esco_level5_df``, follows
    ``esco_skill_map_df`` (``occupationUri`` -> ``skillUri``) and selects the
    matching rows of ``esco_skill_df``.

    Args:
        Level_5_code: Occupation code matched against ``esco_level5_df['code']``.

    Returns:
        A de-duplicated DataFrame with columns ``skill_name``, ``skill_code``
        and ``skill_description``.
    """
    occupation_rows = esco_level5_df.loc[esco_level5_df['code'] == Level_5_code]
    occupation_uris = occupation_rows['conceptUri'].values.tolist()

    relation_rows = esco_skill_map_df.loc[esco_skill_map_df['occupationUri'].isin(occupation_uris)]
    skill_uris = relation_rows['skillUri'].values.tolist()

    linked_skills = esco_skill_df.loc[esco_skill_df['conceptUri'].isin(skill_uris)]
    wanted_columns = ['preferredLabel', 'conceptUri', 'description']
    new_names = {
        'preferredLabel': 'skill_name',
        'description': 'skill_description',
        'conceptUri': 'skill_code',
    }
    return linked_skills[wanted_columns].drop_duplicates().rename(columns=new_names)
_SKILLS_SYSTEM_PROMPT = (
    "You are an HR expert working for the International Organisation for Migration "
    "and with in-depth knowledge of the European Skills, Competences, Qualifications "
    "and Occupations. Extract skills required for this position."
)


def _parse_skills_response(raw: str, top_n: int, kind: str = "") -> List[Dict[str, str]]:
    """Parse an LLM ``{"skills": [...]}`` reply into validated skill dicts.

    ``kind`` is a suffix used only to tell the two callers apart in the debug
    log. Entries missing a required key (or that are not mappings) are
    skipped; at most ``top_n`` entries are returned.
    """
    json_text = _extract_json(raw)
    if not json_text:
        return []
    try:
        result = json.loads(json_text)
    except json.JSONDecodeError as e:
        log_debug(f"❌ JSON Skills{kind} parsing error: {e}")
        log_debug(f"🔍 Problematic JSON Skills{kind}: {json_text}")
        return []
    skills = result.get("skills") if isinstance(result, dict) else None
    validated_skills = []
    for skill in skills or []:
        try:
            validated_skills.append({
                "skill_name": str(skill["skill_name"]).strip(),
                "skill_description": str(skill["skill_description"]).strip(),
                "skill_code": str(skill["skill_code"]).strip(),
            })
        except (KeyError, TypeError) as e:
            log_debug(f"⚠️ Skipping invalid skill{kind}: {skill}. Error: {e}")
    return validated_skills[:top_n]


def review_skills(Level_5_code: str, top_n: int = 10) -> List[Dict[str, str]]:
    """Ask the LLM to keep the ESCO skills most relevant to the organisation.

    The candidate skills are those returned by ``get_skills_info_esco`` for
    the given occupation code.

    Args:
        Level_5_code: ESCO occupation code.
        top_n: Maximum number of skills to request and return.

    Returns:
        Up to ``top_n`` dicts with ``skill_name``, ``skill_description`` and
        ``skill_code``; an empty list when there are no candidates or the
        reply cannot be parsed.
    """
    matches = esco_level5_df[esco_level5_df['code'] == Level_5_code]
    # Join the labels so the prompt reads naturally instead of showing a
    # Python list repr such as "['data analyst']".
    esco_occup = ", ".join(map(str, matches['preferredLabel'].tolist()))
    skill_filtered = get_skills_info_esco(Level_5_code)
    if skill_filtered.empty:
        log_debug(f"No ESCO skills linked to occupation code {Level_5_code}")
        return []
    skill_filtered_options = "\n".join(
        f"- {row['skill_code']}: {row['skill_name']} - {row['skill_description']}"
        for _, row in skill_filtered.iterrows()
    )
    prompt = f"""
    Here is a list of skills:
    {skill_filtered_options}
    Filter the skills that are relevant in the context of the work of the International Organisation for Migration.
    Ensure that skills are relevant in the context of a {esco_occup} working for a non-profit public organization.
    Required JSON structure:
    {{
    "skills": [
    {{
    "skill_name": "string",
    "skill_description": "string",
    "skill_code": "string"
    }}
    ]
    }}
    **Important:**
    - Do not duplicate any records of skills
    - Keep only the {top_n} most relevant skills
    - Return ONLY the JSON object with no other text
    - Use double quotes for all strings
    - No trailing commas in arrays/objects
    - No markdown formatting (no ```json)
    - No text before or after the JSON
    - Escape all special characters in strings
    - Ensure all brackets are properly closed
    - No trailing commas in arrays/objects, especially before closing brackets
    """
    raw = gpt_call(_SKILLS_SYSTEM_PROMPT, prompt)
    return _parse_skills_response(raw, top_n)


def extract_skills(responsibilities: List[str], top_n: int = 10) -> List[Dict[str, str]]:
    """Ask the LLM to derive ESCO-style skills from the responsibilities.

    Args:
        responsibilities: Responsibilities text interpolated into the prompt.
        top_n: Maximum number of skills returned.

    Returns:
        Up to ``top_n`` dicts with ``skill_name``, ``skill_description`` and
        ``skill_code``; an empty list when the reply cannot be parsed.
    """
    prompt = f"""
    Here is a list of job responsibilities:
    {responsibilities}
    List the required skills and knowledge as bullet points (without numbers) using ESCO-style terms.
    For each Skill:
    1. skill_name: precise skills name as used in ESCO framework
    2. skill_description: add the long description as mentioned in ESCO framework
    3. skill_code: include the detailed corresponding ESCO code for that skill.
    Required JSON structure:
    {{
    "skills": [
    {{
    "skill_name": "string",
    "skill_description": "string",
    "skill_code": "string"
    }}
    ]
    }}
    **Important:**
    - Return ONLY the JSON object with no other text
    - Use double quotes for all strings
    - No trailing commas in arrays/objects
    - No markdown formatting (no ```json)
    - No text before or after the JSON
    - Escape all special characters in strings
    - Ensure all brackets are properly closed
    """
    raw = gpt_call(_SKILLS_SYSTEM_PROMPT, prompt)
    return _parse_skills_response(raw, top_n, kind=" extract")
def map_proficiency_and_assessment(skills: List[Dict[str, str]], responsibilities: List[str]) -> List[Dict]:
    """Ask the LLM for importance, proficiency and assessment data per skill.

    Args:
        skills: Skill records interpolated into the prompt.
        responsibilities: Responsibilities text interpolated into the prompt.

    Returns:
        One dict per valid item in the reply with the keys ``skill_name``,
        ``importance``, ``type``, ``proficiency_level``,
        ``distinctive_elements``, ``resume_signals`` and
        ``assessment_method``. Returns an empty list when ``skills`` is empty
        or the reply cannot be parsed.
    """
    if not skills:
        # Nothing to assess; avoid a pointless LLM round trip.
        return []
    prompt = f"""
    Here is a list of job responsibilities: {responsibilities} that have been associated with the following skills: {skills}
    For each skill, accounting for the context defined within the responsibilities, return a JSON object with:
    - skill_name: the name of the skill
    - importance: essential or optional
    - type: "skill/competence" or "knowledge"
    - proficiency_level: Basic, Intermediate, or Advanced
    - distinctive_elements: what specific and distinctive elements are required at this defined proficiency level?
    - resume_signals: what to look for in a resume to assess this skill?
    - assessment_method: what is the preferred assessment method to accurately assess this skill?
    Respond ONLY with a list of dictionaries in valid JSON.
    Use double quotes for all strings. No markdown, no commentary, no trailing commas.
    """
    raw = gpt_call("Define proficiency level and assessment for each skill.", prompt)
    json_text = _extract_json_array(raw)
    if not json_text:
        return []
    try:
        results = json.loads(json_text)
    except json.JSONDecodeError as e:
        log_debug(f"❌ JSON proficiency parsing error: {e}")
        log_debug(f"🔍 Problematic JSON proficiency: {json_text}")
        return []
    validated = []
    for item in results:
        try:
            validated.append({
                "skill_name": str(item["skill_name"]).strip(),
                "importance": item["importance"].strip().lower(),
                "type": item["type"].strip().lower(),
                "proficiency_level": item["proficiency_level"].strip().capitalize(),
                "distinctive_elements": item["distinctive_elements"].strip(),
                "resume_signals": item["resume_signals"].strip(),
                "assessment_method": item["assessment_method"].strip(),
            })
        except (KeyError, TypeError, AttributeError) as e:
            # AttributeError covers non-string values (e.g. null or a list),
            # which previously escaped and aborted the whole analysis.
            log_debug(f"⚠️ Skipping invalid profiency item: {item}. Error: {e}")
    return validated
def _extract_json_array(raw: str) -> str:
    """Cut the outermost ``[...]`` span out of an LLM reply and tidy it.

    Trailing commas before ``}`` or ``]`` are removed and newlines, carriage
    returns and tabs are replaced by spaces.

    Args:
        raw: Full text returned by the model.

    Returns:
        The cleaned JSON text, or "" when no ``[`` ... ``]`` span exists.
    """
    json_start = raw.find('[')
    json_end = raw.rfind(']') + 1
    # Also reject a closing bracket that appears before the opening one.
    if json_start == -1 or json_end <= json_start:
        log_debug(f"❌ No JSON array found in response: {raw}")
        return ""
    json_text = raw[json_start:json_end]
    json_text = re.sub(r',\s*([}\]])', r'\1', json_text)
    json_text = re.sub(r'[\n\r\t]', ' ', json_text)
    # The former third substitution replaced '"' with '"' (a no-op) and was removed.
    return json_text
def extract_qualification(responsibilities: List[str]) -> List[str]:
    """Ask the LLM for the EQF level and diplomas implied by the responsibilities.

    Args:
        responsibilities: Responsibilities text interpolated into the prompt.

    Returns:
        The non-blank lines of the reply, with leading/trailing "-", "•" and
        space characters removed.
    """
    prompt = f"""
    Here is a list of job responsibilities: {responsibilities}
    Infer the required level within the European Qualifications Framework (EQF) to implement them.
    Identify the potential diplomas to testify such qualification
    """
    # The system prompt previously described interview-question writing
    # (copied from build_interview); it now matches the qualification task.
    raw = gpt_call(
        "You are an HR expert that excels in mapping job requirements to the "
        "European Qualifications Framework (EQF).",
        prompt,
    )
    return [line.strip("-• ").strip() for line in raw.splitlines() if line.strip()]
def build_interview(responsibilities: List[str], skill_assess: List[str]) -> List[str]:
    """Ask the LLM for a structured 40-minute interview outline.

    Args:
        responsibilities: Responsibilities text interpolated into the prompt.
        skill_assess: Skills interpolated into the prompt.

    Returns:
        The non-blank lines of the reply, with leading/trailing "-", "•" and
        space characters removed.
    """
    system_role = "You are an HR expert that excel in developing competency-based interview questions."
    request = f"""
    Here is a list of job responsibilities: {responsibilities} and related skills: {skill_assess}
    Output: A structured 40-minute interview with:
    Opening questions (5 min)
    Core competency-based questions (30 min, 5-6 questions)
    Closing & candidate questions (5 min)
    """
    reply = gpt_call(system_role, request)
    cleaned = []
    for line in reply.splitlines():
        if not line.strip():
            continue
        cleaned.append(line.strip("-• ").strip())
    return cleaned
def _extract_json(raw: str) -> str:
    """Cut the outermost ``{...}`` span out of an LLM reply and tidy it.

    Trailing commas before ``}`` or ``]`` are removed, newlines/tabs become
    spaces, runs of whitespace are collapsed, and backslashes that do not
    start a valid JSON escape are doubled.

    Args:
        raw: Full text returned by the model.

    Returns:
        The cleaned JSON text, or "" when no ``{`` ... ``}`` span exists.
    """
    json_start = raw.find('{')
    json_end = raw.rfind('}') + 1
    # Also reject a closing brace that appears before the opening one.
    if json_start == -1 or json_end <= json_start:
        log_debug(f"❌ No JSON found in response: {raw}")
        return ""
    json_text = raw[json_start:json_end]
    json_text = re.sub(r',\s*([}\]])', r'\1', json_text)
    json_text = re.sub(r'[\n\r\t]', ' ', json_text)
    json_text = re.sub(r'\s{2,}', ' ', json_text)

    def _fix_escape(match):
        # Consume the backslash together with the next character so that an
        # already-escaped backslash ("\\\\x") is not mistaken for "\\x".
        if match.group(1) in '"\\/bfnrtu':
            return match.group(0)
        return '\\' + match.group(0)

    json_text = re.sub(r'\\([\s\S])', _fix_escape, json_text)
    return json_text.strip()
# ================= Format Skills Visualisation =================
def format_skill_cards(skills_data):
    """Render skill records as HTML cards.

    Missing or ``None`` fields are rendered as empty text (the name falls
    back to "Unnamed Skill"), and all values are HTML-escaped because they
    originate from LLM output.

    Args:
        skills_data: Iterable of dicts with optional keys ``skill_name``,
            ``type``, ``importance``, ``skill_description``,
            ``proficiency_level`` and ``assessment_method``.

    Returns:
        An HTML string, or "No skills data available" when there is no data.
    """
    import html  # local import keeps this block self-contained

    if not skills_data:
        return "No skills data available"

    def field(skill, key, default=""):
        # Keys may be present with a None value, so .get(key, default) is not enough.
        value = skill.get(key)
        return default if value is None else str(value)

    def esc(value):
        return html.escape(value, quote=True)

    cards = []
    for skill in skills_data:
        skill_type = field(skill, "type")
        importance = field(skill, "importance")
        level = field(skill, "proficiency_level")
        card = f"""
        <div class='skill-card'>
        <div class='skill-header'>
        <h3>{esc(field(skill, 'skill_name', 'Unnamed Skill'))}</h3>
        <div class='skill-pill {esc(skill_type.lower())}'>{esc(skill_type.capitalize())}</div>
        <div class='skill-pill {esc(importance.lower())}'>{esc(importance.capitalize())}</div>
        </div>
        <div class='skill-body'>
        <p><strong>Description:</strong> {esc(field(skill, 'skill_description'))}</p>
        <div class='skill-meta'>
        <span class='proficiency'>
        <strong>Level:</strong>
        <progress value={get_progress_value(level)} max="3"></progress>
        {esc(level.capitalize())}
        </span>
        <span class='assessment'>
        <strong>Assessment:</strong> {esc(field(skill, "assessment_method"))}
        </span>
        </div>
        </div>
        </div>
        """
        cards.append(card)
    return f"<div class='skills-container'>{''.join(cards)}</div>"


def get_progress_value(level):
    """Map a proficiency label to a progress-bar value.

    Args:
        level: "basic", "intermediate" or "advanced" (case-insensitive);
            anything else, including None, maps to the lowest value.

    Returns:
        "1", "2" or "3" as a string.
    """
    level_map = {"basic": 1, "intermediate": 2, "advanced": 3}
    key = level.strip().lower() if isinstance(level, str) else ""
    return str(level_map.get(key, 1))
| # ================= Process Analysis ================= | |
| from concurrent.futures import ThreadPoolExecutor | |
def _empty_pdf_result(label, status):
    """Build the 10-item output tuple used when no analysis is available."""
    return (label, "", "", "", {}, "", [], {}, {}, status)


def process_pdf(file):
    """Run the full position-description analysis on an uploaded PDF.

    Steps: extract the text, extract the responsibilities section, run the
    independent classifications and extractions in a thread pool, map skills
    to proficiency/assessment data, optionally review ESCO skills when a
    level-5 ESCO code was found, and build the interview outline.

    Args:
        file: Uploaded file object exposing a ``name`` path attribute, or None.

    Returns:
        A 10-tuple: (file name or message, responsibilities, job family,
        qualification text, CCOG levels dict, interview text, joined skills
        list, ESCO levels dict, ESCO skills dict or None, status message).
        On failure the first and last items carry the message and the other
        items are empty placeholders.
    """
    if file is None:
        return _empty_pdf_result("Please upload a PDF file.", "No file uploaded.")
    try:
        pdf_name = os.path.basename(file.name)
        extracted_text = extract_text_from_pdf(file.name)
        responsibilities = extract_section_from_pdf(
            extracted_text, section_title="Responsibilities and Accountabilities"
        )
        section = (responsibilities or "").strip()
        # gpt_call reports failures as "ERROR: ..." text rather than raising.
        if section.startswith("ERROR:"):
            log_debug(f"Skipping {pdf_name} - responsibilities extraction failed: {section}")
            return _empty_pdf_result(pdf_name, f"Could not extract responsibilities. {section}")
        # The extraction prompt asks the model to answer "N/A" when the
        # section is missing, so an emptiness check alone never triggers.
        if not section or section.strip("\"'. ").upper() == "N/A":
            log_debug(f"Skipping {pdf_name} - no responsibilities section found")
            return _empty_pdf_result(pdf_name, "No responsibilities section found.")

        # Use ThreadPoolExecutor to parallelize independent tasks
        with ThreadPoolExecutor() as executor:
            job_family_future = executor.submit(classify_job_family, responsibilities)
            occ_group_future = executor.submit(classify_occupational_group_by_level, responsibilities)
            esco_occ_future = executor.submit(classify_esco_by_hierarchical_level, responsibilities)
            qualification_future = executor.submit(extract_qualification, responsibilities)
            skills_future = executor.submit(extract_skills, responsibilities)
            job_family = job_family_future.result()
            occ_group = occ_group_future.result()
            esco_occ = esco_occ_future.result()
            qualification = qualification_future.result()
            skills = skills_future.result()
        log_debug(f"Identified {job_family}")

        skill_map = map_proficiency_and_assessment(skills, responsibilities)
        has_esco = esco_occ.get("Level_5_ESCO_code") is not None
        skill_esco_extract = []
        skill_esco_map = []
        if has_esco:
            Level_5_code = esco_occ["Level_5_ESCO_code"]
            skill_esco_extract = review_skills(Level_5_code)
            skill_esco_map = map_proficiency_and_assessment(skill_esco_extract, responsibilities)
        else:
            log_debug(f"No Level 5 ESCO code found for {pdf_name}, skipping ESCO skills mapping")
        # NOTE(review): source indentation was lost; this pause is kept
        # unconditional (presumably API throttling) - confirm whether it
        # belonged only to the else branch above.
        time.sleep(6)

        assessment_fields = (
            "importance", "type", "proficiency_level",
            "distinctive_elements", "resume_signals", "assessment_method",
        )
        assessment_lookup = {item['skill_name']: item for item in skill_map}
        joined_skills = []
        for skill in skills:
            assessment = assessment_lookup.get(skill["skill_name"], {})
            joined_skills.append({
                "skill_name": skill["skill_name"],
                "skill_description": skill["skill_description"],
                "skill_code": skill["skill_code"],
                **{name: assessment.get(name) for name in assessment_fields},
            })

        joined_skills_esco = []
        if has_esco and skill_esco_extract:
            assessment_esco_lookup = {item['skill_name']: item for item in skill_esco_map}
            joined_skills_esco = [
                {
                    "skill_name": skill["skill_name"],
                    "skill_description": skill["skill_description"],
                    "skill_code": skill["skill_code"],
                    **assessment_esco_lookup.get(skill["skill_name"], {}),
                }
                for skill in skill_esco_extract
            ]

        interview = build_interview(responsibilities, skills)

        # Prepare the results for each output component
        level_fields = ["code", "name", "desc"]
        ccoq_levels = {
            f"Level_{i}_CCOG_{name}": occ_group.get(f"Level_{i}_CCOG_{name}")
            for i in range(1, 5) for name in level_fields
        }
        if has_esco:
            esco_levels = {
                f"Level_{i}_ESCO_{name}": esco_occ.get(f"Level_{i}_ESCO_{name}")
                for i in range(1, 6) for name in level_fields
            }
            esco_skills = {
                "file": pdf_name,
                "classified_job_family": job_family,
                "skills": joined_skills_esco,
            }
        else:
            esco_levels = {
                f"Level_{i}_ESCO_{name}": None
                for i in range(1, 6) for name in level_fields
            }
            esco_skills = None

        debug_message = "Processing completed successfully."
        return (
            pdf_name,
            responsibilities,
            job_family,
            "\n".join(qualification),
            ccoq_levels,
            "\n".join(interview),
            joined_skills,
            esco_levels,
            esco_skills,
            debug_message if DEBUG else None,
        )
    except Exception as e:
        error_message = f"Error processing PDF: {str(e)}"
        # Keep the traceback in the debug log; the UI only shows the summary.
        log_debug(f"{error_message}\n{traceback.format_exc()}")
        return _empty_pdf_result(error_message, error_message)
| # ================= Build Word Report ================= | |
| from docx import Document | |
| import os | |
| import re | |
| import time | |
| import tempfile | |
| from typing import Dict, List, Union | |
def generate_word_document(
    file_name: str,
    responsibilities: str,
    job_family: str,
    qualification: str,
    ccoq_levels: Dict,
    interview: str,
    skills: List[Dict],
    esco_levels: Dict,
    esco_skills: Dict
) -> str:
    """
    Generate a Word report from the analysis results and return its path.

    The structured arguments (``ccoq_levels``, ``skills``, ``esco_levels``,
    ``esco_skills``) are accepted either as real dict/list objects or as their
    text form (JSON or a Python literal), because the Gradio download handler
    in this file feeds them from Textbox components.

    Args:
        file_name: Original PDF filename
        responsibilities: Extracted responsibilities text
        job_family: Identified job family
        qualification: Required qualifications, one per line
        ccoq_levels: CCOG classification levels, keyed ``Level_{i}_CCOG_{field}``
        interview: Generated interview questions, one per line
        skills: List of required skills
        esco_levels: ESCO classification levels, keyed ``Level_{i}_ESCO_{field}``
        esco_skills: ESCO mapped skills (dict with a ``"skills"`` list)
    Returns:
        Path to the generated Word document. If the report cannot be saved,
        the path of a short error document written to the temp directory.
    """
    import ast  # function-scope import: only needed to parse stringified inputs

    def _coerce(value, expected_type):
        """Return value as expected_type, parsing text when needed, else None."""
        if isinstance(value, expected_type):
            return value
        if not isinstance(value, str) or not value.strip():
            return None
        # JSON first, then Python literal syntax (what str(dict) produces).
        for parse in (json.loads, ast.literal_eval):
            try:
                parsed = parse(value)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                continue
            if isinstance(parsed, expected_type):
                return parsed
        return None

    ccoq_levels = _coerce(ccoq_levels, dict)
    esco_levels = _coerce(esco_levels, dict)
    skills = _coerce(skills, list)
    esco_skills = _coerce(esco_skills, dict)

    # Initialize document with metadata
    doc = Document()
    doc.core_properties.author = "IOM Talent Management System"
    doc.core_properties.title = "Position Description Analysis Report"

    # Default values for all fields
    default_values = {
        "file": "Unknown file",
        "responsibilities": "No responsibilities extracted",
        "classified_job_family": "No job family identified",
        "qualification": ["No qualification information available"],
        "interview": ["No interview questions generated"],
        "skills": {"skills": [{"skill_name": "No skills identified", "skill_description": "", "skill_code": ""}]},
        "skills_esco": {"skills": [{"skill_name": "No ESCO skills identified", "skill_description": "", "skill_code": ""}]}
    }

    # Safely build the result dictionary with fallbacks
    try:
        result = {
            "file": file_name if file_name and isinstance(file_name, str) else default_values["file"],
            "responsibilities": responsibilities if responsibilities else default_values["responsibilities"],
            "classified_job_family": job_family if job_family else default_values["classified_job_family"],
            "qualification": qualification.split('\n') if qualification else default_values["qualification"],
            "interview": interview.split('\n') if interview else default_values["interview"],
            "skills": {"skills": skills} if skills else default_values["skills"],
            "skills_esco": esco_skills if esco_skills else default_values["skills_esco"]
        }
        # Add level information, skipping levels that were not classified
        if ccoq_levels:
            result.update({k: v for k, v in ccoq_levels.items() if v is not None})
        if esco_levels:
            result.update({k: v for k, v in esco_levels.items() if v is not None})
    except Exception as e:
        log_debug(f"Error building result dictionary: {str(e)}")
        result = default_values

    # ---- Paragraph helpers (they read `doc` at call time) ----
    def _apply_paragraph_style(paragraph, style):
        """Try to set a paragraph style; return False if the template lacks it."""
        try:
            paragraph.style = style
            return True
        except (KeyError, ValueError) as style_error:
            # Missing style or wrong style type: keep the default formatting
            # instead of aborting the whole report.
            log_debug(f"Paragraph style '{style}' not applied: {style_error}")
            return False

    def _add_note(text):
        """Add a de-emphasised placeholder line.

        'Subtle Emphasis' is listed by python-docx as a character style of the
        default template, so it is applied to the run; assigning it to the
        paragraph raises and would discard the report.
        """
        run = doc.add_paragraph().add_run(text)
        try:
            run.style = 'Subtle Emphasis'
        except (KeyError, ValueError):
            run.italic = True

    def _add_table_row(table, field, value):
        row = table.add_row().cells
        row[0].text = field
        row[1].text = str(value or "Not available")

    def _add_section(heading, content, level=2):
        """Add a heading followed by content given as str, list/tuple or dict."""
        doc.add_heading(heading, level=level)
        if not content:
            _add_note("No information available")
            return
        if isinstance(content, (list, tuple)):
            for item in content:
                if item and str(item).strip():
                    doc.add_paragraph(str(item).strip(), style='List Bullet' if level > 2 else None)
        elif isinstance(content, dict):
            for k, v in content.items():
                if v is not None:
                    doc.add_paragraph(f"{k}: {v}")
        elif isinstance(content, str):
            doc.add_paragraph(content)

    def _add_skills_section(heading, skills_data):
        """Add a four-column skills table, or a placeholder when empty."""
        doc.add_heading(heading, level=2)
        entries = skills_data.get("skills") if isinstance(skills_data, dict) else None
        if not entries:
            _add_note("No skills information available")
            return
        try:
            skills_table = doc.add_table(rows=1, cols=4)
            skills_table.style = 'Medium List 2 Accent 1'
            hdr = skills_table.rows[0].cells
            hdr[0].text = 'Skill'
            hdr[1].text = 'Description'
            hdr[2].text = 'Proficiency'
            hdr[3].text = 'Assessment'
            for skill in entries:
                if not isinstance(skill, dict):
                    continue
                # `or` rather than a .get default: upstream stores explicit
                # None values, which would otherwise print as "None".
                description = str(skill.get("skill_description") or "")
                if len(description) > 100:
                    description = description[:100] + "..."
                row = skills_table.add_row().cells
                row[0].text = str(skill.get("skill_name") or "Unnamed skill")
                row[1].text = description
                row[2].text = str(skill.get("proficiency_level") or "Not specified")
                row[3].text = str(skill.get("assessment_method") or "Not specified")
        except Exception as e:
            _add_note(f"Could not display skills table: {str(e)}")

    def _add_classification_section(heading, scheme, levels=4):
        """Add one sub-section per classified level of `scheme` (CCOG/ESCO)."""
        doc.add_heading(heading, level=2)
        found = False
        for i in range(1, levels + 1):
            # Key layout matches the producer: Level_{i}_{scheme}_{field}
            code = result.get(f"Level_{i}_{scheme}_code")
            name = result.get(f"Level_{i}_{scheme}_name")
            desc = result.get(f"Level_{i}_{scheme}_desc")
            if any([code, name, desc]):
                found = True
                doc.add_heading(f"Level {i}", level=3)
                if code:
                    doc.add_paragraph(f"Code: {code}")
                if name:
                    doc.add_paragraph(f"Name: {name}")
                if desc:
                    doc.add_paragraph(f"Description: {desc}")
        if not found:
            _add_note("No classification information available")

    # ================= DOCUMENT CONTENT GENERATION =================
    try:
        # Document header
        doc.add_heading('Job Description Analysis Report', level=0)
        doc.add_paragraph(f"Generated on {time.strftime('%Y-%m-%d %H:%M:%S')}")
        _apply_paragraph_style(
            doc.add_paragraph("International Organization for Migration"),
            "Intense Quote",
        )

        # Metadata table
        table = doc.add_table(rows=1, cols=2)
        table.style = 'Light Shading Accent 1'
        hdr_cells = table.rows[0].cells
        hdr_cells[0].text = 'Field'
        hdr_cells[1].text = 'Value'
        _add_table_row(table, "File Name", result["file"])
        _add_table_row(table, "Job Family", result["classified_job_family"])

        # Core sections
        _add_section("1. Responsibilities", result["responsibilities"])
        _add_section("2. Qualifications", result["qualification"])

        # Skills sections
        _add_skills_section("3. Required Skills", result["skills"])
        _add_skills_section("4. ESCO Mapped Skills", result["skills_esco"])

        # Classification sections
        _add_classification_section("5. CCOG Classification", "CCOG")
        _add_classification_section("6. ESCO Classification", "ESCO", levels=5)

        # Interview questions
        doc.add_heading("7. Suggested Interview Questions", level=2)
        questions = [str(q).strip() for q in result["interview"] if q and str(q).strip()]
        if questions:
            for number, question in enumerate(questions, 1):
                paragraph = doc.add_paragraph(question)
                # 'List Number' numbers the paragraph itself; only write the
                # number into the text when that style is unavailable.
                if not _apply_paragraph_style(paragraph, 'List Number'):
                    paragraph.text = f"{number}. {question}"
        else:
            _add_note("No interview questions generated")

        # Footer
        doc.add_paragraph()
        _apply_paragraph_style(
            doc.add_paragraph("Generated by IOM Talent Management AI Tool"),
            'Footer',
        )
    except Exception as e:
        log_debug(f"Error generating document content: {str(e)}")
        # Fallback to simple error document
        doc = Document()
        doc.add_heading("Partial Report Generated", level=1)
        doc.add_paragraph(f"Some sections could not be generated due to: {str(e)}")

    # ================= FILE SAVING WITH MULTIPLE FALLBACKS =================
    try:
        # Generate appropriate filename
        if file_name and isinstance(file_name, str):
            base_name = os.path.splitext(os.path.basename(file_name))[0]
            clean_name = re.sub(r'[^\w\-]', '_', base_name)[:50]  # Sanitize and truncate
            output_filename = f"{clean_name}_analysis_{time.strftime('%Y%m%d')}.docx"
        else:
            output_filename = f"job_analysis_{time.strftime('%Y%m%d_%H%M%S')}.docx"

        # Try the reports directory first, then the system temp directory.
        # Any OSError (permissions, read-only filesystem, disk full) moves on
        # to the next location so the finished report is not thrown away.
        last_error = None
        for directory in ("generated_reports", tempfile.gettempdir()):
            try:
                os.makedirs(directory, exist_ok=True)
                output_path = os.path.join(directory, output_filename)
                doc.save(output_path)
                return output_path
            except OSError as save_error:
                last_error = save_error
                log_debug(f"Could not save report to {directory}: {save_error}")
        raise last_error
    except Exception as e:
        # Ultimate fallback with error document
        error_doc = Document()
        error_doc.add_heading("Error Generating Report", level=1)
        error_doc.add_paragraph(f"Could not save report due to: {str(e)}")
        fallback_path = os.path.join(tempfile.gettempdir(), f"error_report_{time.strftime('%Y%m%d_%H%M%S')}.docx")
        error_doc.save(fallback_path)
        return fallback_path
| # ================= GRADIO INTERFACE ================= | |
# Builds the Gradio Blocks app bound to `demo`: custom CSS and <head> metadata,
# a PDF upload with an "Analyse" button, one textbox per analysis result, and a
# button that turns the displayed results into a Word document.
| with gr.Blocks( | |
| title="AI-powered tool to review Job Position Description", | |
| css=""" | |
| @import url('https://fonts.googleapis.com/css2?family=Lato:wght@400;700&display=swap'); | |
| @import url('https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css'); | |
| /* Completely disable Gradio's dark theme */ | |
| .gradio-container.dark { | |
| --body-background-fill: white !important; | |
| --background-fill-primary: white !important; | |
| --background-fill-secondary: #f8f9fa !important; | |
| --block-background-fill: white !important; | |
| --input-background-fill: white !important; | |
| --block-label-text-color: #212529 !important; | |
| --body-text-color: #212529 !important; | |
| --block-title-text-color: var(--primary-color) !important; | |
| --border-color-primary: #dee2e6 !important; | |
| } | |
| .gradio-container.dark .gr-markdown, | |
| .gradio-container.dark .gr-textbox, | |
| .gradio-container.dark .gr-dropdown, | |
| .gradio-container.dark .output-section { | |
| background: white !important; | |
| color: #212529 !important; | |
| border-color: #dee2e6 !important; | |
| } | |
| /* Set the size of the SVG icon for file download */ | |
| .feather-file { | |
| width: 20px !important; /* Adjust the size as needed */ | |
| height: 20px !important; /* Adjust the size as needed */ | |
| } | |
| /* Base Styles */ | |
| :root { | |
| --primary-color: #0033A0; | |
| --secondary-color: #e67e22; | |
| --accent-color: #f59e0b; | |
| --dark-color: #34495e; | |
| --light-color: #ecf0f1; | |
| --success-color: #27ae60; | |
| --warning-color: #f39c12; | |
| --danger-color: #e74c3c; | |
| --text-color: #333; | |
| --text-light: #7f8c8d; | |
| --border-radius: 8px; | |
| --box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); | |
| --transition: all 0.3s ease; | |
| } | |
| /* Header Styles */ | |
| .header { | |
| text-align: center; | |
| margin-bottom: 2rem; | |
| padding: 1rem; | |
| } | |
| .header h1 { | |
| margin: 0; | |
| font-family: 'Lato', sans-serif; | |
| font-size: 2.5rem; | |
| font-weight: 600; | |
| color: var(--primary-color); | |
| } | |
| .header p { | |
| margin: 0.5rem 0 0; | |
| font-family: 'Lato', sans-serif; | |
| opacity: 0.9; | |
| font-size: 1.5rem; | |
| color: #4b5563; | |
| } | |
| /* Section Titles */ | |
| .section-title { | |
| display: flex; | |
| align-items: left; | |
| font-family: 'Lato', sans-serif; | |
| gap: 0.5rem; | |
| color: var(--primary-color); | |
| margin: 1rem 0; | |
| font-size: 1.25rem; | |
| font-weight: 600; | |
| } | |
| .section-title i { | |
| font-size: 1.1em; | |
| color: var(--accent-color); | |
| } | |
| /* Input Section */ | |
| .input-section { | |
| background: white; | |
| padding: 0.75rem 0.5rem; | |
| border: 1px solid #d1d5db; | |
| border-radius: var(--border-radius); | |
| box-shadow: var(--box-shadow); | |
| margin-right: 1rem; | |
| } | |
| /* Output Section */ | |
| .output-section { | |
| background: white; | |
| padding: 1.5rem; | |
| border-radius: var(--border-radius); | |
| box-shadow: var(--box-shadow); | |
| } | |
| /* Form Elements */ | |
| .gr-textbox, .gr-dropdown { | |
| border: 1px solid #ddd; | |
| border-radius: var(--border-radius) !important; | |
| padding: 0.75rem 1rem !important; | |
| transition: var(--transition); | |
| } | |
| .gr-textbox:focus, .gr-dropdown:focus { | |
| border-color: var(--primary-color) !important; | |
| box-shadow: 0 0 0 2px rgba(44, 110, 203, 0.2) !important; | |
| outline: none !important; | |
| } | |
| .gr-textbox::placeholder { | |
| color: var(--text-light) !important; | |
| opacity: 0.7 !important; | |
| } | |
| label { | |
| font-weight: 500 !important; | |
| color: var(--dark-color) !important; | |
| margin-bottom: 0.5rem !important; | |
| display: block !important; | |
| } | |
| /* Buttons */ | |
| .btn-primary { | |
| background: var(--primary-color) !important; | |
| color: white !important; | |
| border: none !important; | |
| border-radius: var(--border-radius) !important; | |
| padding: 0.75rem 1.5rem !important; | |
| font-weight: 500 !important; | |
| transition: var(--transition) !important; | |
| text-transform: uppercase !important; | |
| letter-spacing: 0.5px !important; | |
| } | |
| .btn-primary:hover { | |
| background: #002080 !important; | |
| transform: translateY(-2px) !important; | |
| box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15) !important; | |
| } | |
| .btn-primary:active { | |
| transform: translateY(0) !important; | |
| } | |
| /* Intro */ | |
| .intro-box { | |
| background: #f0f7ff; | |
| border-left: 4px solid #0033A0; | |
| border-radius: 4px; | |
| padding: 16px; | |
| margin-bottom: 20px; | |
| } | |
| .intro-title { | |
| color: #0033A0; | |
| font-weight: 600; | |
| margin-top: 0 !important; | |
| } | |
| .intro-icon { | |
| color: #0033A0; | |
| margin-right: 8px; | |
| } | |
| .benefits-grid { | |
| display: grid; | |
| grid-template-columns: repeat(auto-fit, minmax(250px, 1fr)); | |
| gap: 12px; | |
| margin: 16px 0; | |
| } | |
| .benefit-card { | |
| background: white; | |
| padding: 12px; | |
| border-radius: 8px; | |
| box-shadow: 0 2px 4px rgba(0,0,0,0.05); | |
| } | |
| /* Skills Card */ | |
| .skills-container { | |
| display: grid; | |
| grid-template-columns: repeat(auto-fill, minmax(350px, 1fr)); | |
| gap: 1rem; | |
| padding: 1rem; | |
| } | |
| .skill-card { | |
| background: white; | |
| border-radius: 8px; | |
| box-shadow: 0 2px 8px rgba(0,0,0,0.1); | |
| overflow: hidden; | |
| transition: transform 0.2s; | |
| } | |
| .skill-card:hover { | |
| transform: translateY(-3px); | |
| box-shadow: 0 4px 12px rgba(0,0,0,0.15); | |
| } | |
| .skill-header { | |
| background: #0033A0; | |
| color: white; | |
| padding: 1rem; | |
| display: flex; | |
| flex-wrap: wrap; | |
| gap: 0.5rem; | |
| align-items: center; | |
| } | |
| .skill-header h3 { | |
| margin: 0; | |
| flex-grow: 1; | |
| font-size: 1.1rem; | |
| } | |
| .skill-pill { | |
| padding: 0.25rem 0.5rem; | |
| border-radius: 999px; | |
| font-size: 0.8rem; | |
| font-weight: bold; | |
| } | |
| .skill-pill.skill { background: #4CAF50; } | |
| .skill-pill.knowledge { background: #2196F3; } | |
| .skill-pill.essential { background: #F44336; } | |
| .skill-pill.optional { background: #FF9800; } | |
| .skill-body { | |
| padding: 1rem; | |
| } | |
| .skill-meta { | |
| margin-top: 1rem; | |
| padding-top: 1rem; | |
| border-top: 1px solid #eee; | |
| display: flex; | |
| flex-direction: column; | |
| gap: 0.5rem; | |
| } | |
| progress { | |
| width: 100%; | |
| height: 6px; | |
| margin-top: 0.25rem; | |
| } | |
| /* Output Markdown */ | |
| .gr-markdown { | |
| background: #f9f9f9; | |
| padding: 1.5rem; | |
| border-radius: var(--border-radius); | |
| border-left: 4px solid var(--primary-color); | |
| } | |
| /* Debug Console */ | |
| .gr-textbox[label="⚠️ Console Log"] { | |
| font-family: monospace !important; | |
| background: #2c3e50 !important; | |
| color: #ecf0f1 !important; | |
| border-radius: var(--border-radius) !important; | |
| padding: 1rem !important; | |
| } | |
| /* Responsive Layout */ | |
| @media (max-width: 768px) { | |
| .gr-row { | |
| flex-direction: column !important; | |
| } | |
| .input-section { | |
| margin-right: 0 !important; | |
| margin-bottom: 1rem !important; | |
| } | |
| } | |
| """, | |
| head=''' | |
| <meta name="description" content="AI-powered tool to review Job Position Description."> | |
| <meta name="keywords" content="HR, Position, Job, Skills, Qualification, Interview"> | |
| <meta name="author" content="Edouard Legoupil | IOM Chief Data Officer"> | |
| <link rel="author" href="https://edouard-legoupil.github.io/"> | |
| <meta property="og:title" content="AI-powered tool to review Job Position Description"> | |
| <meta property="og:description" content="AI-powered tool to review Job Position Description"> | |
| <meta property="og:type" content="website"> | |
| <link rel="icon" href="https://www.iom.int/themes/custom/phoenix/favicon.ico" type="image/vnd.microsoft.icon"> | |
| <link rel="apple-touch-icon" href="https://www.iom.int/sites/g/files/tmzbdl486/files/favicon.ico"> | |
| ''' | |
| ) as demo: | |
| # Header section | |
| with gr.Column(): | |
| with gr.Row(): | |
| with gr.Column(): | |
| gr.HTML(""" | |
| <div class="header"> | |
| <h1>Position Description Review (Demo)</h1> | |
| <p>Use AI to standardise an initial draft position description and identify related Job Family, Occupation, Qualification, match Skills and suggest interview questions.</p> | |
| </div> | |
| """) | |
| # Introduction Section | |
| with gr.Column(elem_classes="intro-box"): | |
| gr.Markdown(""" | |
| <div class='benefits-grid'> | |
| <div class='benefit-card'> | |
| <p><strong>🔍 Strategic Workforce Management</strong>: Aligns existing positions with standard Job Families as well as with the <a href="https://icsc.un.org/Home/JobClassification">UN Common Classification of Occupational Groups (CCOG)</a> and the <a href="https://esco.ec.europa.eu/en/classification">ESCO classification of the European Commission</a></p> | |
| </div> | |
| <div class='benefit-card'> | |
| <p><strong>⏱️ Time Saver</strong>: Reduces hours of manual research and mapping to minutes</p> | |
| </div> | |
| <div class='benefit-card'> | |
| <p><strong>⚖️ Reduced Recruitment Bias</strong>: Suggest Data-driven skills recommendations using both the description of responsibilities and mapped occupation</p> | |
| </div> | |
| <div class='benefit-card'> | |
| <p><strong>🎯 Better Hiring</strong>: Generates expected qualification description, skills assessment method and tailored interview questions</p> | |
| </div> | |
| </div> | |
| """) | |
# Input area: PDF upload (restricted to .pdf) and the button that starts the analysis.
| with gr.Row(): | |
| with gr.Column(): | |
| file_input = gr.File( | |
| label="Upload a Post Description PDF file", | |
| file_types=[".pdf"]) | |
| submit_btn = gr.Button( | |
| value="✨ Analyse Post Description", | |
| variant="primary", | |
| elem_classes="btn-primary" | |
| ) | |
# Result fields. Every result is shown in a gr.Textbox, including the skills,
# CCOG levels, ESCO levels and ESCO skills values.
# NOTE(review): those four textboxes are created without interactive=False,
# unlike the other result fields - confirm whether they are meant to be editable.
| with gr.Row(): | |
| with gr.Column(): | |
| file_name_output = gr.Textbox(label="File Name", interactive=False) | |
| responsibilities_output = gr.Textbox(label="List of Responsibilities used for the review", lines=5, interactive=False) | |
| job_family_output = gr.Textbox(label="Classified Job Family", interactive=False) | |
| skills_output = gr.Textbox(label="Related identified Skills") | |
| with gr.Row(): | |
| gr.HTML("""<p>This mapping uses the <a href="https://icsc.un.org/Home/JobClassification">UN Common Classification of Occupational Groups (CCOG)</a> | |
| .</p>""") | |
| with gr.Column(): | |
| gr.Markdown("### CCOG Occupation Group Levels") | |
| ccoq_levels_output = gr.Textbox(label="CCOG Levels") | |
| with gr.Row(): | |
| gr.HTML("""<p>This mapping uses the <a href="https://esco.ec.europa.eu/en/classification">ESCO classification of the European Commission</a>.</p>""") | |
| with gr.Column(): | |
| gr.Markdown("### ESCO Levels") | |
| esco_levels_output = gr.Textbox(label="ESCO Levels") | |
| with gr.Column(): | |
| gr.Markdown("### ESCO Skills") | |
| esco_skills_output = gr.Textbox(label="ESCO Skills") | |
| with gr.Row(): | |
| with gr.Column(): | |
| gr.Markdown("### Expected Qualifications") | |
| qualification_output = gr.Textbox(label="Qualification", lines=5, interactive=False) | |
| with gr.Row(): | |
| with gr.Column(): | |
| gr.Markdown("## Interview Questions") | |
| interview_output = gr.Textbox(label="Interview Questions", lines=10, interactive=False) | |
| with gr.Row(): | |
| with gr.Column(): | |
| download_btn = gr.Button( | |
| value="📄 Download Word Document", | |
| variant="primary", | |
| elem_classes="btn-primary") | |
# The debug textbox only exists when DEBUG is true.
# NOTE(review): the "Debug Console" CSS rule above selects
# .gr-textbox[label="⚠️ Console Log"], while this component is labelled
# "⚠️ Execution Log" and carries the class "debug-console" - the rule does not
# appear to target it; confirm and align the selector.
| if DEBUG: | |
| with gr.Row(): | |
| with gr.Column(): | |
| debug_console = gr.Textbox( | |
| label="⚠️ Execution Log", | |
| interactive=False, | |
| elem_classes=["debug-console"] | |
| ) | |
# Run the analysis on click. The outputs are listed in the same order as the
# ten values returned by process_pdf.
# NOTE(review): when DEBUG is false the last entry is None instead of a
# component, while process_pdf still returns ten values - confirm this
# configuration works before disabling DEBUG.
| submit_btn.click( | |
| fn=process_pdf, | |
| inputs=file_input, | |
| outputs=[ | |
| file_name_output, | |
| responsibilities_output, | |
| job_family_output, | |
| qualification_output, | |
| ccoq_levels_output, | |
| interview_output, | |
| skills_output, | |
| esco_levels_output, | |
| esco_skills_output, | |
| debug_console if DEBUG else None | |
| ] | |
| ) | |
# Build the Word report from the values currently shown in the result fields,
# passed positionally in generate_word_document's parameter order. The gr.File
# component that receives the returned path is created inline here.
| download_btn.click( | |
| fn=generate_word_document, | |
| inputs=[ | |
| file_name_output, | |
| responsibilities_output, | |
| job_family_output, | |
| qualification_output, | |
| ccoq_levels_output, | |
| interview_output, | |
| skills_output, | |
| esco_levels_output, | |
| esco_skills_output | |
| ], | |
| outputs=gr.File(label="Download Word Document") | |
| ) | |
# Start the app only when this file is executed as a script; debug mode follows
# the module-level DEBUG flag.
| if __name__ == "__main__": | |
| demo.launch(show_error=True, debug=DEBUG) | |