import gradio as gr import pdfplumber import pandas as pd import re import warnings import logging import os from dotenv import load_dotenv import json from concurrent.futures import ThreadPoolExecutor from typing import List, Dict, Optional import traceback import time import openai # Debugging setup DEBUG = True debug_messages = [] def log_debug(message): """Log debug messages and keep last 20 entries""" if DEBUG: timestamp = time.strftime("%Y-%m-%d %H:%M:%S") full_message = f"[{timestamp}] {message}" debug_messages.append(full_message) print(full_message) # Print to console # Keep only the last 20 messages if len(debug_messages) > 20: debug_messages.pop(0) return "\n".join(debug_messages) return "" # Initialize debug logging log_debug("Application starting...") # Load environment variables load_dotenv() # Configure logging for pdfminer logging.getLogger('pdfminer').setLevel(logging.ERROR) # Suppress specific warnings warnings.filterwarnings("ignore", category=UserWarning, message="CropBox.*") # ================= DataFrame initializations ================= try: job_families_df = pd.read_csv("job_families1.csv", on_bad_lines='skip') except Exception as e: print(f"Error reading job_families1.csv: {e}") job_families_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately try: occupational_groups_df = pd.read_csv("occupational_groups.csv", on_bad_lines='skip') except Exception as e: log_debug(f"Error reading occupational_groups.csv: {e}") occupational_groups_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately try: esco_df = pd.read_csv("ISCOGroups_en.csv", on_bad_lines='skip', dtype={'code': str} ) # Force 'code' to be read as string except Exception as e: log_debug(f"Error reading ISCOGroups_en.csv: {e}") esco_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately try: esco_level5_df = pd.read_csv("occupations_en.csv", on_bad_lines='skip', dtype={'code': str, 'iscoGroup': str, } ) # Force 'code' to be read as string except Exception as e: log_debug(f"Error reading occupations_en.csv: {e}") esco_level5_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately try: esco_skill_df = pd.read_csv("skills_en.csv", on_bad_lines='skip') except Exception as e: log_debug(f"Error reading skills_en.csv: {e}") esco_skill_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately try: esco_skill_map_df = pd.read_csv("occupationSkillRelations_en.csv", on_bad_lines='skip') except Exception as e: log_debug(f"Error reading occupationSkillRelations_en.csv: {e}") esco_skill_map_df = pd.DataFrame() # Fallback to an empty DataFrame or handle the error appropriately # ================= LLM API ================= def initialize_openai_client(): try: client = openai.AzureOpenAI( api_key=os.getenv("AZURE_OPENAI_API_KEY"), azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), api_version=os.getenv("OPENAI_API_VERSION"), ) return client except Exception as e: raise Exception(f"Failed to initialize OpenAI client: {e}") client = initialize_openai_client() def gpt_call(system_prompt: str, user_prompt: str) -> str: try: response = client.chat.completions.create( model=os.getenv("AZURE_DEPLOYMENT_NAME"), messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt} ], temperature=0.3 ) return response.choices[0].message.content.strip() except Exception as e: return f"ERROR: {e}" # ================= Extract text ================= def extract_text_from_pdf(pdf_path: str) -> str: text = "" with pdfplumber.open(pdf_path) as pdf: for page in pdf.pages: page_text = page.extract_text() if page_text: text += page_text + "\n" for table in page.extract_tables(): for row in table: for cell in row: if isinstance(cell, str): text += cell + " " text += "\n" return text # ================= AI Functions ================= def extract_section_from_pdf(full_text: str, section_title: str) -> str: user_prompt = f""" Carefully evaluate the provided position description (PD) document and extract the content of the section titled "{section_title}" from the following text. Return only the content of the section, without the title. If the section cannot be found or explicitly mentioned in the text, use "N/A" as the default value. Do not repeat in the extracted text the name of the section. Extract precisely all the related text. Text of the position description: {full_text} Section to identify: "{section_title}": """ return gpt_call("You are an HR expert working for IOM.", user_prompt) def classify_job_family(responsibilities: List[str]) -> str: job_family_list = "\n".join(f"- {row['Job_family']}: {row['Job_subfamily']}" for _, row in job_families_df.iterrows()) user_prompt = f""" Here is a list of job responsibilities: {responsibilities} Here is a list of Job families: {job_family_list} Based on the responsibilities, suggest the most relevant job family and subfamily from the list above. **Important:** - Return ONLY the job family, nothing else. - The job family should be exactly as shown in the list. - Do not include any additional text or explanation. """ return gpt_call("Suggest job family and subfamily based on responsibilities.", user_prompt) def get_level_CCOG_info(df, code, level_name): matches = df[df['code'] == code] if len(matches) == 0: log_debug(f"Warning: No {level_name} found for CCOG code {code}") return { f'{level_name}_CCOG_code': code, f'{level_name}_CCOG_name': 'UNKNOWN', f'{level_name}_CCOG_desc': 'No matching occupation found' } info = matches.iloc[0] return { f'{level_name}_CCOG_code': code, f'{level_name}_CCOG_name': info['occupation'], f'{level_name}_CCOG_desc': info.get('occupation_description', '') } def code_sanitize(input_string, valid_codes): for code in valid_codes: if code in input_string: return code return None def classify_occupational_group_by_level(responsibilities: List[str]) -> dict: result = {} try: for level in range(1, 5): level_df = occupational_groups_df[occupational_groups_df['level'] == f"Level {level}"] if level > 1: prev_level_code = result[f'Level_{level-1}_CCOG_code'] level_df = level_df[level_df['code'].str.startswith(prev_level_code)] job_occupation_list = "\n".join(f"- {row['code']}: {row['occupation']} - {row.get('occupation_description', '')}" for _, row in level_df.iterrows()) list_output = level_df["code"].tolist() user_prompt = f""" Here is a list of job responsibilities: {responsibilities} Here is a list of level {level} Occupation classifications: {job_occupation_list} Based on the responsibilities, suggest the most relevant level {level} Occupation code from within this list: {', '.join(map(str, list_output))}. **Important:** - Return ONLY the code, nothing else. - The code should be exactly as shown in the list. - Do not include any additional text or explanation. """ level_code = gpt_call(f"Identify level {level} occupational group", user_prompt).strip() level_code = code_sanitize(level_code, list_output) result.update(get_level_CCOG_info(level_df, level_code, f'Level_{level}')) except Exception as e: log_debug(f"Error during classification: {str(e)}") result['error'] = str(e) return result def classify_esco_by_hierarchical_level(responsibilities: List[str]) -> dict: """ Classifies job responsibilities into occupational groups at 4 levels, [European Skills, Competences, Qualifications, and Occupations (ESCO)](https://esco.ec.europa.eu/en) returning codes, names, and descriptions for each level. Args: responsibilities: List of job responsibility strings Returns: Dictionary containing classification information or error message """ result = {} ######################## Level 1 ################### # Get all top-level codes (single character/digit) top_level_codes = sorted({ code for code in esco_df['code'] if len(code) == 1 and code.isalnum() }) level1_code = None if top_level_codes: level1_df = esco_df[esco_df['code'].isin(top_level_codes)] job_occupation_list = "\n".join(f"- {row['code']}: {row['preferredLabel']} - {row['description']}" for _, row in level1_df.iterrows()) list1_output = level1_df["code"].tolist() # Convert Series to list list1 = ", ".join(map(str, list1_output)) # Join elements with comma user_prompt1 = f""" Here is a list of job responsibilities: {responsibilities} Select the most relevant top-level code from these options: {job_occupation_list} Based on the responsibilities, suggest the most relevant level 1 Occupation code from within this list: {list1}. **Important:** - Return ONLY the code, nothing else. - The code should be exactly as shown in the list. - Do not include any additional text or explanation. """ level1_code = gpt_call("Identify top-level occupational group", user_prompt1).strip() level1_code = code_sanitize(level1_code, list1_output) result.update(get_level_ESCO_info(level1_df, level1_code, 'Level_1')) ######################## Level 2 ################### level2_code = None if level1_code: level2_df = esco_df[ (esco_df['code'].str.startswith(level1_code)) & (esco_df['code'].str.len() == len(level1_code) + 1) ] if not level2_df.empty: level2_options = "\n".join(f"- {row['code']}: {row['preferredLabel']} - {row['description']}" for _, row in level2_df.iterrows()) list2_output = level2_df["code"].tolist() # Convert Series to list list2 = ", ".join(map(str, list2_output)) # Join elements with comma user_prompt2 = f""" Here is a list of job responsibilities: {responsibilities} Here is a list of level 2 Occupation classifications within {level1_code}: {level2_options} Based on the responsibilities, suggest the most relevant level 2 Occupation code from within this list: {list2}. **Important:** - Return ONLY the code, nothing else. - The code should be exactly as shown in the list. - Do not include any additional text or explanation. """ level2_code = gpt_call("Identify second-level occupational group", user_prompt2).strip() level2_code = code_sanitize(level2_code, list2_output) result.update(get_level_ESCO_info(level2_df, level2_code, 'Level_2')) ######################## Level 3 ################### level3_code = None if level2_code: level3_df = esco_df[ (esco_df['code'].str.startswith(level2_code)) & (esco_df['code'].str.len() == len(level2_code) + 1) ] if not level3_df.empty: level3_options = "\n".join(f"- {row['code']}: {row['preferredLabel']} - {row['description']}" for _, row in level3_df.iterrows()) list3_output = level3_df["code"].tolist() # Convert Series to list list3 = ", ".join(map(str, list3_output)) # Join elements with comma user_prompt3 = f""" Here is a list of job responsibilities: {responsibilities} Here is a list of level 3 Occupation classifications within {level2_code}: {level3_options} Based on the responsibilities, suggest the most relevant level 3 Occupation code from within this list: {list3}. **Important:** - Return ONLY the code, nothing else. - The code should be exactly as shown in the list. - Do not include any additional text or explanation. """ level3_code = gpt_call("Identify third-level occupational group", user_prompt3).strip() level3_code = code_sanitize(level3_code, list3_output) result.update(get_level_ESCO_info(level3_df, level3_code, 'Level_3')) ######################## Level 4 ################### level4_code = None if level3_code: level4_df = esco_df[ (esco_df['code'].str.startswith(level3_code)) & (esco_df['code'].str.len() == len(level3_code) + 1) ] if not level4_df.empty: level4_options = "\n".join(f"- {row['code']}: {row['preferredLabel']} - {row['description']}" for _, row in level4_df.iterrows()) list4_output = level4_df["code"].tolist() # Convert Series to list list4 = ", ".join(map(str, list4_output)) # Join elements with comma user_prompt4 = f""" Here is a list of job responsibilities: {responsibilities} Here is a list of level 4 Occupation classifications within {level3_code}: {level4_options} Based on the responsibilities, suggest the most relevant level 4 Occupation code from within this list: {list4}. **Important:** - Return ONLY the code, nothing else. - The code should be exactly as shown in the list. - Do not include any additional text or explanation. """ level4_code = gpt_call("Identify fourth-level occupational group", user_prompt4).strip() level4_code = code_sanitize(level4_code, list4_output) result.update(get_level_ESCO_info(level4_df, level4_code, 'Level_4')) ######################## Level 5 ################### level5_code = None if level4_code: level5_df = esco_level5_df[ (esco_level5_df['iscoGroup'].str.startswith(level4_code)) ] if not level5_df.empty: level5_options = "\n".join(f"- {row['code']}: {row['preferredLabel']} - {row['description']}" for _, row in level5_df.iterrows()) list5_output = level5_df["code"].tolist() # Convert Series to list list5 = ", ".join(map(str, list5_output)) # Join elements with comma user_prompt5 = f""" Here is a list of job responsibilities: {responsibilities} Here is a list of level 4 Occupation classifications within {level4_code}: {level5_options} Based on the responsibilities, suggest the most relevant level 4 Occupation code from within this list: {list5}. **Important:** - Return ONLY the code as stated in the provided list, nothing else. - The code should be exactly as shown in the list. - Do not include any additional text, occupation code or explanation. """ level5_code = gpt_call("Identify fifth-level occupational group", user_prompt5).strip() # Handle the case where the LLM might return just the code part level5_code = code_sanitize(level5_code, list5_output) result.update(get_level_ESCO_info(level5_df, level5_code, 'Level_5')) ## Et voila!! return result def get_level_ESCO_info(df, code, level_name): """Helper function to get level info with error handling""" matches = df[df['code'] == code] if len(matches) == 0: log_debug(f"Warning: No {level_name} found for ESCO code {code}") return { f'{level_name}_ESCO_code': code, f'{level_name}_ESCO_name': 'UNKNOWN', f'{level_name}_ESCO_desc': 'No matching occupation found' } info = matches.iloc[0] return { f'{level_name}_ESCO_code': code, f'{level_name}_ESCO_name': info['preferredLabel'], f'{level_name}_ESCO_desc': info.get('description', '') } def get_skills_info_esco(Level_5_code): matches = esco_level5_df[esco_level5_df['code'] == Level_5_code] conceptUris = matches['conceptUri'].values.tolist() skills = esco_skill_map_df[esco_skill_map_df['occupationUri'].isin(conceptUris)] skillUris = skills['skillUri'].values.tolist() thisskillslist = esco_skill_df[esco_skill_df['conceptUri'].isin(skillUris)] result = thisskillslist[['preferredLabel', 'conceptUri', 'description']].drop_duplicates() result = result.rename(columns={'preferredLabel': 'skill_name', 'description': 'skill_description', 'conceptUri': 'skill_code'}) return result def review_skills(Level_5_code: str, top_n: int = 10) -> List[Dict[str, str]]: matches = esco_level5_df[esco_level5_df['code'] == Level_5_code] esco_occup = matches['preferredLabel'].values.tolist() skill_filtered = get_skills_info_esco(Level_5_code) skill_filtered_options = "\n".join(f"- {row['skill_code']}: {row['skill_name']} - {row['skill_description']}" for _, row in skill_filtered.iterrows()) prompt = f""" Here is a list of skills: {skill_filtered_options} Filter the skills that are relevant in the context of the work of the International Organisation for Migration. Ensure that skills are relevant in the context of a {esco_occup} working for a non-profit public organization. Required JSON structure: {{ "skills": [ {{ "skill_name": "string", "skill_description": "string", "skill_code": "string" }} ] }} **Important:** - Do not duplicate any records of skills - Keep only the 10 most relevant skills - Return ONLY the JSON object with no other text - Use double quotes for all strings - No trailing commas in arrays/objects - No markdown formatting (no ```json) - No text before or after the JSON - Escape all special characters in strings - Ensure all brackets are properly closed - No trailing commas in arrays/objects, especially before closing brackets """ raw = gpt_call("You are an HR expert working for the International Organisation for Migration and with in-depth knowledge of the European Skills, Competences, Qualifications and Occupations. Extract skills required for this position.", prompt) json_text = _extract_json(raw) if not json_text: return [] try: result = json.loads(json_text) skills = result.get("skills", []) except json.JSONDecodeError as e: log_debug(f"❌ JSON Skills parsing error: {e}") log_debug(f"🔍 Problematic JSON Skills: {json_text}") return [] validated_skills = [] for skill in skills: try: validated = { "skill_name": str(skill["skill_name"]).strip(), "skill_description": str(skill["skill_description"]).strip(), "skill_code": str(skill["skill_code"]).strip() } validated_skills.append(validated) except (KeyError, TypeError) as e: log_debug(f"⚠️ Skipping invalid skill: {skill}. Error: {e}") continue return validated_skills[:top_n] def extract_skills(responsibilities: List[str], top_n: int = 10) -> List[Dict[str, str]]: prompt = f""" Here is a list of job responsibilities: {responsibilities} List the required skills and knowledge as bullet points (without numbers) using ESCO-style terms. For each Skill: 1. skill_name: precise skills name as used in ESCO framework 2. skill_description: add the long description as mentioned in ESCO framework 3. skill_code: include the detailed corresponding ESCO code for that skill. Required JSON structure: {{ "skills": [ {{ "skill_name": "string", "skill_description": "string", "skill_code": "string" }} ] }} **Important:** - Return ONLY the JSON object with no other text - Use double quotes for all strings - No trailing commas in arrays/objects - No markdown formatting (no ```json) - No text before or after the JSON - Escape all special characters in strings - Ensure all brackets are properly closed """ raw = gpt_call("You are an HR expert working for the International Organisation for Migration and with in-depth knowledge of the European Skills, Competences, Qualifications and Occupations. Extract skills required for this position.", prompt) json_text = _extract_json(raw) if not json_text: return [] try: result = json.loads(json_text) skills = result.get("skills", []) except json.JSONDecodeError as e: log_debug(f"❌ JSON Skills extrac parsing error: {e}") log_debug(f"🔍 Problematic JSON Skills extract: {json_text}") return [] validated_skills = [] for skill in skills: try: validated = { "skill_name": str(skill["skill_name"]).strip(), "skill_description": str(skill["skill_description"]).strip(), "skill_code": str(skill["skill_code"]).strip() } validated_skills.append(validated) except (KeyError, TypeError) as e: log_debug(f"⚠️ Skipping invalid skill extract: {skill}. Error: {e}") continue return validated_skills[:top_n] def map_proficiency_and_assessment(skills: List[str], responsibilities: List[str]) -> List[Dict]: prompt = f""" Here is a list of job responsibilities: {responsibilities} that have been associated with the following skills: {skills} For each skill, accounting for the context defined within the responsibilities, return a JSON object with: - skill_name: the name of the skill - importance: essential or optional - type: "skill/competence" or "knowledge" - proficiency_level: Basic, Intermediate, or Advanced - distinctive_elements: what specific and distinctive elements are required at this defined proficiency level? - resume_signals: what to look for in a resume to assess this skill? - assessment_method: what is the preferred assessment method to accurately assess this skill? Respond ONLY with a list of dictionaries in valid JSON. Use double quotes for all strings. No markdown, no commentary, no trailing commas. """ raw = gpt_call("Define proficiency level and assessment for each skill.", prompt) json_text = _extract_json_array(raw) if not json_text: return [] try: results = json.loads(json_text) except json.JSONDecodeError as e: log_debug(f"❌ JSON proficiency parsing error: {e}") log_debug(f"🔍 Problematic JSON proficiency: {json_text}") return [] validated = [] for item in results: try: validated.append({ "skill_name": str(item["skill_name"]).strip(), "importance": item["importance"].strip().lower(), "type": item["type"].strip().lower(), "proficiency_level": item["proficiency_level"].strip().capitalize(), "distinctive_elements": item["distinctive_elements"].strip(), "resume_signals": item["resume_signals"].strip(), "assessment_method": item["assessment_method"].strip() }) except (KeyError, TypeError) as e: log_debug(f"⚠️ Skipping invalid profiency item: {item}. Error: {e}") continue return validated def _extract_json_array(raw: str) -> str: json_start = raw.find('[') json_end = raw.rfind(']') + 1 if json_start == -1 or json_end == 0: log_debug(f"❌ No JSON array found in response: {raw}") return "" json_text = raw[json_start:json_end] json_text = re.sub(r',\s*([}\]])', r'\1', json_text) json_text = re.sub(r'[\n\r\t]', ' ', json_text) json_text = re.sub(r'(? List[str]: prompt = f""" Here is a list of job responsibilities: {responsibilities} Infer the required level within the European Qualifications Framework (EQF) to implement them. Identify the potential diplomas to testify such qualification """ raw = gpt_call("You are an HR expert that excel in developing competency-based interview questions.", prompt) return [line.strip("-• ").strip() for line in raw.splitlines() if line.strip()] def build_interview(responsibilities: List[str], skill_assess: List[str]) -> List[str]: prompt = f""" Here is a list of job responsibilities: {responsibilities} and related skills: {skill_assess} Output: A structured 40-minute interview with: Opening questions (5 min) Core competency-based questions (30 min, 5-6 questions) Closing & candidate questions (5 min) """ raw = gpt_call("You are an HR expert that excel in developing competency-based interview questions.", prompt) return [line.strip("-• ").strip() for line in raw.splitlines() if line.strip()] def _extract_json(raw: str) -> str: json_start = raw.find('{') json_end = raw.rfind('}') + 1 if json_start == -1 or json_end == 0: log_debug(f"❌ No JSON found in response: {raw}") return "" json_text = raw[json_start:json_end] json_text = re.sub(r',\s*([}\]])', r'\1', json_text) json_text = re.sub(r'[\n\r\t]', ' ', json_text) json_text = re.sub(r'\s{2,}', ' ', json_text) json_text = re.sub(r'\\(?!["\\/bfnrtu])', r'\\\\', json_text) json_text = json_text.strip() return json_text # ================= Process Analysis ================= from concurrent.futures import ThreadPoolExecutor def process_pdf(file): if file is None: return ( "Please upload a PDF file.", "", "", "", {}, "", [], {}, {}, "No file uploaded." ) try: extracted_text = extract_text_from_pdf(file.name) responsibilities = extract_section_from_pdf(extracted_text, section_title="Responsibilities and Accountabilities") if not responsibilities: log_debug(f"Skipping {os.path.basename(file.name)} - no responsibilities section found") return ( os.path.basename(file.name), "", "", "", {}, "", [], {}, {}, "No responsibilities section found." ) # Use ThreadPoolExecutor to parallelize independent tasks with ThreadPoolExecutor() as executor: # Submit tasks to the executor job_family_future = executor.submit(classify_job_family, responsibilities) occ_group_future = executor.submit(classify_occupational_group_by_level, responsibilities) esco_occ_future = executor.submit(classify_esco_by_hierarchical_level, responsibilities) qualification_future = executor.submit(extract_qualification, responsibilities) skills_future = executor.submit(extract_skills, responsibilities) # Retrieve results from futures job_family = job_family_future.result() occ_group = occ_group_future.result() esco_occ = esco_occ_future.result() qualification = qualification_future.result() skills = skills_future.result() log_debug(f"Identified {job_family}") skill_map = map_proficiency_and_assessment(skills, responsibilities) has_esco = esco_occ.get("Level_5_ESCO_code") is not None skill_esco_extract = [] skill_esco_map = [] if has_esco: Level_5_code = esco_occ["Level_5_ESCO_code"] skill_esco_extract = review_skills(Level_5_code) skill_esco_map = map_proficiency_and_assessment(skill_esco_extract, responsibilities) else: log_debug(f"No Level 5 ESCO code found for {os.path.basename(file.name)}, skipping ESCO skills mapping") time.sleep(6) assessment_lookup = {item['skill_name']: item for item in skill_map} joined_skills = [ { "skill_name": skill["skill_name"], "skill_description": skill["skill_description"], "skill_code": skill["skill_code"], "importance": assessment_lookup.get(skill["skill_name"], {}).get("importance"), "type": assessment_lookup.get(skill["skill_name"], {}).get("type"), "proficiency_level": assessment_lookup.get(skill["skill_name"], {}).get("proficiency_level"), "distinctive_elements": assessment_lookup.get(skill["skill_name"], {}).get("distinctive_elements"), "resume_signals": assessment_lookup.get(skill["skill_name"], {}).get("resume_signals"), "assessment_method": assessment_lookup.get(skill["skill_name"], {}).get("assessment_method") } for skill in skills ] joined_skills_esco = [] if has_esco and skill_esco_extract: assessment_esco_lookup = {item['skill_name']: item for item in skill_esco_map} joined_skills_esco = [ { "skill_name": skill["skill_name"], "skill_description": skill["skill_description"], "skill_code": skill["skill_code"], **assessment_esco_lookup.get(skill["skill_name"], {}) } for skill in skill_esco_extract ] interview = build_interview(responsibilities, skills) # Prepare the results for each output component ccoq_levels = {f"Level_{i}_CCOG_{field}": occ_group.get(f"Level_{i}_CCOG_{field}") for i in range(1, 5) for field in ["code", "name", "desc"]} if has_esco: esco_levels = {f"Level_{i}_ESCO_{field}": esco_occ.get(f"Level_{i}_ESCO_{field}") for i in range(1, 6) for field in ["code", "name", "desc"]} esco_skills = { "file": os.path.basename(file.name), "classified_job_family": job_family, "skills": joined_skills_esco } else: esco_levels = {f"Level_{i}_ESCO_{field}": None for i in range(1, 6) for field in ["code", "name", "desc"]} esco_skills = None debug_message = "Processing completed successfully." return ( os.path.basename(file.name), responsibilities, job_family, "\n".join(qualification), ccoq_levels, "\n".join(interview), joined_skills, esco_levels, esco_skills, debug_message if DEBUG else None ) except Exception as e: error_message = f"Error processing PDF: {str(e)}" return ( error_message, "", "", "", {}, "", [], {}, {}, error_message ) # ================= Build Word Report ================= from docx import Document def generate_word_document(result): doc = Document() # Add a title doc.add_heading('Job Description Analysis', level=1) # Add file name doc.add_heading('File Name', level=2) doc.add_paragraph(result["file"]) # Add responsibilities doc.add_heading('Responsibilities', level=2) doc.add_paragraph(result["responsibilities"]) # Add job family doc.add_heading('Classified Job Family', level=2) doc.add_paragraph(result["classified_job_family"]) # Add qualifications doc.add_heading('Qualification', level=2) doc.add_paragraph("\n".join(result["qualification"])) # Add CCOG Levels doc.add_heading('CCOG Levels', level=2) for i in range(1, 5): for field in ["code", "name", "desc"]: key = f"Level_{i}_CCOG_{field}" if key in result: doc.add_paragraph(f"{key}: {result[key]}") # Add interview questions doc.add_heading('Interview Questions', level=2) doc.add_paragraph("\n".join(result["interview"])) # Add skills doc.add_heading('Skills', level=2) for skill in result["skills"]["skills"]: doc.add_paragraph(f"Skill Name: {skill['skill_name']}") doc.add_paragraph(f"Description: {skill['skill_description']}") doc.add_paragraph(f"Code: {skill['skill_code']}") doc.add_paragraph(f"Importance: {skill.get('importance', 'N/A')}") doc.add_paragraph(f"Type: {skill.get('type', 'N/A')}") doc.add_paragraph(f"Proficiency Level: {skill.get('proficiency_level', 'N/A')}") doc.add_paragraph(f"Distinctive Elements: {skill.get('distinctive_elements', 'N/A')}") doc.add_paragraph(f"Resume Signals: {skill.get('resume_signals', 'N/A')}") doc.add_paragraph(f"Assessment Method: {skill.get('assessment_method', 'N/A')}") doc.add_paragraph("") # Add an empty line for separation # Add ESCO Levels if available if "skills_esco" in result and result["skills_esco"]: doc.add_heading('ESCO Levels', level=2) for i in range(1, 6): for field in ["code", "name", "desc"]: key = f"Level_{i}_ESCO_{field}" if key in result: doc.add_paragraph(f"{key}: {result[key]}") # Add ESCO Skills doc.add_heading('ESCO Skills', level=2) for skill in result["skills_esco"]["skills"]: doc.add_paragraph(f"Skill Name: {skill['skill_name']}") doc.add_paragraph(f"Description: {skill['skill_description']}") doc.add_paragraph(f"Code: {skill['skill_code']}") doc.add_paragraph(f"Importance: {skill.get('importance', 'N/A')}") doc.add_paragraph(f"Type: {skill.get('type', 'N/A')}") doc.add_paragraph(f"Proficiency Level: {skill.get('proficiency_level', 'N/A')}") doc.add_paragraph(f"Distinctive Elements: {skill.get('distinctive_elements', 'N/A')}") doc.add_paragraph(f"Resume Signals: {skill.get('resume_signals', 'N/A')}") doc.add_paragraph(f"Assessment Method: {skill.get('assessment_method', 'N/A')}") doc.add_paragraph("") # Add an empty line for separation # Save the document to a temporary file temp_file_path = "job_description_analysis.docx" doc.save(temp_file_path) return temp_file_path # ================= GRADIO INTERFACE ================= with gr.Blocks( title="AI-powered tool to review Job Position Description", css=""" @import url('https://fonts.googleapis.com/css2?family=Lato:wght@400;700&display=swap'); @import url('https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css'); /* Completely disable Gradio's dark theme */ .gradio-container.dark { --body-background-fill: white !important; --background-fill-primary: white !important; --background-fill-secondary: #f8f9fa !important; --block-background-fill: white !important; --input-background-fill: white !important; --block-label-text-color: #212529 !important; --body-text-color: #212529 !important; --block-title-text-color: var(--primary-color) !important; --border-color-primary: #dee2e6 !important; } .gradio-container.dark .gr-markdown, .gradio-container.dark .gr-textbox, .gradio-container.dark .gr-dropdown, .gradio-container.dark .output-section { background: white !important; color: #212529 !important; border-color: #dee2e6 !important; } /* Base Styles */ :root { --primary-color: #0033A0; --secondary-color: #e67e22; --accent-color: #f59e0b; --dark-color: #34495e; --light-color: #ecf0f1; --success-color: #27ae60; --warning-color: #f39c12; --danger-color: #e74c3c; --text-color: #333; --text-light: #7f8c8d; --border-radius: 8px; --box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); --transition: all 0.3s ease; } /* Header Styles */ .header { text-align: center; margin-bottom: 2rem; padding: 1rem; } .header h1 { margin: 0; font-family: 'Lato', sans-serif; font-size: 2.5rem; font-weight: 600; color: var(--primary-color); } .header p { margin: 0.5rem 0 0; font-family: 'Lato', sans-serif; opacity: 0.9; font-size: 1.5rem; color: #4b5563; } /* Section Titles */ .section-title { display: flex; align-items: left; font-family: 'Lato', sans-serif; gap: 0.5rem; color: var(--primary-color); margin: 1rem 0; font-size: 1.25rem; font-weight: 600; } .section-title i { font-size: 1.1em; color: var(--accent-color); } /* Input Section */ .input-section { background: white; padding: 0.75rem 0.5rem; border: 1px solid #d1d5db; border-radius: var(--border-radius); box-shadow: var(--box-shadow); margin-right: 1rem; } /* Output Section */ .output-section { background: white; padding: 1.5rem; border-radius: var(--border-radius); box-shadow: var(--box-shadow); } /* Form Elements */ .gr-textbox, .gr-dropdown { border: 1px solid #ddd; border-radius: var(--border-radius) !important; padding: 0.75rem 1rem !important; transition: var(--transition); } .gr-textbox:focus, .gr-dropdown:focus { border-color: var(--primary-color) !important; box-shadow: 0 0 0 2px rgba(44, 110, 203, 0.2) !important; outline: none !important; } .gr-textbox::placeholder { color: var(--text-light) !important; opacity: 0.7 !important; } label { font-weight: 500 !important; color: var(--dark-color) !important; margin-bottom: 0.5rem !important; display: block !important; } /* Buttons */ .btn-primary { background: var(--primary-color) !important; color: white !important; border: none !important; border-radius: var(--border-radius) !important; padding: 0.75rem 1.5rem !important; font-weight: 500 !important; transition: var(--transition) !important; text-transform: uppercase !important; letter-spacing: 0.5px !important; } .btn-primary:hover { background: #002080 !important; transform: translateY(-2px) !important; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15) !important; } .btn-primary:active { transform: translateY(0) !important; } /* Output Markdown */ .gr-markdown { background: #f9f9f9; padding: 1.5rem; border-radius: var(--border-radius); border-left: 4px solid var(--primary-color); } /* Debug Console */ .gr-textbox[label="⚠️ Console Log"] { font-family: monospace !important; background: #2c3e50 !important; color: #ecf0f1 !important; border-radius: var(--border-radius) !important; padding: 1rem !important; } /* Responsive Layout */ @media (max-width: 768px) { .gr-row { flex-direction: column !important; } .input-section { margin-right: 0 !important; margin-bottom: 1rem !important; } } """, head=''' ''' ) as demo: # Header section with gr.Column(): with gr.Row(): with gr.Column(): gr.HTML("""
Use AI to standardise an initial draft position description and identify related Job Family, Occupation, Qualification, match Skills and suggest interview questions.