import gradio as gr
import pdfplumber
import pandas as pd
import re
import warnings
import logging
import os
from dotenv import load_dotenv
import json
from concurrent.futures import ThreadPoolExecutor
from typing import List, Dict, Optional
import traceback
import time
import openai
# Debugging setup
DEBUG = True
debug_messages = []


def log_debug(message):
    """Record a timestamped debug line and return the retained log as text.

    When DEBUG is off nothing is recorded and an empty string is returned.
    Otherwise the line is appended to ``debug_messages``, echoed to stdout,
    and the buffer is capped at its 20 most recent entries.

    Args:
        message: Text (or any object with a string form) to log.

    Returns:
        The retained entries joined with newlines, or "" when DEBUG is False.
    """
    if not DEBUG:
        return ""
    stamp = time.strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{stamp}] {message}"
    debug_messages.append(entry)
    print(entry)  # Mirror to the console
    # One entry is added per call, so dropping the oldest keeps the cap at 20.
    if len(debug_messages) > 20:
        del debug_messages[0]
    return "\n".join(debug_messages)
# Record a first entry so the debug log shows when the module was loaded.
log_debug("Application starting...")
# Load environment variables (python-dotenv); the Azure OpenAI settings used
# further down in this file are read from the environment with os.getenv.
load_dotenv()
# Only let ERROR-level (and above) records through the 'pdfminer' logger.
logging.getLogger('pdfminer').setLevel(logging.ERROR)
# Ignore UserWarnings whose message starts with "CropBox".
warnings.filterwarnings("ignore", category=UserWarning, message="CropBox.*")
# ================= DataFrame initializations =================
def _read_csv_or_empty(path, label, **read_kwargs):
    """Read ``path`` with pandas, skipping malformed rows.

    Logs the number of rows read under ``label``. Any failure is logged and
    replaced by an empty DataFrame so that module import still succeeds.
    """
    try:
        frame = pd.read_csv(path, on_bad_lines='skip', **read_kwargs)
        log_debug(f"Reading {len(frame)} {label}")
    except Exception as e:
        log_debug(f"Error reading {path}: {e}")
        frame = pd.DataFrame()  # Fallback to an empty DataFrame
    return frame


job_families_df = _read_csv_or_empty("job_families1.csv", "job_families")
occupational_groups_df = _read_csv_or_empty("occupational_groups.csv", "occupational_groups")
# Force the code columns to be read as strings.
esco_df = _read_csv_or_empty("ISCOGroups_en.csv", "esco groups", dtype={'code': str})
esco_level5_df = _read_csv_or_empty(
    "occupations_en.csv", "esco_level5", dtype={'code': str, 'iscoGroup': str}
)
esco_skill_df = _read_csv_or_empty("skills_en.csv", "esco_skill")
esco_skill_map_df = _read_csv_or_empty("occupationSkillRelations_en.csv", "esco_skill_map")
# ================= LLM API =================
def initialize_openai_client():
    """Build the Azure OpenAI client from environment configuration.

    Reads AZURE_OPENAI_API_KEY, AZURE_OPENAI_ENDPOINT and OPENAI_API_VERSION
    from the environment.

    Returns:
        The constructed ``openai.AzureOpenAI`` client.

    Raises:
        Exception: If the client cannot be constructed. The underlying error
            is attached as the explicit cause so the traceback shows both.
    """
    try:
        return openai.AzureOpenAI(
            api_key=os.getenv("AZURE_OPENAI_API_KEY"),
            azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
            api_version=os.getenv("OPENAI_API_VERSION"),
        )
    except Exception as e:
        raise Exception(f"Failed to initialize OpenAI client: {e}") from e


# Shared client used by gpt_call; created once when the module is loaded.
client = initialize_openai_client()
def gpt_call(system_prompt: str, user_prompt: str) -> str:
    """Send one system/user exchange to the configured deployment.

    The deployment name is read from AZURE_DEPLOYMENT_NAME and the request
    uses temperature 0.3.

    Args:
        system_prompt: Content of the "system" message.
        user_prompt: Content of the "user" message.

    Returns:
        The reply text with surrounding whitespace removed, "" when the reply
        carries no text, or a string starting with "ERROR: " when the request
        fails. Failures are reported in-band; nothing is raised.
    """
    try:
        response = client.chat.completions.create(
            model=os.getenv("AZURE_DEPLOYMENT_NAME"),
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.3
        )
        content = response.choices[0].message.content
        # NOTE(review): the SDK types `content` as optional; treat a missing
        # body as an empty reply instead of relying on an AttributeError.
        return (content or "").strip()
    except Exception as e:
        # The error is returned as text, so log it here or it stays invisible.
        log_debug(f"LLM call failed: {e}")
        return f"ERROR: {e}"
# ================= Extract text =================
def extract_text_from_pdf(pdf_path: str) -> str:
    """Collect page text and table cell text from a PDF into one string.

    For every page the extracted text (when there is any) is followed by a
    newline; then each table row contributes its string cells, each followed
    by a space, and a closing newline.

    Args:
        pdf_path: Path of the PDF to open with pdfplumber.

    Returns:
        The concatenated text.
    """
    pieces = []
    with pdfplumber.open(pdf_path) as document:
        for page in document.pages:
            body = page.extract_text()
            if body:
                pieces.append(body + "\n")
            for table in page.extract_tables():
                for row in table:
                    # Non-string cells (e.g. None) are skipped.
                    pieces.extend(cell + " " for cell in row if isinstance(cell, str))
                    # NOTE(review): newline placed after each row; the source
                    # indentation was lost, confirm it is not per table.
                    pieces.append("\n")
    return "".join(pieces)
# ================= AI Functions =================
def extract_section_from_pdf(full_text: str, section_title: str) -> str:
    """Ask the model to return the body of one named section of a PD.

    Args:
        full_text: Full text of the position description.
        section_title: Title of the section to extract.

    Returns:
        Whatever gpt_call returns for the request. The prompt instructs the
        model to answer "N/A" when the section is not found.
    """
    system_role = "You are an HR expert working for IOM."
    request = f"""
Carefully evaluate the provided position description (PD) document and extract the content of the section titled "{section_title}" from the following text.
Return only the content of the section, without the title.
If the section cannot be found or explicitly mentioned in the text, use "N/A" as the default value.
Do not repeat in the extracted text the name of the section.
Extract precisely all the related text.
Text of the position description:
{full_text}
Section to identify: "{section_title}":
"""
    return gpt_call(system_role, request)
def classify_job_family(responsibilities: List[str]) -> str:
    """Ask the model for the job family that best fits the responsibilities.

    The candidate list is built from the Job_family / Job_subfamily columns
    of ``job_families_df``.

    Args:
        responsibilities: Responsibilities text interpolated into the prompt.

    Returns:
        The reply from gpt_call.
    """
    family_lines = []
    for _, entry in job_families_df.iterrows():
        family_lines.append(f"- {entry['Job_family']}: {entry['Job_subfamily']}")
    job_family_list = "\n".join(family_lines)
    request = f"""
Here is a list of job responsibilities:
{responsibilities}
Here is a list of Job families:
{job_family_list}
Based on the responsibilities, suggest the most relevant job family and subfamily from the list above.
**Important:**
- Return ONLY the job family, nothing else.
- The job family should be exactly as shown in the list.
- Do not include any additional text or explanation.
"""
    return gpt_call("Suggest job family and subfamily based on responsibilities.", request)
def get_level_CCOG_info(df, code, level_name):
    """Look up one CCOG code in ``df`` and return its labelled details.

    Args:
        df: Frame with a 'code' column and an 'occupation' column; an
            'occupation_description' column is used when present.
        code: Code to look up.
        level_name: Prefix for the returned keys, e.g. 'Level_1'.

    Returns:
        Dict with '<level_name>_CCOG_code', '_CCOG_name' and '_CCOG_desc'.
        When no row matches, the name is 'UNKNOWN' and a warning is logged.
    """
    prefix = f'{level_name}_CCOG'
    hits = df[df['code'] == code]
    if hits.shape[0] == 0:
        log_debug(f"Warning: No {level_name} found for CCOG code {code}")
        name = 'UNKNOWN'
        desc = 'No matching occupation found'
    else:
        first = hits.iloc[0]  # only the first matching row is used
        name = first['occupation']
        desc = first.get('occupation_description', '')
    return {
        f'{prefix}_code': code,
        f'{prefix}_name': name,
        f'{prefix}_desc': desc,
    }
def code_sanitize(input_string, valid_codes):
    """Map a free-text model reply onto one of the allowed codes.

    The previous implementation returned the first code that was a substring
    of the reply, so "2422.1" won for the reply "2422.12" and "4" matched
    "ERROR: 401". Matching now works as follows:

    1. a reply that equals a code (ignoring surrounding whitespace) wins;
    2. otherwise a code must appear as a standalone token, i.e. not glued to
       a letter or digit on either side;
    3. when several codes qualify, the longest (most specific) one is
       returned; ties keep the order of ``valid_codes``.

    Args:
        input_string: Reply text to inspect; None or "" yields None.
        valid_codes: Iterable of allowed codes. None/NaN/empty entries are
            ignored.

    Returns:
        The matching element of ``valid_codes``, or None when nothing matches.
    """
    if not input_string:
        return None
    text = str(input_string).strip()

    # Drop unusable entries (None, NaN, empty) and keep the original objects.
    candidates = [
        (code, str(code))
        for code in valid_codes
        if code is not None and code == code and str(code) != ""
    ]

    for code, code_text in candidates:
        if code_text == text:
            return code

    found = [
        (code, code_text)
        for code, code_text in candidates
        if re.search(
            r'(?<![A-Za-z0-9])' + re.escape(code_text) + r'(?![A-Za-z0-9])',
            text,
        )
    ]
    if not found:
        return None
    # max() returns the first maximal element, preserving list order on ties.
    return max(found, key=lambda pair: len(pair[1]))[0]
def classify_occupational_group_by_level(responsibilities: List[str]) -> dict:
    """Classify responsibilities into CCOG occupational groups, levels 1 to 4.

    Each level is chosen by the model from the rows of
    ``occupational_groups_df`` for that level; from level 2 onwards only codes
    starting with the previous level's code are offered.

    Args:
        responsibilities: Responsibilities text interpolated into the prompts.

    Returns:
        Dict with 'Level_<n>_CCOG_code' / '_name' / '_desc' for each level
        reached. When a level cannot be resolved, that level is recorded as
        'UNKNOWN', an 'error' entry explains why, and deeper levels are not
        attempted. Unexpected exceptions are also reported under 'error'.
    """
    result = {}
    try:
        for level in range(1, 5):
            level_df = occupational_groups_df[occupational_groups_df['level'] == f"Level {level}"]
            if level > 1:
                prev_level_code = result[f'Level_{level-1}_CCOG_code']
                level_df = level_df[level_df['code'].str.startswith(prev_level_code, na=False)]
            list_output = level_df["code"].tolist()
            level_code = None
            if list_output:
                job_occupation_list = "\n".join(
                    f"- {row['code']}: {row['occupation']} - {row.get('occupation_description', '')}"
                    for _, row in level_df.iterrows()
                )
                user_prompt = f"""
Here is a list of job responsibilities:
{responsibilities}
Here is a list of level {level} Occupation classifications:
{job_occupation_list}
Based on the responsibilities, suggest the most relevant level {level} Occupation code from within this list: {', '.join(map(str, list_output))}.
**Important:**
- Return ONLY the code, nothing else.
- The code should be exactly as shown in the list.
- Do not include any additional text or explanation.
"""
                level_code = gpt_call(f"Identify level {level} occupational group", user_prompt).strip()
                level_code = code_sanitize(level_code, list_output)
            result.update(get_level_CCOG_info(level_df, level_code, f'Level_{level}'))
            if level_code is None:
                # Without a parent code there is nothing to narrow the next
                # level by; stop explicitly instead of failing on startswith(None).
                message = f"No valid level {level} CCOG code could be identified"
                log_debug(f"Error during classification: {message}")
                result['error'] = message
                break
    except Exception as e:
        log_debug(f"Error during classification: {str(e)}")
        result['error'] = str(e)
    return result
_ESCO_CODE_RULES = """- Return ONLY the code, nothing else.
- The code should be exactly as shown in the list.
- Do not include any additional text or explanation."""

_ESCO_LEVEL5_CODE_RULES = """- Return ONLY the code as stated in the provided list, nothing else.
- The code should be exactly as shown in the list.
- Do not include any additional text, occupation code or explanation."""


def _choose_esco_code(responsibilities, options_df, level, system_prompt, list_intro,
                      rules=_ESCO_CODE_RULES):
    """Ask the model to pick one code from ``options_df`` and validate the reply."""
    option_lines = "\n".join(
        f"- {row['code']}: {row['preferredLabel']} - {row['description']}"
        for _, row in options_df.iterrows()
    )
    valid_codes = options_df["code"].tolist()
    code_list = ", ".join(map(str, valid_codes))
    user_prompt = f"""
Here is a list of job responsibilities:
{responsibilities}
{list_intro}
{option_lines}
Based on the responsibilities, suggest the most relevant level {level} Occupation code from within this list: {code_list}.
**Important:**
{rules}
"""
    answer = gpt_call(system_prompt, user_prompt).strip()
    return code_sanitize(answer, valid_codes)


def classify_esco_by_hierarchical_level(responsibilities: List[str]) -> dict:
    """
    Classifies job responsibilities into occupational groups at 5 levels,
    [European Skills, Competences, Qualifications, and Occupations (ESCO)](https://esco.ec.europa.eu/en)
    returning codes, names, and descriptions for each level.

    Levels 1-4 come from ``esco_df``: level 1 offers the single-character
    codes, and each following level offers the codes that extend the previous
    choice by exactly one character. Level 5 offers the rows of
    ``esco_level5_df`` whose 'iscoGroup' starts with the level 4 code.
    Descent stops at the first level that has no candidates or no valid reply.

    Args:
        responsibilities: List of job responsibility strings
    Returns:
        Dictionary with 'Level_<n>_ESCO_code' / '_name' / '_desc' for each
        level reached. Unexpected failures are logged and reported under
        'error', matching classify_occupational_group_by_level.
    """
    result = {}
    try:
        # The CSV loaders fall back to an empty frame, which has no columns.
        if 'code' not in esco_df.columns:
            log_debug("ESCO groups table is not available; skipping ESCO classification")
            return result
        codes = esco_df['code']

        ######################## Level 1 ###################
        # Top-level codes are a single character; missing (NaN) codes are skipped.
        top_level_codes = sorted({
            code for code in codes
            if isinstance(code, str) and len(code) == 1 and code.isalnum()
        })
        if not top_level_codes:
            return result
        options_df = esco_df[codes.isin(top_level_codes)]
        current_code = _choose_esco_code(
            responsibilities, options_df, 1,
            "Identify top-level occupational group",
            "Select the most relevant top-level code from these options:",
        )
        result.update(get_level_ESCO_info(options_df, current_code, 'Level_1'))

        ######################## Levels 2-4 ###################
        for level, ordinal in ((2, "second"), (3, "third"), (4, "fourth")):
            if not current_code:
                return result
            options_df = esco_df[
                codes.str.startswith(current_code, na=False)
                & (codes.str.len() == len(current_code) + 1)
            ]
            if options_df.empty:
                return result
            parent_code = current_code
            current_code = _choose_esco_code(
                responsibilities, options_df, level,
                f"Identify {ordinal}-level occupational group",
                f"Here is a list of level {level} Occupation classifications within {parent_code}:",
            )
            result.update(get_level_ESCO_info(options_df, current_code, f'Level_{level}'))

        ######################## Level 5 ###################
        if not current_code or 'iscoGroup' not in esco_level5_df.columns:
            return result
        level5_df = esco_level5_df[
            esco_level5_df['iscoGroup'].str.startswith(current_code, na=False)
        ]
        if level5_df.empty:
            return result
        # The prompt previously said "level 4" here (copy-paste slip).
        level5_code = _choose_esco_code(
            responsibilities, level5_df, 5,
            "Identify fifth-level occupational group",
            f"Here is a list of level 5 Occupation classifications within {current_code}:",
            rules=_ESCO_LEVEL5_CODE_RULES,
        )
        result.update(get_level_ESCO_info(level5_df, level5_code, 'Level_5'))
    except Exception as e:
        log_debug(f"Error during ESCO classification: {str(e)}")
        result['error'] = str(e)
    return result
def get_level_ESCO_info(df, code, level_name):
    """Look up one ESCO code in ``df`` and return its labelled details.

    Args:
        df: Frame with a 'code' column and a 'preferredLabel' column; a
            'description' column is used when present.
        code: Code to look up.
        level_name: Prefix for the returned keys, e.g. 'Level_1'.

    Returns:
        Dict with '<level_name>_ESCO_code', '_ESCO_name' and '_ESCO_desc'.
        When no row matches, the name is 'UNKNOWN' and a warning is logged.
    """
    prefix = f'{level_name}_ESCO'
    hits = df[df['code'] == code]
    if hits.shape[0] == 0:
        log_debug(f"Warning: No {level_name} found for ESCO code {code}")
        name = 'UNKNOWN'
        desc = 'No matching occupation found'
    else:
        first = hits.iloc[0]  # only the first matching row is used
        name = first['preferredLabel']
        desc = first.get('description', '')
    return {
        f'{prefix}_code': code,
        f'{prefix}_name': name,
        f'{prefix}_desc': desc,
    }
def get_skills_info_esco(Level_5_code):
    """Return the ESCO skills linked to an occupation code.

    The occupation's conceptUri values are looked up in ``esco_level5_df``,
    followed through ``esco_skill_map_df`` (occupationUri -> skillUri) and
    resolved against ``esco_skill_df``.

    Args:
        Level_5_code: Value matched against the 'code' column of
            ``esco_level5_df``.

    Returns:
        DataFrame of distinct rows with columns skill_name, skill_code and
        skill_description.
    """
    occupation_rows = esco_level5_df.loc[esco_level5_df['code'] == Level_5_code]
    occupation_uris = occupation_rows['conceptUri'].values.tolist()

    relation_rows = esco_skill_map_df.loc[esco_skill_map_df['occupationUri'].isin(occupation_uris)]
    skill_uris = relation_rows['skillUri'].values.tolist()

    skill_rows = esco_skill_df.loc[esco_skill_df['conceptUri'].isin(skill_uris)]
    renamed_columns = {
        'preferredLabel': 'skill_name',
        'description': 'skill_description',
        'conceptUri': 'skill_code',
    }
    distinct = skill_rows[['preferredLabel', 'conceptUri', 'description']].drop_duplicates()
    return distinct.rename(columns=renamed_columns)
def review_skills(Level_5_code: str, top_n: int = 10) -> List[Dict[str, str]]:
    """Ask the model to keep the ESCO skills most relevant to the occupation.

    The candidate skills are those linked to ``Level_5_code`` by
    get_skills_info_esco. The prompt now asks for ``top_n`` skills (it used to
    hard-code 10) and names the occupation by its label instead of a Python
    list repr.

    Args:
        Level_5_code: Occupation code matched against ``esco_level5_df``.
        top_n: Maximum number of skills to request and return.

    Returns:
        Up to ``top_n`` dicts with skill_name, skill_description and
        skill_code. An empty list when the occupation has no linked skills or
        the reply cannot be parsed.
    """
    matches = esco_level5_df[esco_level5_df['code'] == Level_5_code]
    esco_occup = ", ".join(str(label) for label in matches['preferredLabel'].values.tolist())
    skill_filtered = get_skills_info_esco(Level_5_code)
    if skill_filtered.empty:
        # Nothing to filter: calling the model would only invite made-up skills.
        log_debug(f"No ESCO skills linked to occupation code {Level_5_code}")
        return []
    skill_filtered_options = "\n".join(
        f"- {row['skill_code']}: {row['skill_name']} - {row['skill_description']}"
        for _, row in skill_filtered.iterrows()
    )
    prompt = f"""
Here is a list of skills:
{skill_filtered_options}
Filter the skills that are relevant in the context of the work of the International Organisation for Migration.
Ensure that skills are relevant in the context of a {esco_occup} working for a non-profit public organization.
Required JSON structure:
{{
"skills": [
{{
"skill_name": "string",
"skill_description": "string",
"skill_code": "string"
}}
]
}}
**Important:**
- Do not duplicate any records of skills
- Keep only the {top_n} most relevant skills
- Return ONLY the JSON object with no other text
- Use double quotes for all strings
- No trailing commas in arrays/objects
- No markdown formatting (no ```json)
- No text before or after the JSON
- Escape all special characters in strings
- Ensure all brackets are properly closed
- No trailing commas in arrays/objects, especially before closing brackets
"""
    raw = gpt_call("You are an HR expert working for the International Organisation for Migration and with in-depth knowledge of the European Skills, Competences, Qualifications and Occupations. Extract skills required for this position.", prompt)
    json_text = _extract_json(raw)
    if not json_text:
        return []
    try:
        result = json.loads(json_text)
    except json.JSONDecodeError as e:
        log_debug(f"❌ JSON Skills parsing error: {e}")
        log_debug(f"🔍 Problematic JSON Skills: {json_text}")
        return []
    # Valid JSON of an unexpected shape must not crash the pipeline.
    skills = result.get("skills", []) if isinstance(result, dict) else []
    if not isinstance(skills, list):
        skills = []
    validated_skills = []
    for skill in skills:
        try:
            validated = {
                "skill_name": str(skill["skill_name"]).strip(),
                "skill_description": str(skill["skill_description"]).strip(),
                "skill_code": str(skill["skill_code"]).strip()
            }
            validated_skills.append(validated)
        except (KeyError, TypeError) as e:
            log_debug(f"⚠️ Skipping invalid skill: {skill}. Error: {e}")
            continue
    return validated_skills[:top_n]
def extract_skills(responsibilities: List[str], top_n: int = 10) -> List[Dict[str, str]]:
    """Ask the model for ESCO-style skills implied by the responsibilities.

    Args:
        responsibilities: Responsibilities text interpolated into the prompt.
        top_n: Maximum number of skills returned.

    Returns:
        Up to ``top_n`` dicts with skill_name, skill_description and
        skill_code (all stripped strings). An empty list when no JSON object
        is found in the reply or it cannot be decoded.
    """
    system_role = "You are an HR expert working for the International Organisation for Migration and with in-depth knowledge of the European Skills, Competences, Qualifications and Occupations. Extract skills required for this position."
    request = f"""
Here is a list of job responsibilities:
{responsibilities}
List the required skills and knowledge as bullet points (without numbers) using ESCO-style terms.
For each Skill:
1. skill_name: precise skills name as used in ESCO framework
2. skill_description: add the long description as mentioned in ESCO framework
3. skill_code: include the detailed corresponding ESCO code for that skill.
Required JSON structure:
{{
"skills": [
{{
"skill_name": "string",
"skill_description": "string",
"skill_code": "string"
}}
]
}}
**Important:**
- Return ONLY the JSON object with no other text
- Use double quotes for all strings
- No trailing commas in arrays/objects
- No markdown formatting (no ```json)
- No text before or after the JSON
- Escape all special characters in strings
- Ensure all brackets are properly closed
"""
    payload = _extract_json(gpt_call(system_role, request))
    if not payload:
        return []
    try:
        parsed = json.loads(payload)
        candidates = parsed.get("skills", [])
    except json.JSONDecodeError as e:
        log_debug(f"❌ JSON Skills extrac parsing error: {e}")
        log_debug(f"🔍 Problematic JSON Skills extract: {payload}")
        return []

    wanted_keys = ("skill_name", "skill_description", "skill_code")
    accepted = []
    for skill in candidates:
        try:
            # Entries missing a key (or not subscriptable by key) are dropped.
            cleaned = {key: str(skill[key]).strip() for key in wanted_keys}
        except (KeyError, TypeError) as e:
            log_debug(f"⚠️ Skipping invalid skill extract: {skill}. Error: {e}")
        else:
            accepted.append(cleaned)
    return accepted[:top_n]
def map_proficiency_and_assessment(skills: List[str], responsibilities: List[str]) -> List[Dict]:
    """Ask the model for importance, proficiency and assessment hints per skill.

    Args:
        skills: Skills interpolated into the prompt. Callers in this file
            pass the list of skill dicts produced by extract_skills or
            review_skills.
        responsibilities: Responsibilities text interpolated into the prompt.

    Returns:
        One dict per valid item with skill_name, importance, type,
        proficiency_level, distinctive_elements, resume_signals and
        assessment_method. Items with missing keys or non-string values are
        logged and skipped. An empty list when no JSON array is found or it
        cannot be decoded.
    """
    prompt = f"""
Here is a list of job responsibilities: {responsibilities} that have been associated with the following skills: {skills}
For each skill, accounting for the context defined within the responsibilities, return a JSON object with:
- skill_name: the name of the skill
- importance: essential or optional
- type: "skill/competence" or "knowledge"
- proficiency_level: Basic, Intermediate, or Advanced
- distinctive_elements: what specific and distinctive elements are required at this defined proficiency level?
- resume_signals: what to look for in a resume to assess this skill?
- assessment_method: what is the preferred assessment method to accurately assess this skill?
Respond ONLY with a list of dictionaries in valid JSON.
Use double quotes for all strings. No markdown, no commentary, no trailing commas.
"""
    raw = gpt_call("Define proficiency level and assessment for each skill.", prompt)
    json_text = _extract_json_array(raw)
    if not json_text:
        return []
    try:
        results = json.loads(json_text)
    except json.JSONDecodeError as e:
        log_debug(f"❌ JSON proficiency parsing error: {e}")
        log_debug(f"🔍 Problematic JSON proficiency: {json_text}")
        return []
    validated = []
    for item in results:
        try:
            validated.append({
                "skill_name": str(item["skill_name"]).strip(),
                "importance": item["importance"].strip().lower(),
                "type": item["type"].strip().lower(),
                "proficiency_level": item["proficiency_level"].strip().capitalize(),
                "distinctive_elements": item["distinctive_elements"].strip(),
                "resume_signals": item["resume_signals"].strip(),
                "assessment_method": item["assessment_method"].strip()
            })
        except (KeyError, TypeError, AttributeError) as e:
            # AttributeError: a field came back as null/list/number, so it
            # has no .strip(); skip the item rather than abort the batch.
            log_debug(f"⚠️ Skipping invalid profiency item: {item}. Error: {e}")
            continue
    return validated
def _extract_json_array(raw: str) -> str:
json_start = raw.find('[')
json_end = raw.rfind(']') + 1
if json_start == -1 or json_end == 0:
log_debug(f"❌ No JSON array found in response: {raw}")
return ""
json_text = raw[json_start:json_end]
json_text = re.sub(r',\s*([}\]])', r'\1', json_text)
json_text = re.sub(r'[\n\r\t]', ' ', json_text)
json_text = re.sub(r'(? List[str]:
prompt = f"""
Here is a list of job responsibilities: {responsibilities}
Infer the required level within the European Qualifications Framework (EQF) to implement them.
Identify the potential diplomas to testify such qualification
"""
raw = gpt_call("You are an HR expert that excel in developing competency-based interview questions.", prompt)
return [line.strip("-• ").strip() for line in raw.splitlines() if line.strip()]
def build_interview(responsibilities: List[str], skill_assess: List[str]) -> List[str]:
    """Ask the model for a structured 40-minute interview outline.

    Args:
        responsibilities: Responsibilities text interpolated into the prompt.
        skill_assess: Related skills interpolated into the prompt.

    Returns:
        The non-blank lines of the reply, with leading/trailing '-', '•' and
        spaces removed.
    """
    system_role = "You are an HR expert that excel in developing competency-based interview questions."
    request = f"""
Here is a list of job responsibilities: {responsibilities} and related skills: {skill_assess}
Output: A structured 40-minute interview with:
Opening questions (5 min)
Core competency-based questions (30 min, 5-6 questions)
Closing & candidate questions (5 min)
"""
    reply = gpt_call(system_role, request)
    outline = []
    for line in reply.splitlines():
        if line.strip():
            outline.append(line.strip("-• ").strip())
    return outline
def _extract_json(raw: str) -> str:
json_start = raw.find('{')
json_end = raw.rfind('}') + 1
if json_start == -1 or json_end == 0:
log_debug(f"❌ No JSON found in response: {raw}")
return ""
json_text = raw[json_start:json_end]
json_text = re.sub(r',\s*([}\]])', r'\1', json_text)
json_text = re.sub(r'[\n\r\t]', ' ', json_text)
json_text = re.sub(r'\s{2,}', ' ', json_text)
json_text = re.sub(r'\\(?!["\\/bfnrtu])', r'\\\\', json_text)
json_text = json_text.strip()
return json_text
# ================= Format Skills Visualisation =================
def format_skill_cards(skills_data):
    """Render skills as a block of HTML cards.

    NOTE(review): the original markup was stripped from this file, leaving
    unterminated string literals. The tags and class names below are a
    reconstruction around the surviving text and fields; align them with the
    application's stylesheet.

    Besides restoring valid syntax this version:
    - accepts either a list of skill dicts or a {"skills": [...]} wrapper;
    - tolerates fields that are present but None;
    - HTML-escapes every interpolated value, since they come from model output.

    Args:
        skills_data: List of skill dicts, or a dict with a "skills" list.

    Returns:
        HTML string; a "No skills data available" notice when there is
        nothing to show.
    """
    from html import escape

    if isinstance(skills_data, dict):
        skills_data = skills_data.get("skills")
    if not skills_data or not isinstance(skills_data, list):
        return '<div class="no-data">No skills data available</div>'

    def text(value, fallback='', capitalize=False):
        # dict.get defaults do not help when the key exists with a None value.
        if value is None or value == '':
            return escape(fallback)
        rendered = str(value)
        return escape(rendered.capitalize() if capitalize else rendered)

    cards = []
    for skill in skills_data:
        if not isinstance(skill, dict):
            continue
        # Safely get all fields with fallbacks
        skill_name = text(skill.get('skill_name'), 'Unnamed Skill')
        skill_code = text(skill.get('skill_code'), 'N/A')
        description = text(skill.get('skill_description'), 'No description available')
        skill_type = text(skill.get('type'), capitalize=True)
        importance = text(skill.get('importance'), capitalize=True)
        proficiency = text(skill.get('proficiency_level'), capitalize=True)
        distinctive = text(skill.get('distinctive_elements'), 'Not specified')
        resume_signals = text(skill.get('resume_signals'), 'Not specified')
        assessment = text(skill.get('assessment_method'), 'Not specified')
        card = f"""
<div class="skill-card">
<h3 class="skill-name">{skill_name}</h3>
<div class="skill-code">Code: {skill_code}</div>
<div class="skill-tags"><span class="skill-type">{skill_type}</span> <span class="skill-importance">{importance}</span></div>
<p class="skill-description">{description}</p>
<div class="skill-proficiency">{proficiency}</div>
<div class="skill-distinctive">{distinctive}</div>
<div class="skill-resume-signals">{resume_signals}</div>
<div class="skill-assessment">{assessment}</div>
</div>
"""
        cards.append(card)
    return f"""<div class="skills-container">
{''.join(cards)}
</div>"""
def get_progress_value(level):
    """Map a proficiency label to a progress value.

    Args:
        level: "basic", "intermediate" or "advanced" in any letter case;
            surrounding whitespace is ignored.

    Returns:
        "1", "2" or "3" as a string. Unknown, empty or non-string levels
        (including None) map to "1" instead of raising.
    """
    level_map = {"basic": 1, "intermediate": 2, "advanced": 3}
    if not isinstance(level, str):
        return "1"
    return str(level_map.get(level.strip().lower(), 1))
# ================= Format CCOG =================
def format_ccog_card(ccog_data):
    """Render the CCOG classification levels as an HTML card.

    NOTE(review): the original markup was stripped from this file, leaving
    unterminated string literals. The tags and class names below are a
    reconstruction around the surviving text; align them with the
    application's stylesheet.

    Interpolated values are HTML-escaped, and each entry is labelled with its
    real level number even if a level in between is missing.

    Args:
        ccog_data: Dict with 'Level_<n>_CCOG_code' / '_name' / '_desc' keys
            for n in 1..4.

    Returns:
        HTML string, or a notice when there is no usable data.
    """
    from html import escape

    if not ccog_data or not isinstance(ccog_data, dict):
        return '<div class="no-data">No CCOG classification data available</div>'
    # Extract level data
    levels = []
    for i in range(1, 5):
        code = ccog_data.get(f'Level_{i}_CCOG_code')
        name = ccog_data.get(f'Level_{i}_CCOG_name')
        desc = ccog_data.get(f'Level_{i}_CCOG_desc')
        if code or name:
            levels.append((i, code, name, desc))
    if not levels:
        return '<div class="no-data">No valid CCOG classification found</div>'
    # Build the card
    parts = ['<h3 class="classification-title">UN Common Classification of Occupational Groups</h3>']
    for i, code, name, desc in levels:
        desc_html = f'<div class="level-desc">{escape(str(desc))}</div>' if desc else ""
        parts.append(f"""
<div class="classification-level">
<span class="level-label">Level {i}</span> <span class="level-code">{escape(str(code or 'N/A'))}</span>
<div class="level-name">{escape(str(name or 'Not classified'))}</div>
{desc_html}
</div>
""")
    card = "".join(parts)
    return f"""<div class="classification-card ccog-card">
{card}
</div>"""
# ================= Format ESCO =================
def format_esco_card(esco_data):
    """Render the ESCO classification levels as an HTML card.

    NOTE(review): the original markup was stripped from this file, leaving
    unterminated string literals. The tags and class names below are a
    reconstruction around the surviving text; align them with the
    application's stylesheet.

    Interpolated values are HTML-escaped, and each entry is labelled with its
    real level number even if a level in between is missing.

    Args:
        esco_data: Dict with 'Level_<n>_ESCO_code' / '_name' / '_desc' keys
            for n in 1..5.

    Returns:
        HTML string, or a notice when there is no usable data.
    """
    from html import escape

    if not esco_data or not isinstance(esco_data, dict):
        return '<div class="no-data">No ESCO classification data available</div>'
    # Extract level data
    levels = []
    for i in range(1, 6):
        code = esco_data.get(f'Level_{i}_ESCO_code')
        name = esco_data.get(f'Level_{i}_ESCO_name')
        desc = esco_data.get(f'Level_{i}_ESCO_desc')
        if code or name:
            levels.append((i, code, name, desc))
    if not levels:
        return '<div class="no-data">No valid ESCO classification found</div>'
    # Build the card
    parts = ['<h3 class="classification-title">ESCO Occupation Classification</h3>']
    for i, code, name, desc in levels:
        desc_html = f'<div class="level-desc">{escape(str(desc))}</div>' if desc else ""
        parts.append(f"""
<div class="classification-level">
<span class="level-label">Level {i}</span> <span class="level-code">{escape(str(code or 'N/A'))}</span>
<div class="level-name">{escape(str(name or 'Not classified'))}</div>
{desc_html}
</div>
""")
    card = "".join(parts)
    return f"""<div class="classification-card esco-card">
{card}
</div>"""
# ================= Process Analysis =================
import tempfile
import json
from concurrent.futures import ThreadPoolExecutor
def process_pdf(file):
if file is None:
return (
"Please upload a PDF file.",
"",
"",
"",
{},
"",
[],
{},
{},
"No file uploaded.",
None # JSON path
)
try:
extracted_text = extract_text_from_pdf(file.name)
responsibilities = extract_section_from_pdf(extracted_text, section_title="Responsibilities and Accountabilities")
if not responsibilities:
log_debug(f"Skipping {os.path.basename(file.name)} - no responsibilities section found")
return (
os.path.basename(file.name),
"",
"",
"",
{},
"",
[],
{},
{},
"No responsibilities section found.",
None # JSON path
)
# Use ThreadPoolExecutor to parallelize independent tasks
with ThreadPoolExecutor() as executor:
job_family_future = executor.submit(classify_job_family, responsibilities)
occ_group_future = executor.submit(classify_occupational_group_by_level, responsibilities)
esco_occ_future = executor.submit(classify_esco_by_hierarchical_level, responsibilities)
qualification_future = executor.submit(extract_qualification, responsibilities)
skills_future = executor.submit(extract_skills, responsibilities)
job_family = job_family_future.result()
occ_group = occ_group_future.result()
esco_occ = esco_occ_future.result()
qualification = qualification_future.result()
skills = skills_future.result()
log_debug(f"Identified {job_family}")
interview = build_interview(responsibilities, skills)
## Map skills from responsibilities
skill_map = map_proficiency_and_assessment(skills, responsibilities)
time.sleep(6)
assessment_lookup = {item['skill_name']: item for item in skill_map}
joined_skills = [
{
"skill_name": skill["skill_name"],
"skill_description": skill["skill_description"],
"skill_code": skill["skill_code"],
"importance": assessment_lookup.get(skill["skill_name"], {}).get("importance"),
"type": assessment_lookup.get(skill["skill_name"], {}).get("type"),
"proficiency_level": assessment_lookup.get(skill["skill_name"], {}).get("proficiency_level"),
"distinctive_elements": assessment_lookup.get(skill["skill_name"], {}).get("distinctive_elements"),
"resume_signals": assessment_lookup.get(skill["skill_name"], {}).get("resume_signals"),
"assessment_method": assessment_lookup.get(skill["skill_name"], {}).get("assessment_method")
}
for skill in skills
]
## Generate ESCO skills if we have level 5 mapping....
has_esco = esco_occ.get("Level_5_ESCO_code") is not None
skill_esco_extract = []
skill_esco_map = []
if has_esco:
Level_5_code = esco_occ["Level_5_ESCO_code"]
skill_esco_extract = review_skills(Level_5_code)
skill_esco_map = map_proficiency_and_assessment(skill_esco_extract, responsibilities)
else:
log_debug(f"No Level 5 ESCO code found for {os.path.basename(file.name)}, skipping ESCO skills mapping")
joined_skills_esco = []
if has_esco and skill_esco_extract:
assessment_esco_lookup = {item['skill_name']: item for item in skill_esco_map}
joined_skills_esco = [
{
"skill_name": skill["skill_name"],
"skill_description": skill["skill_description"],
"skill_code": skill["skill_code"],
**assessment_esco_lookup.get(skill["skill_name"], {})
}
for skill in skill_esco_extract
]
if has_esco:
esco_levels = {f"Level_{i}_ESCO_{field}": esco_occ.get(f"Level_{i}_ESCO_{field}")
for i in range(1, 6) for field in ["code", "name", "desc"]}
esco_skills = {
"skills": joined_skills_esco
}
else:
esco_levels = {f"Level_{i}_ESCO_{field}": None
for i in range(1, 6) for field in ["code", "name", "desc"]}
esco_skills = None
# Prepare all data for JSON output
result_data = {
"file_name": os.path.basename(file.name),
"responsibilities": responsibilities,
"job_family": job_family,
"qualification": qualification,
"ccoq_levels": {f"Level_{i}_CCOG_{field}": occ_group.get(f"Level_{i}_CCOG_{field}")
for i in range(1, 5) for field in ["code", "name", "desc"]},
"interview_questions": build_interview(responsibilities, skills),
"skills": joined_skills,
"esco_levels": {f"Level_{i}_ESCO_{field}": esco_occ.get(f"Level_{i}_ESCO_{field}")
for i in range(1, 5) for field in ["code", "name", "desc"]},
"esco_skills": esco_skills,
"processing_time": time.strftime("%Y-%m-%d %H:%M:%S")
}
# Save to temporary JSON file
with tempfile.NamedTemporaryFile(suffix=".json", delete=False, mode='w') as f:
json.dump(result_data, f, indent=2)
json_path = f.name
log_debug(f"Results saved to temporary JSON file: {json_path}")
# Format outputs for display through html cards
formatted_skills = format_skill_cards(joined_skills)
formatted_ccog = format_ccog_card(result_data['ccoq_levels'])
formatted_esco_levels = format_esco_card(result_data['esco_levels'])
formatted_esco_skills = format_skill_cards(result_data['esco_skills'])
return (
os.path.basename(file.name),
responsibilities,
job_family,
"\n".join(qualification),
formatted_ccog,
"\n".join(interview),
formatted_skills,
formatted_esco_levels,
formatted_esco_skills,
"Processed...",
json_path # Return path to JSON file
)
except Exception as e:
error_message = f"Error processing PDF: {str(e)}"
log_debug(error_message)
traceback.print_exc()
return (
error_message,
"",
"",
"",
{},
"",
[],
{},
{},
error_message,
None # No JSON path on error
)
# ================= Build Word Report =================
from docx import Document
import os
import re
import time
import tempfile
from typing import Dict, List, Union
def create_error_doc(message: str) -> str:
    """Create a simple Word document with an error message.

    Args:
        message: Text placed under the 'Error Generating Report' heading.

    Returns:
        Path of the generated .docx file. The file is created with
        delete=False, so it is not removed automatically.
    """
    doc = Document()
    doc.add_heading('Error Generating Report', level=1)
    doc.add_paragraph(message)
    # Reserve a path and close the handle before saving: the handle used to
    # stay open, and an open temp file cannot be reopened by name on Windows.
    with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as temp_file:
        output_path = temp_file.name
    doc.save(output_path)
    return output_path
def _report_value(data: Dict, keys: List[str], default):
    """Return the first non-None value stored in *data* under any of *keys*."""
    for key in keys:
        value = data.get(key)
        if value is not None:
            return value
    return default


def _report_text(value) -> str:
    """Turn a string or a list of strings into one paragraph-ready string."""
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        return "\n".join(str(item) for item in value if item is not None)
    return str(value)


def _report_lines(value) -> List[str]:
    """Turn a string, list or scalar into a list of non-empty bullet lines."""
    if isinstance(value, str):
        # A plain string would otherwise be iterated character by character.
        return [line for line in value.splitlines() if line.strip()]
    if isinstance(value, (list, tuple)):
        return [str(item) for item in value if item is not None]
    return [str(value)]


def _report_skills(value) -> List[Dict]:
    """Return skill dicts from either a {"skills": [...]} wrapper or a bare list."""
    if isinstance(value, dict):
        value = value.get("skills", [])
    if not isinstance(value, (list, tuple)):
        return []
    return [entry for entry in value if isinstance(entry, dict)]


def _report_levels(value) -> Dict:
    """Return the level mapping without None entries ({} if not a mapping)."""
    if not isinstance(value, dict):
        return {}
    return {key: level for key, level in value.items() if level is not None}


def generate_word_document(json_path: Optional[str]) -> str:
    """
    Generate a Word document from the analysis results JSON file.

    The JSON written by the processing step in this file stores the
    interview questions under "interview_questions", the ESCO skills under
    "esco_skills" and the C-COG levels under "ccoq_levels". Those keys are
    read first; the names this function used historically ("interview",
    "skills_esco", "ccog_levels") are still accepted as fallbacks.

    Args:
        json_path: Path to the JSON file containing analysis results
    Returns:
        Path to the generated Word document. When the input is missing or
        unreadable, or the report cannot be built or saved, the path of a
        short error/partial document is returned instead of raising.
    """
    if not json_path or not os.path.exists(json_path):
        return create_error_doc("No valid analysis data was provided.")
    try:
        with open(json_path, 'r') as f:
            data = json.load(f)
    except Exception as e:
        return create_error_doc(f"Failed to load JSON file: {str(e)}")
    # Initialize document with metadata
    doc = Document()
    doc.core_properties.author = "IOM Talent Management System"
    doc.core_properties.title = "Position Description Analysis Report"
    # Default values for all fields. The level mappings are included so the
    # fallback below yields a dictionary with every key the report reads.
    default_values = {
        "file": "Unknown file",
        "responsibilities": "No responsibilities extracted.",
        "classified_job_family": "No job family identified.",
        "qualification": ["No qualification information available."],
        "interview": ["No interview questions generated."],
        "skills": {"skills": [{"skill_name": "No skills identified", "description": "", "code": ""}]},
        "skills_esco": {"skills": [{"skill_name": "No ESCO skills identified", "description": "", "code": ""}]},
        "ccog_levels": {},
        "esco_levels": {}
    }
    # Safely build the result dictionary with fallbacks
    try:
        if not isinstance(data, dict):
            raise TypeError(f"expected a JSON object, got {type(data).__name__}")
        result = {
            "file": str(_report_value(data, ["file"], default_values["file"])),
            "responsibilities": _report_text(
                _report_value(data, ["responsibilities"], default_values["responsibilities"])
            ) or default_values["responsibilities"],
            "classified_job_family": _report_text(
                _report_value(data, ["job_family"], default_values["classified_job_family"])
            ) or default_values["classified_job_family"],
            "qualification": _report_lines(
                _report_value(data, ["qualification"], default_values["qualification"])
            ) or default_values["qualification"],
            "interview": _report_lines(
                _report_value(data, ["interview_questions", "interview"], default_values["interview"])
            ) or default_values["interview"],
            "skills": {"skills": _report_skills(
                _report_value(data, ["skills"], default_values["skills"])
            ) or default_values["skills"]["skills"]},
            "skills_esco": {"skills": _report_skills(
                _report_value(data, ["esco_skills", "skills_esco"], default_values["skills_esco"])
            ) or default_values["skills_esco"]["skills"]},
            # "ccoq_levels" is the spelling used by the writer of the JSON.
            "ccog_levels": _report_levels(_report_value(data, ["ccoq_levels", "ccog_levels"], {})),
            "esco_levels": _report_levels(_report_value(data, ["esco_levels"], {}))
        }
    except Exception as e:
        log_debug(f"Error building result dictionary: {str(e)}")
        result = default_values
    # DOCUMENT CONTENT GENERATION
    try:
        # Document header
        doc.add_heading('Job Description Analysis Report', level=0)
        doc.add_paragraph(f"Generated on {time.strftime('%Y-%m-%d %H:%M:%S')}")
        doc.add_paragraph("International Organization for Migration", style="Intense Quote")
        doc.add_heading('Position Description Analysis Report', level=1)
        doc.add_paragraph(f"File: {result['file']}")
        doc.add_paragraph(f"Job Family: {result['classified_job_family']}")
        doc.add_heading('Responsibilities', level=2)
        doc.add_paragraph(result['responsibilities'])
        doc.add_heading('Qualifications', level=2)
        for item in result['qualification']:
            doc.add_paragraph(item, style='List Bullet')
        doc.add_heading('Interview Questions', level=2)
        for item in result['interview']:
            doc.add_paragraph(item, style='List Bullet')
        for heading, section in (('Skills (Extracted)', 'skills'), ('Skills (ESCO)', 'skills_esco')):
            doc.add_heading(heading, level=2)
            for skill in result[section].get("skills", []):
                line = str(skill.get('skill_name') or 'Unnamed Skill')
                description = skill.get('description') or ''
                # Avoid a dangling " - " when a skill has no description.
                if description:
                    line = f"{line} - {description}"
                doc.add_paragraph(line)
        if result["ccog_levels"]:
            doc.add_heading('C-COG Levels', level=2)
            for key, value in result["ccog_levels"].items():
                doc.add_paragraph(f"{key}: {value}")
        if result["esco_levels"]:
            doc.add_heading('ESCO Levels', level=2)
            for key, value in result["esco_levels"].items():
                doc.add_paragraph(f"{key}: {value}")
        # Footer
        doc.add_paragraph()
        doc.add_paragraph("Generated by IOM Talent Management AI Tool", style='Footer')
    except Exception as e:
        log_debug(f"Error generating document content: {str(e)}")
        # Fallback to simple error document
        doc = Document()
        doc.add_heading("Partial Report Generated", level=1)
        doc.add_paragraph(f"Some sections could not be generated due to: {str(e)}")
    # FILE SAVING WITH MULTIPLE FALLBACKS
    try:
        # Generate appropriate filename
        base_name = os.path.splitext(os.path.basename(str(result['file'])))[0]
        if base_name:
            clean_name = re.sub(r'[^\w\-]', '_', base_name)[:50]  # Sanitize and truncate
            output_filename = f"{clean_name}_analysis_{time.strftime('%Y%m%d')}.docx"
        else:
            output_filename = f"job_analysis_{time.strftime('%Y%m%d_%H%M%S')}.docx"
        # Try saving to reports directory first
        output_dir = "generated_reports"
        try:
            os.makedirs(output_dir, exist_ok=True)
            output_path = os.path.join(output_dir, output_filename)
            doc.save(output_path)
            return output_path
        except OSError:
            # Fallback to system temp directory. OSError (of which
            # PermissionError is a subclass) also covers read-only or
            # otherwise unusable working directories.
            temp_dir = tempfile.gettempdir()
            temp_path = os.path.join(temp_dir, output_filename)
            doc.save(temp_path)
            return temp_path
    except Exception as e:
        # Ultimate fallback with error document
        error_doc = Document()
        error_doc.add_heading("Error Generating Report", level=1)
        error_doc.add_paragraph(f"Could not save report due to: {str(e)}")
        fallback_path = os.path.join(tempfile.gettempdir(), f"error_report_{time.strftime('%Y%m%d_%H%M%S')}.docx")
        error_doc.save(fallback_path)
        return fallback_path
# ================= GRADIO INTERFACE =================
# Stylesheet injected into the Gradio page. Kept as a module-level constant so
# the layout code below stays readable.
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Lato:wght@400;700&display=swap');
@import url('https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css');
/* Completely disable Gradio's dark theme */
.gradio-container.dark {
--body-background-fill: white !important;
--background-fill-primary: white !important;
--background-fill-secondary: #f8f9fa !important;
--block-background-fill: white !important;
--input-background-fill: white !important;
--block-label-text-color: #212529 !important;
--body-text-color: #212529 !important;
--block-title-text-color: var(--primary-color) !important;
--border-color-primary: #dee2e6 !important;
}
.gradio-container.dark .gr-markdown,
.gradio-container.dark .gr-textbox,
.gradio-container.dark .gr-dropdown,
.gradio-container.dark .output-section {
background: white !important;
color: #212529 !important;
border-color: #dee2e6 !important;
}
/* Gradio layout fixes */
.gradio-container {
max-width: none !important;
padding: 0 20px !important;
}
.gradio-container .gradio-row {
max-width: 100% !important;
margin: 0 auto !important;
flex: 1 !important;
display: grid !important;
grid-template-columns: 1fr !important;
}
.gradio-container .gradio-column {
min-width: 0 !important;
padding: 0 !important;
flex: 1 !important;
max-width: none !important;
}
/* Ensure the parent container doesn't constrain the grid */
.container-wrap {
width: 100%;
max-width: none !important;
padding: 0 !important;
margin: 0 !important;
}
/* Set the size of the SVG icon for file download */
.feather-file {
width: 20px !important; /* Adjust the size as needed */
height: 20px !important; /* Adjust the size as needed */
}
/* Base Styles */
:root {
--primary-color: #0033A0;
--secondary-color: #e67e22;
--accent-color: #f59e0b;
--dark-color: #34495e;
--light-color: #ecf0f1;
--success-color: #27ae60;
--warning-color: #f39c12;
--danger-color: #e74c3c;
--text-color: #333;
--text-light: #7f8c8d;
--border-radius: 8px;
--box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
--transition: all 0.3s ease;
}
/* Header Styles */
.header {
text-align: center;
margin-bottom: 2rem;
padding: 1rem;
}
.header h1 {
margin: 0;
font-family: 'Lato', sans-serif;
font-size: 2.5rem;
font-weight: 600;
color: var(--primary-color);
}
.header p {
margin: 0.5rem 0 0;
font-family: 'Lato', sans-serif;
opacity: 0.9;
font-size: 1.5rem;
color: #4b5563;
}
/* Section Titles */
.section-title {
display: flex;
align-items: left;
font-family: 'Lato', sans-serif;
gap: 0.5rem;
color: var(--primary-color);
margin: 1rem 0;
font-size: 1.25rem;
font-weight: 600;
}
.section-title i {
font-size: 1.1em;
color: var(--accent-color);
}
/* Input Section */
.input-section {
background: white;
padding: 0.75rem 0.5rem;
border: 1px solid #d1d5db;
border-radius: var(--border-radius);
box-shadow: var(--box-shadow);
margin-right: 1rem;
}
/* Output Section */
.output-section {
background: white;
padding: 1.5rem;
border-radius: var(--border-radius);
box-shadow: var(--box-shadow);
}
/* Form Elements */
.gr-textbox, .gr-dropdown {
border: 1px solid #ddd;
border-radius: var(--border-radius) !important;
padding: 0.75rem 1rem !important;
transition: var(--transition);
}
.gr-textbox:focus, .gr-dropdown:focus {
border-color: var(--primary-color) !important;
box-shadow: 0 0 0 2px rgba(44, 110, 203, 0.2) !important;
outline: none !important;
}
.gr-textbox::placeholder {
color: var(--text-light) !important;
opacity: 0.7 !important;
}
label {
font-weight: 500 !important;
color: var(--dark-color) !important;
margin-bottom: 0.5rem !important;
display: block !important;
}
/* Buttons */
.btn-primary {
background: var(--primary-color) !important;
color: white !important;
border: none !important;
border-radius: var(--border-radius) !important;
padding: 0.75rem 1.5rem !important;
font-weight: 500 !important;
transition: var(--transition) !important;
text-transform: uppercase !important;
letter-spacing: 0.5px !important;
}
.btn-primary:hover {
background: #002080 !important;
transform: translateY(-2px) !important;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.15) !important;
}
.btn-primary:active {
transform: translateY(0) !important;
}
/* Intro */
.intro-box {
background: #f0f7ff;
border-left: 4px solid #0033A0;
border-radius: 4px;
padding: 16px;
margin-bottom: 20px;
}
.intro-title {
color: #0033A0;
font-weight: 600;
margin-top: 0 !important;
}
.intro-icon {
color: #0033A0;
margin-right: 8px;
}
.benefits-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
gap: 12px;
margin: 16px 0;
}
.benefit-card {
background: white;
padding: 12px;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0,0,0,0.05);
}
/* Skills Card */
.skills-outer-container {
width: 100%;
padding: 0 1rem;
box-sizing: border-box;
}
.skills-container {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(350px, 1fr));
gap: 1.5rem;
padding: 1rem;
width: 100%;
margin: 0 auto;
}
/* Card styling */
.skill-card {
background: white;
border-radius: 8px;
box-shadow: 0 2px 12px rgba(0,0,0,0.08);
overflow: hidden;
transition: all 0.3s ease;
border: 1px solid #e0e0e0;
height: 100%;
display: flex;
flex-direction: column;
}
.skill-card:hover {
transform: translateY(-5px);
box-shadow: 0 6px 16px rgba(0,0,0,0.12);
}
/* Header section */
.skill-header {
background: #ecf0f1;
color: white;
padding: 1.2rem;
display: flex;
flex-direction: column;
gap: 0.8rem;
}
.skill-title {
display: flex;
justify-content: space-between;
align-items: center;
}
.skill-title h3 {
margin: 0;
font-size: 1.2rem;
font-weight: 600;
}
.skill-code {
font-size: 0.85rem;
opacity: 0.8;
background: rgba(255,255,255,0.15);
padding: 0.25rem 0.5rem;
border-radius: 4px;
}
.skill-pills {
display: flex;
gap: 0.5rem;
flex-wrap: wrap;
}
.skill-pill {
padding: 0.35rem 0.7rem;
border-radius: 999px;
font-size: 0.8rem;
font-weight: 500;
}
/* Type and Importance pills */
.skill-pill.type-skill { background: #4CAF50; color: white; }
.skill-pill.type-knowledge { background: #2196F3; color: white; }
.skill-pill.importance-essential { background: #F44336; color: white; }
.skill-pill.importance-optional { background: #FF9800; color: white; }
/* Body section */
.skill-body {
padding: 1.2rem;
}
.skill-description {
margin-bottom: 1.2rem;
padding-bottom: 1rem;
border-bottom: 1px dashed #eee;
}
.skill-description p {
margin: 0;
color: #555;
line-height: 1.5;
}
/* Details sections */
.skill-details {
display: flex;
flex-direction: column;
gap: 1rem;
}
.detail-group {
display: flex;
flex-direction: column;
gap: 0.3rem;
}
.detail-group label {
font-weight: 600;
font-size: 0.9rem;
color: #0033A0;
}
.detail-content {
margin: 0;
font-size: 0.95rem;
color: #444;
line-height: 1.5;
}
/* Proficiency bar */
.proficiency-bar {
display: flex;
align-items: center;
gap: 0.8rem;
margin-top: 0.3rem;
}
progress {
flex-grow: 1;
height: 8px;
border-radius: 4px;
}
progress::-webkit-progress-bar {
background-color: #f0f0f0;
border-radius: 4px;
}
progress::-webkit-progress-value {
background-color: #0033A0;
border-radius: 4px;
}
.proficiency-bar span {
font-size: 0.9rem;
font-weight: 500;
min-width: 80px;
text-align: right;
}
/* CCOG card */
.ccog-container {
margin: 1.5rem 0;
}
.ccog-card {
background: white;
border-radius: 10px;
box-shadow: 0 4px 12px rgba(0,0,0,0.08);
overflow: hidden;
border: 1px solid #e0e0e0;
}
.ccog-header {
background: #ecf0f1;
color: white;
padding: 1.2rem;
border-bottom: 2px solid rgba(255,255,255,0.1);
}
.ccog-header h3 {
margin: 0;
font-size: 1.3rem;
font-weight: 600;
}
.ccog-system {
opacity: 0.9;
font-size: 0.85rem;
margin-top: 0.3rem;
}
.ccog-levels {
padding: 1rem;
display: flex;
flex-direction: column;
gap: 0.5rem;
}
.ccog-level {
padding: 1rem;
border-radius: 6px;
position: relative;
}
.ccog-level.active {
background: #f8fafc;
border-left: 4px solid #0033A0;
}
.ccog-level.inactive {
background: #f5f5f5;
opacity: 0.7;
}
/* ESCO card */
.esco-container {
margin: 1.5rem 0;
}
.esco-card {
background: white;
border-radius: 10px;
box-shadow: 0 4px 12px rgba(0,0,0,0.08);
overflow: hidden;
border: 1px solid #e0e0e0;
}
.esco-header {
background: #ecf0f1;
color: white;
padding: 1.2rem;
border-bottom: 2px solid rgba(255,255,255,0.1);
}
.esco-header h3 {
margin: 0;
font-size: 1.3rem;
font-weight: 600;
}
.esco-system {
opacity: 0.9;
font-size: 0.85rem;
margin-top: 0.3rem;
}
.esco-levels {
padding: 1rem;
display: flex;
flex-direction: column;
gap: 0.5rem;
}
.esco-level {
padding: 1rem;
border-radius: 6px;
position: relative;
}
.esco-level.active {
background: #f8fafc;
border-left: 4px solid #0033A0;
}
.esco-level.inactive {
background: #f5f5f5;
opacity: 0.7;
}
.level-header {
display: flex;
justify-content: space-between;
margin-bottom: 0.5rem;
align-items: center;
}
.level-number {
font-weight: 600;
color: #0033A0;
font-size: 0.9rem;
}
.level-code {
background: rgba(0,51,160,0.1);
color: #0033A0;
padding: 0.2rem 0.5rem;
border-radius: 4px;
font-size: 0.8rem;
font-family: monospace;
}
.level-name {
font-weight: 500;
font-size: 1.05rem;
margin-bottom: 0.5rem;
color: #333;
}
.level-desc {
font-size: 0.9rem;
color: #555;
line-height: 1.5;
padding-top: 0.5rem;
border-top: 1px dashed #e0e0e0;
margin-top: 0.5rem;
}
/* Output Markdown */
.gr-markdown {
background: #f9f9f9;
padding: 1.5rem;
border-radius: var(--border-radius);
border-left: 4px solid var(--primary-color);
}
/* Debug Console (targets the elem_classes value set on the log textbox) */
.debug-console textarea {
font-family: monospace !important;
background: #2c3e50 !important;
color: #ecf0f1 !important;
border-radius: var(--border-radius) !important;
padding: 1rem !important;
}
/* Responsive Layout */
/* For larger screens */
@media (min-width: 1200px) {
.skills-container {
grid-template-columns: repeat(auto-fit, minmax(380px, 1fr));
max-width: 1400px;
}
}
@media (max-width: 768px) {
.gr-row {
flex-direction: column !important;
}
.input-section {
margin-right: 0 !important;
margin-bottom: 1rem !important;
}
.skills-container {
grid-template-columns: 1fr;
}
.skill-header {
flex-direction: column;
}
.skill-title {
flex-direction: column;
align-items: flex-start;
gap: 0.5rem;
}
.ccog-level {
padding: 0.8rem;
}
.esco-level {
padding: 0.8rem;
}
.level-name {
font-size: 1rem;
}
}
"""

# Page layout and event wiring. Every component is stacked vertically: the
# upload controls come first, then one section per analysis output, then the
# Word-report download.
with gr.Blocks(
    title="AI-powered tool to review Job Position Description",
    css=CUSTOM_CSS,
    head="\n",
) as demo:
    # Header section
    with gr.Column():
        with gr.Row():
            with gr.Column():
                gr.HTML(
                    "\n"
                    "Position Description Review (Demo)\n"
                    "Use AI to standardise an initial position description.\n"
                )
    # Introduction Section
    with gr.Column(elem_classes="intro-box"):
        gr.Markdown(
            "\n"
            "⏱️ Time Saver: Reduces hours of manual research and mapping to minutes and minimise risk of errors\n"
            "⚖️ Reduced Recruitment Bias: Get Skills Requirements Recommendations using both the description of responsibilities and the standard ESCO skills linked the previously mapped ESCO occupation\n"
        )
    with gr.Row():
        with gr.Column():
            file_input = gr.File(
                label="Upload a Post Description PDF file - not a scanned file!!!",
                file_types=[".pdf"],
            )
            submit_btn = gr.Button(
                value="✨ Analyse this Post Description! It should take about 2 minutes...",
                variant="primary",
                elem_classes="btn-primary",
            )
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Input for Analysis")
            file_name_output = gr.Textbox(label="File Name", interactive=False)
            responsibilities_output = gr.Textbox(
                label="List of Responsibilities used for the review",
                lines=5,
                interactive=False,
            )
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Mapped Skills")
            skills_output = gr.HTML(label="", elem_classes="skills-container")
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Expected Qualifications")
            qualification_output = gr.Textbox(label="", lines=5, interactive=False)
    with gr.Row():
        with gr.Column():
            gr.Markdown("## Interview Questions")
            interview_output = gr.Textbox(label="", lines=10, interactive=False)
    with gr.Row():
        with gr.Column():
            gr.Markdown("### Mapped Job Family")
            job_family_output = gr.Textbox(label="", interactive=False)
    with gr.Row():
        with gr.Column():
            ccoq_levels_output = gr.HTML(label="", elem_classes="ccog-container")
    with gr.Row():
        with gr.Column():
            esco_levels_output = gr.HTML(label="", elem_classes="esco-container")
    with gr.Row():
        with gr.Column():
            esco_skills_output = gr.HTML(label="Linked ESCO Skills", elem_classes="skills-container")
    with gr.Row():
        with gr.Column():
            download_btn = gr.Button(
                value="📄 Download Word Document",
                variant="primary",
                elem_classes="btn-primary",
            )
    # The log textbox is always created and merely hidden when DEBUG is off.
    # The submit handler returns a status message in every case, so the
    # outputs list below must always contain a real component at that
    # position (a None placeholder breaks the output count).
    with gr.Row(visible=DEBUG):
        with gr.Column():
            debug_console = gr.Textbox(
                label="⚠️ Execution Log",
                interactive=False,
                elem_classes=["debug-console"],
            )
    # Carries the path of the JSON results file from the analysis step to
    # the Word-report step.
    temp_json_path = gr.Textbox(label="", interactive=False)
    report_file = gr.File(label="Download the corresponding Word report")
    submit_btn.click(
        fn=process_pdf,
        inputs=file_input,
        outputs=[
            file_name_output,
            responsibilities_output,
            job_family_output,
            qualification_output,
            ccoq_levels_output,
            interview_output,
            skills_output,
            esco_levels_output,
            esco_skills_output,
            debug_console,
            temp_json_path,
        ],
    )
    download_btn.click(
        fn=generate_word_document,
        inputs=[temp_json_path],
        outputs=report_file,
    )
# Start the Gradio server only when this file is run as a script, not when
# it is imported.
if __name__ == "__main__":
    launch_options = {"show_error": True, "debug": DEBUG}
    demo.launch(**launch_options)