"""
Skills APIs - compute skill-related metrics from actual data.

AUTO-GENERATED by scripts/generate_hf.sh - DO NOT EDIT DIRECTLY
Edit skills.py in main repo and regenerate.
"""

from typing import Dict, List, Any, Optional, Union

import pandas as pd
from loguru import logger

from data_loader import get_data_loader
from models import (
    RequisitionNotFoundResponse,
    SkillAnalysisResponse,
    SkillImpactFillRateResponse,
    SkillImpactSLAResponse,
    SkillRelevanceResponse,
    SuccessfulPostingResponse,
    DataSourcesResponse,
    SkillJustificationData,
    SkillJustificationImpact,
    SuccessCriteria,
)

BPO_LOG_API_CALLS = False  # Disabled for deployment


def _log_api_call(msg: str) -> None:
    """Log API call if BPO_LOG_API_CALLS is enabled."""
    if BPO_LOG_API_CALLS:
        logger.info(msg)


def _check_requisition_valid(requisition_id: str) -> Optional[RequisitionNotFoundResponse]:
    """
    Check if a requisition ID is valid.

    Args:
        requisition_id: The job requisition ID to look up in the data loader.

    Returns:
        None if the loader reports the ID as valid; otherwise a
        RequisitionNotFoundResponse carrying the loader's suggested IDs.
    """
    loader = get_data_loader()
    if not loader.is_valid_requisition(requisition_id):
        suggestions = loader.get_suggested_requisitions(requisition_id)
        return RequisitionNotFoundResponse(
            error="requisition_not_found",
            message=f"No job can be found with the ID {requisition_id}.",
            suggested_requisition_ids=suggestions,
        )
    return None


def _has_skill_mask(frame: pd.DataFrame, skill: str) -> pd.Series:
    """
    Build a row mask that is True where ``skills_parsed`` contains *skill*.

    The result is cast to bool so it can always be used (and negated with
    ``~``) as a boolean row indexer.
    """
    return frame['skills_parsed'].apply(lambda skills: skill in skills).astype(bool)


def _sla_rate(frame: pd.DataFrame) -> float:
    """Return the mean of ``sla_met`` over *frame*, or 0 when *frame* has no rows."""
    return frame['sla_met'].mean() if len(frame) > 0 else 0


def _describe_sla_correlation(diff: float) -> str:
    """
    Map an SLA-rate difference (with skill minus without skill) to a label.

    Differences beyond +/-0.10 are "highly" positive/negative, any other
    non-zero difference is "slightly" so, and anything else (including a
    NaN difference, which fails every comparison) is "no impact on SLA".
    """
    if diff < -0.10:
        return "highly negative impact on SLA"
    if diff < 0:
        return "slightly negative impact on SLA"
    if diff > 0.10:
        return "highly positive impact on SLA"
    if diff > 0:
        return "slightly positive impact on SLA"
    return "no impact on SLA"


def _compute_fill_metrics(data: pd.DataFrame, req_ids) -> Dict[str, Any]:
    """
    Compute fill-rate metrics over the rows of *data* belonging to *req_ids*.

    Args:
        data: Rows returned by the loader for similar requisitions.
        req_ids: Array of requisition IDs (as produced by ``Series.unique()``).

    Returns:
        Dict with ``fill_rate_percentage`` (percent of *req_ids* with at
        least one hired row, rounded to 1 decimal), ``time_to_fill_days``
        (mean ``hire_date - applied_at`` in whole days over hired rows) and
        ``candidate_pool_size`` (number of rows). All zeros when *req_ids*
        is empty.
    """
    # req_ids is an array, so test its length rather than its truthiness.
    if len(req_ids) == 0:
        return {"fill_rate_percentage": 0, "time_to_fill_days": 0, "candidate_pool_size": 0}

    req_data = data[data['requisition_id'].isin(req_ids)]
    hired = req_data[req_data['hired']]

    # Fill rate: % of reqs that got at least one hire
    reqs_with_hires = hired['requisition_id'].nunique()
    fill_rate = reqs_with_hires / len(req_ids) * 100

    # Time to fill: average days from applied to hired
    time_to_fill = 0
    if len(hired) > 0:
        mean_days = (hired['hire_date'] - hired['applied_at']).dt.days.mean()
        # The mean is NaN when no hired row has both dates; int(NaN) would
        # raise, so fall back to 0 like the "no hires" case.
        if pd.notna(mean_days):
            time_to_fill = mean_days

    return {
        "fill_rate_percentage": round(fill_rate, 1),
        "time_to_fill_days": int(time_to_fill),
        # Candidate pool size
        "candidate_pool_size": len(req_data),
    }


def get_skill_analysis(requisition_id: str) -> Union[SkillAnalysisResponse, RequisitionNotFoundResponse]:
    """
    Provides statistical indicators for each skill associated with the requisition,
    enabling an LLM or analyst to decide whether a skill should be retained,
    removed, or reconsidered.

    Only the 10 most frequent skills are analysed. When every (or no)
    reviewed candidate has a skill, the empty side's SLA rate is taken as 0,
    so the reported correlation then reflects the other side alone.

    Args:
        requisition_id: The job requisition ID.

    Returns:
        SkillAnalysisResponse with historical counts and SLA correlation per
        skill, or RequisitionNotFoundResponse if the ID is not valid.
    """
    _log_api_call(f"API call: get_skill_analysis(requisition_id={requisition_id})")

    # Check if requisition ID is valid
    error = _check_requisition_valid(requisition_id)
    if error:
        return error

    loader = get_data_loader()
    data = loader.get_similar_requisitions(requisition_id)

    # Get all skills across all candidates and count occurrences
    all_skills = [skill for skills_list in data['skills_parsed'] for skill in skills_list]
    skill_counts = pd.Series(all_skills).value_counts()

    # Filter to reviewed candidates only (SLA only applies to reviewed candidates).
    # This does not depend on the skill, so it is done once outside the loop.
    reviewed_data = data[data['reviewed']]

    # For each skill, compute SLA correlation
    historical_skills = []
    for skill, count in skill_counts.head(10).items():  # Top 10 skills
        # One membership scan per skill; its negation gives the other group
        mask = _has_skill_mask(reviewed_data, skill)
        sla_with = _sla_rate(reviewed_data[mask])
        sla_without = _sla_rate(reviewed_data[~mask])

        historical_skills.append({
            "name": skill,
            "skill_occurrence": int(count),
            "correlation": _describe_sla_correlation(sla_with - sla_without),
        })

    num_jobs = data['requisition_id'].nunique()

    return SkillAnalysisResponse(
        historical_jobs=num_jobs,
        input_skills=[],  # Would come from requisition details
        historical_skills_with_analysis=historical_skills,
    )


def get_skill_impact_fill_rate(requisition_id: str, skill_name: str) -> Union[SkillImpactFillRateResponse, RequisitionNotFoundResponse]:
    """
    Evaluates how the inclusion of a specific skill affects requisition
    fill-rate metrics and candidate pool size.

    A requisition counts as "with the skill" when at least one of its rows
    lists the skill in ``skills_parsed``; all remaining requisitions form
    the baseline.

    Args:
        requisition_id: The job requisition ID.
        skill_name: The skill to evaluate.

    Returns:
        SkillImpactFillRateResponse with metrics with and without the skill,
        or RequisitionNotFoundResponse if the ID is not valid.
    """
    _log_api_call(f"API call: get_skill_impact_fill_rate(requisition_id={requisition_id}, skill_name={skill_name})")

    # Check if requisition ID is valid
    error = _check_requisition_valid(requisition_id)
    if error:
        return error

    loader = get_data_loader()
    data = loader.get_similar_requisitions(requisition_id)

    # Split data by whether requisitions included this skill
    has_skill_reqs = data[_has_skill_mask(data, skill_name)]['requisition_id'].unique()
    no_skill_reqs = data[~data['requisition_id'].isin(has_skill_reqs)]['requisition_id'].unique()

    with_skill = _compute_fill_metrics(data, has_skill_reqs)
    without_skill = _compute_fill_metrics(data, no_skill_reqs)

    return SkillImpactFillRateResponse(
        skill_name=skill_name,
        impact=with_skill,
        compared_to_baseline=without_skill,
    )


def get_skill_impact_sla(requisition_id: str, skill_name: str) -> Union[SkillImpactSLAResponse, RequisitionNotFoundResponse]:
    """
    Analyzes how a skill affects SLA achievement rate.

    Args:
        requisition_id: The job requisition ID.
        skill_name: The skill being analyzed.

    Returns:
        SkillImpactSLAResponse with whole-number success percentages
        with/without the skill and their delta (a side with no reviewed
        candidates reports 0), or RequisitionNotFoundResponse if the ID is
        not valid.
    """
    _log_api_call(f"API call: get_skill_impact_sla(requisition_id={requisition_id}, skill_name={skill_name})")

    # Check if requisition ID is valid
    error = _check_requisition_valid(requisition_id)
    if error:
        return error

    loader = get_data_loader()
    data = loader.get_similar_requisitions(requisition_id)

    # Filter to reviewed candidates only (SLA only applies to reviewed candidates)
    reviewed_data = data[data['reviewed']]

    # Get candidates with and without this skill
    mask = _has_skill_mask(reviewed_data, skill_name)
    sla_with = round(_sla_rate(reviewed_data[mask]) * 100)
    sla_without = round(_sla_rate(reviewed_data[~mask]) * 100)

    return SkillImpactSLAResponse(
        requisition_id=requisition_id,
        skill_name=skill_name,
        sla_achievement_with_skill=sla_with,
        sla_achievement_without_skill=sla_without,
        delta=sla_with - sla_without,
    )


def get_skill_relevance_justification(requisition_id: str, skill_name: str) -> Union[SkillRelevanceResponse, RequisitionNotFoundResponse]:
    """
    Explains whether a skill is relevant and why, based on historical hiring
    success and outcome data.

    A skill is considered relevant when its SLA delta exceeds 5 percentage
    points, or when the fill rate with the skill is more than 1.2 times the
    baseline fill rate.

    Args:
        requisition_id: The job requisition ID.
        skill_name: The skill being justified.

    Returns:
        SkillRelevanceResponse with the relevance determination and its
        justification, or RequisitionNotFoundResponse if the ID is not valid.
    """
    _log_api_call(f"API call: get_skill_relevance_justification(requisition_id={requisition_id}, skill_name={skill_name})")

    # Check if requisition ID is valid (so the two calls below return
    # their success models rather than an error response)
    error = _check_requisition_valid(requisition_id)
    if error:
        return error

    # Get both SLA and fill rate impacts
    sla_impact = get_skill_impact_sla(requisition_id, skill_name)
    fill_impact = get_skill_impact_fill_rate(requisition_id, skill_name)

    with_skill = fill_impact.impact
    baseline = fill_impact.compared_to_baseline

    # Determine relevance based on both metrics
    is_relevant = (
        sla_impact.delta > 5
        or with_skill.fill_rate_percentage > baseline.fill_rate_percentage * 1.2
    )

    justification = SkillJustificationData(
        requisition_id=requisition_id,
        skill_name=skill_name,
        sla_achievement_with_skill=sla_impact.sla_achievement_with_skill,
        sla_achievement_without_skill=sla_impact.sla_achievement_without_skill,
        delta=sla_impact.delta,
        impact=SkillJustificationImpact(
            fill_rate_percentage=with_skill.fill_rate_percentage,
            time_to_fill_days=with_skill.time_to_fill_days,
            candidate_pool_size=with_skill.candidate_pool_size,
        ),
        compared_to_baseline=SkillJustificationImpact(
            fill_rate_percentage=baseline.fill_rate_percentage,
            time_to_fill_days=baseline.time_to_fill_days,
            candidate_pool_size=baseline.candidate_pool_size,
        ),
    )

    return SkillRelevanceResponse(
        requisition_id=requisition_id,
        skill_name=skill_name,
        is_relevant=is_relevant,
        justification=justification,
    )


def get_successful_posting_criteria() -> SuccessfulPostingResponse:
    """
    Returns the business definition of a successful job posting,
    including thresholds and benchmarks for success.

    The thresholds are fixed constants; no data is loaded.

    Returns:
        SuccessfulPostingResponse with the success criteria thresholds.
    """
    _log_api_call("API call: get_successful_posting_criteria()")

    return SuccessfulPostingResponse(
        criteria=SuccessCriteria(
            time_to_fill_threshold_days=90,
            offer_acceptance_rate_min=50,
            sla_compliance_min=80,
            candidate_quality_rating_avg=3.5,
        ),
        justification="Based on historical performance benchmarks and industry standards",
    )


def get_data_sources_used(requisition_id: str) -> Union[DataSourcesResponse, RequisitionNotFoundResponse]:
    """
    Lists the datasets and ML models used to make hiring recommendations
    for a requisition.

    The lists are fixed; the requisition ID is only validated and echoed back.

    Args:
        requisition_id: The job requisition ID.

    Returns:
        DataSourcesResponse with data sources and models used, or
        RequisitionNotFoundResponse if the ID is not valid.
    """
    _log_api_call(f"API call: get_data_sources_used(requisition_id={requisition_id})")

    # Check if requisition ID is valid
    error = _check_requisition_valid(requisition_id)
    if error:
        return error

    return DataSourcesResponse(
        requisition_id=requisition_id,
        datasets_used=[
            "Historical hiring success data",
            "Requisition skill tagging",
            "Funnel conversion metrics",
            "Candidate quality feedback",
        ],
        models_involved=[
            "Skill relevance classifier",
            "SLA impact regression model",
            "Funnel conversion recommender",
        ],
    )