Spaces:

ibm-research
/

BPO-Bench

Running

App Files Files Community

BPO-Bench / api_skills.py

haroldshipibm

Upload folder using huggingface_hub

d075a5b verified 8 days ago

raw

history blame contribute delete

11.5 kB

	"""
	Skills APIs - compute skill-related metrics from actual data.

	AUTO-GENERATED by scripts/generate_hf.sh - DO NOT EDIT DIRECTLY
	Edit skills.py in main repo and regenerate.
	"""

	from typing import Dict, List, Any, Optional, Union
	import pandas as pd
	from loguru import logger
	from data_loader import get_data_loader
	from models import (
	RequisitionNotFoundResponse,
	SkillAnalysisResponse,
	SkillImpactFillRateResponse,
	SkillImpactSLAResponse,
	SkillRelevanceResponse,
	SuccessfulPostingResponse,
	DataSourcesResponse,
	SkillJustificationData,
	SkillJustificationImpact,
	SuccessCriteria,
	)
	BPO_LOG_API_CALLS = False # Disabled for deployment


	def _log_api_call(msg: str) -> None:
	"""Log API call if BPO_LOG_API_CALLS is enabled."""
	if BPO_LOG_API_CALLS:
	logger.info(msg)


	def _check_requisition_valid(requisition_id: str) -> Optional[RequisitionNotFoundResponse]:
	    """
	    Validate a requisition ID against the data loader.

	    Args:
	        requisition_id: The job requisition ID to look up.

	    Returns:
	        None when the loader reports the ID as valid; otherwise a
	        RequisitionNotFoundResponse carrying the loader's suggested IDs.
	    """
	    data_loader = get_data_loader()

	    # Happy path first: a known ID needs no error payload.
	    if data_loader.is_valid_requisition(requisition_id):
	        return None

	    alternatives = data_loader.get_suggested_requisitions(requisition_id)
	    return RequisitionNotFoundResponse(
	        error="requisition_not_found",
	        message=f"No job can be found with the ID {requisition_id}.",
	        suggested_requisition_ids=alternatives,
	    )


	def get_skill_analysis(requisition_id: str) -> Union[SkillAnalysisResponse, RequisitionNotFoundResponse]:
	    """
	    Provides statistical indicators for each skill associated with the requisition,
	    enabling an LLM or analyst to decide whether a skill should be retained,
	    removed, or reconsidered.

	    Only the 10 most frequent skills (by occurrence in ``skills_parsed``) are
	    analysed. For each one, the mean of ``sla_met`` among reviewed rows that
	    list the skill is compared with the mean among reviewed rows that do not,
	    and the difference is bucketed into a textual correlation label.

	    Args:
	        requisition_id: The job requisition ID.

	    Returns:
	        A SkillAnalysisResponse with the number of distinct historical
	        requisitions and, per skill, its occurrence count and SLA correlation
	        label; or a RequisitionNotFoundResponse when the ID is not valid.
	    """
	    _log_api_call(f"API call: get_skill_analysis(requisition_id={requisition_id})")

	    # Check if requisition ID is valid
	    error = _check_requisition_valid(requisition_id)
	    if error:
	        return error

	    loader = get_data_loader()
	    data = loader.get_similar_requisitions(requisition_id)

	    # Flatten every row's skill list, then rank skills by number of occurrences.
	    all_skills = [name for skills_list in data['skills_parsed'] for name in skills_list]
	    skill_counts = pd.Series(all_skills).value_counts()

	    # SLA only applies to reviewed candidates. This filter does not depend on
	    # the skill being examined, so it is computed once, outside the loop.
	    reviewed_data = data[data['reviewed']]

	    historical_skills = []
	    for skill, count in skill_counts.head(10).items():  # Top 10 skills
	        # One membership scan per skill; its negation selects the complement.
	        # astype(bool) keeps `~` a logical NOT even when there are no reviewed rows.
	        has_skill_mask = reviewed_data['skills_parsed'].apply(lambda x: skill in x).astype(bool)
	        has_skill = reviewed_data[has_skill_mask]
	        no_skill = reviewed_data[~has_skill_mask]

	        # NOTE(review): an empty group falls back to a rate of 0, so a skill that
	        # every (or no) reviewed row lists is compared against 0 rather than
	        # being reported as "no data" - confirm this is the intended behavior.
	        sla_with = has_skill['sla_met'].mean() if len(has_skill) > 0 else 0
	        sla_without = no_skill['sla_met'].mean() if len(no_skill) > 0 else 0

	        # Bucket the difference: beyond +/-0.10 is "highly", any other non-zero
	        # difference is "slightly", everything else is "no impact".
	        diff = sla_with - sla_without
	        if diff < -0.10:
	            correlation = "highly negative impact on SLA"
	        elif diff < 0:
	            correlation = "slightly negative impact on SLA"
	        elif diff > 0.10:
	            correlation = "highly positive impact on SLA"
	        elif diff > 0:
	            correlation = "slightly positive impact on SLA"
	        else:
	            correlation = "no impact on SLA"

	        historical_skills.append({
	            "name": skill,
	            "skill_occurrence": int(count),
	            "correlation": correlation,
	        })

	    num_jobs = data['requisition_id'].nunique()

	    return SkillAnalysisResponse(
	        historical_jobs=num_jobs,
	        input_skills=[],  # Would come from requisition details
	        historical_skills_with_analysis=historical_skills,
	    )


	def get_skill_impact_fill_rate(requisition_id: str, skill_name: str) -> Union[SkillImpactFillRateResponse, RequisitionNotFoundResponse]:
	    """
	    Evaluates how the inclusion of a specific skill affects requisition
	    fill-rate metrics and candidate pool size.

	    Requisitions are split into two groups: those with at least one row whose
	    ``skills_parsed`` contains ``skill_name``, and all remaining requisitions.
	    The same three metrics are computed for each group.

	    Args:
	        requisition_id: The job requisition ID.
	        skill_name: The skill to evaluate.

	    Returns:
	        A SkillImpactFillRateResponse whose ``impact`` holds the metrics for
	        requisitions with the skill and whose ``compared_to_baseline`` holds the
	        metrics for requisitions without it; or a RequisitionNotFoundResponse
	        when the ID is not valid.
	    """
	    _log_api_call(f"API call: get_skill_impact_fill_rate(requisition_id={requisition_id}, skill_name={skill_name})")

	    # Check if requisition ID is valid
	    error = _check_requisition_valid(requisition_id)
	    if error:
	        return error

	    loader = get_data_loader()
	    data = loader.get_similar_requisitions(requisition_id)

	    # Split data by whether requisitions included this skill
	    has_skill_reqs = data[data['skills_parsed'].apply(lambda x: skill_name in x)]['requisition_id'].unique()
	    no_skill_reqs = data[~data['requisition_id'].isin(has_skill_reqs)]['requisition_id'].unique()

	    def calc_metrics(req_ids):
	        """Return fill rate (%), mean days to fill and row count for ``req_ids``."""
	        if len(req_ids) == 0:
	            return {"fill_rate_percentage": 0, "time_to_fill_days": 0, "candidate_pool_size": 0}

	        req_data = data[data['requisition_id'].isin(req_ids)]

	        # Hired rows feed both the fill rate and the time to fill; filter once.
	        hired = req_data[req_data['hired']]

	        # Fill rate: % of reqs that got at least one hire
	        reqs_with_hires = hired['requisition_id'].nunique()
	        fill_rate = reqs_with_hires / len(req_ids) * 100

	        # Time to fill: average days from applied to hired
	        time_to_fill = 0
	        if len(hired) > 0:
	            mean_days = (hired['hire_date'] - hired['applied_at']).dt.days.mean()
	            # The mean is NaN when no hired row has both dates; int(NaN) would
	            # raise, so fall back to 0 exactly as when nobody was hired.
	            if not pd.isna(mean_days):
	                time_to_fill = mean_days

	        return {
	            "fill_rate_percentage": round(fill_rate, 1),
	            "time_to_fill_days": int(time_to_fill),  # truncates toward zero
	            "candidate_pool_size": len(req_data),
	        }

	    with_skill = calc_metrics(has_skill_reqs)
	    without_skill = calc_metrics(no_skill_reqs)

	    return SkillImpactFillRateResponse(
	        skill_name=skill_name,
	        impact=with_skill,
	        compared_to_baseline=without_skill,
	    )


	def get_skill_impact_sla(requisition_id: str, skill_name: str) -> Union[SkillImpactSLAResponse, RequisitionNotFoundResponse]:
	    """
	    Compare SLA achievement between reviewed rows that list a skill and
	    reviewed rows that do not.

	    Args:
	        requisition_id: The job requisition ID.
	        skill_name: The skill being analyzed.

	    Returns:
	        A SkillImpactSLAResponse with the rounded SLA percentage for each group
	        and their difference (with minus without); or a
	        RequisitionNotFoundResponse when the ID is not valid.
	    """
	    _log_api_call(f"API call: get_skill_impact_sla(requisition_id={requisition_id}, skill_name={skill_name})")

	    # Bail out early on an unknown requisition.
	    not_found = _check_requisition_valid(requisition_id)
	    if not_found:
	        return not_found

	    similar = get_data_loader().get_similar_requisitions(requisition_id)

	    # SLA is only defined for reviewed candidates.
	    reviewed = similar[similar['reviewed']]

	    def _sla_percent(rows):
	        """Mean of ``sla_met`` as a rounded percentage; 0 for an empty group."""
	        if len(rows) > 0:
	            return round(rows['sla_met'].mean() * 100)
	        return 0

	    lists_skill = reviewed['skills_parsed'].apply(lambda skills: skill_name in skills)
	    lacks_skill = reviewed['skills_parsed'].apply(lambda skills: skill_name not in skills)

	    pct_with = _sla_percent(reviewed[lists_skill])
	    pct_without = _sla_percent(reviewed[lacks_skill])

	    return SkillImpactSLAResponse(
	        requisition_id=requisition_id,
	        skill_name=skill_name,
	        sla_achievement_with_skill=pct_with,
	        sla_achievement_without_skill=pct_without,
	        delta=pct_with - pct_without,
	    )


	def get_skill_relevance_justification(requisition_id: str, skill_name: str) -> Union[SkillRelevanceResponse, RequisitionNotFoundResponse]:
	    """
	    Decide whether a skill is relevant for a requisition and report the
	    SLA and fill-rate figures the decision rests on.

	    A skill is flagged relevant when its SLA delta exceeds 5, or when the fill
	    rate with the skill is more than 1.2 times the baseline fill rate.

	    Args:
	        requisition_id: The job requisition ID.
	        skill_name: The skill being justified.

	    Returns:
	        A SkillRelevanceResponse with the relevance flag and the supporting
	        metrics; or a RequisitionNotFoundResponse when the ID is not valid.
	    """
	    _log_api_call(f"API call: get_skill_relevance_justification(requisition_id={requisition_id}, skill_name={skill_name})")

	    # Bail out early on an unknown requisition.
	    not_found = _check_requisition_valid(requisition_id)
	    if not_found:
	        return not_found

	    # Gather both impact views for the same requisition/skill pair.
	    sla = get_skill_impact_sla(requisition_id, skill_name)
	    fill = get_skill_impact_fill_rate(requisition_id, skill_name)
	    with_skill = fill.impact
	    baseline = fill.compared_to_baseline

	    # Either signal on its own is enough to mark the skill relevant.
	    is_relevant = bool(
	        sla.delta > 5
	        or with_skill.fill_rate_percentage > baseline.fill_rate_percentage * 1.2
	    )

	    impact_with_skill = SkillJustificationImpact(
	        fill_rate_percentage=with_skill.fill_rate_percentage,
	        time_to_fill_days=with_skill.time_to_fill_days,
	        candidate_pool_size=with_skill.candidate_pool_size,
	    )
	    impact_baseline = SkillJustificationImpact(
	        fill_rate_percentage=baseline.fill_rate_percentage,
	        time_to_fill_days=baseline.time_to_fill_days,
	        candidate_pool_size=baseline.candidate_pool_size,
	    )

	    return SkillRelevanceResponse(
	        requisition_id=requisition_id,
	        skill_name=skill_name,
	        is_relevant=is_relevant,
	        justification=SkillJustificationData(
	            requisition_id=requisition_id,
	            skill_name=skill_name,
	            sla_achievement_with_skill=sla.sla_achievement_with_skill,
	            sla_achievement_without_skill=sla.sla_achievement_without_skill,
	            delta=sla.delta,
	            impact=impact_with_skill,
	            compared_to_baseline=impact_baseline,
	        ),
	    )


	def get_successful_posting_criteria() -> SuccessfulPostingResponse:
	    """
	    Report the fixed thresholds that define a successful job posting.

	    Returns:
	        A SuccessfulPostingResponse wrapping the hard-coded SuccessCriteria
	        values together with a one-line justification string.
	    """
	    _log_api_call("API call: get_successful_posting_criteria()")

	    # The thresholds are constants; nothing is read from the data loader here.
	    thresholds = SuccessCriteria(
	        time_to_fill_threshold_days=90,
	        offer_acceptance_rate_min=50,
	        sla_compliance_min=80,
	        candidate_quality_rating_avg=3.5,
	    )
	    return SuccessfulPostingResponse(
	        criteria=thresholds,
	        justification="Based on historical performance benchmarks and industry standards",
	    )


	def get_data_sources_used(requisition_id: str) -> Union[DataSourcesResponse, RequisitionNotFoundResponse]:
	    """
	    Name the datasets and models reported as backing the recommendations
	    for a requisition.

	    Args:
	        requisition_id: The job requisition ID.

	    Returns:
	        A DataSourcesResponse listing fixed dataset and model names; or a
	        RequisitionNotFoundResponse when the ID is not valid.
	    """
	    _log_api_call(f"API call: get_data_sources_used(requisition_id={requisition_id})")

	    # Bail out early on an unknown requisition.
	    not_found = _check_requisition_valid(requisition_id)
	    if not_found:
	        return not_found

	    # Both lists are static; only the requisition ID varies between calls.
	    datasets = [
	        "Historical hiring success data",
	        "Requisition skill tagging",
	        "Funnel conversion metrics",
	        "Candidate quality feedback",
	    ]
	    models = [
	        "Skill relevance classifier",
	        "SLA impact regression model",
	        "Funnel conversion recommender",
	    ]
	    return DataSourcesResponse(
	        requisition_id=requisition_id,
	        datasets_used=datasets,
	        models_involved=models,
	    )