Spaces:

ibm-research
/

BPO-Bench

Running

App Files Files Community

BPO-Bench / data /tasks.json

haroldshipibm

Upload folder using huggingface_hub

d075a5b verified 7 days ago

raw

history blame contribute delete

54.5 kB

	[
	{
	"name": "bpo-benchmark",
	"user_info": [],
	"test_cases": [
	{
	"name": "task_1",
	"description": "Lists sources ranked by SLA success rate. | Explanation: CyberSec Jobs was identified as the lowest-performing source because its SLA success rate is 67 %, well below Indeed (86 %), GitHub (90 %), Dice (95 %), and the other sources returned by the API.",
	"intent": "For requisition 05958BR, which source has the lowest SLA performance?",
	"difficulty": "easy",
	"expected_output": {
	"response": "CyberSec Jobs with 67%",
	"keywords": [
	"CyberSec Jobs",
	"67%|67 %|67"
	],
	"tool_calls": [
	{
	"name": "candidate_source_sla_per_source",
	"args": {}
	}
	],
	"tool_call_results": [
	{
	"name": "candidate_source_sla_per_source",
	"result": {
	"metrics": [
	{
	"source_name": "CyberSec Jobs",
	"sla_percentage": 67
	},
	{
	"source_name": "Indeed",
	"sla_percentage": 86
	},
	{
	"source_name": "GitHub",
	"sla_percentage": 90
	},
	{
	"source_name": "Dice",
	"sla_percentage": 95
	},
	{
	"source_name": "Internal",
	"sla_percentage": 95
	},
	{
	"source_name": "LinkedIn",
	"sla_percentage": 95
	},
	{
	"source_name": "Referral",
	"sla_percentage": 95
	}
	]
	}
	}
	]
	}
	},
	{
	"name": "task_2",
	"description": "Asks for the missing requisition id. | Explanation: The query lacks a requisition ID which is required for the API call.",
	"intent": "What's the percentage of hires and the total hires per source?",
	"difficulty": "easy",
	"expected_output": {
	"response": "Can you share the requisition ID (e.g., 05958BR or 05959BR)?",
	"keywords": [
	"requisition|req",
	"ID|id|identifier",
	"missing|without|share|provide|required"
	],
	"tool_calls": []
	}
	},
	{
	"name": "task_3",
	"description": "Shows each source's candidate volume and offer/hire success metrics for jobs similar to 05958BR. | Explanation: Candidate counts and percentages were taken from the candidate-volume API; hire counts and offer-acceptance rates were taken from the recommendation-summary API. The two tables were joined on \"source_name\", producing a combined view of volume and effectiveness for the three leading sources. | Note: Cross-references performance and volume per source. Requires joining APIs on 'source_name'.",
	"intent": "For requisitions like 05958BR, which sources provided the most candidates, and how effective were they at converting to hires?",
	"difficulty": "medium",
	"expected_output": {
	"response": "LinkedIn: 519 candidates (18%), 7 hires. Offer acceptance rate: 70%. Dice: 516 candidates (18%), 11 hires. Offer acceptance rate: 79%. GitHub: 468 candidates (16%), 10 hires. Offer acceptance rate: 77%.",
	"keywords": [
	"LinkedIn",
	"Dice",
	"GitHub",
	"Offer acceptance rate",
	"519",
	"516",
	"468",
	"18%|18 %|18",
	"70%|70 %|70",
	"79%|79 %|79",
	"77%|77 %|77",
	"hires"
	],
	"tool_calls": [
	{
	"name": "candidate_source_candidate_volume_by_source",
	"args": {}
	},
	{
	"name": "candidate_source_source_recommendation_summary",
	"args": {}
	}
	],
	"tool_call_results": [
	{
	"name": "candidate_source_candidate_volume_by_source",
	"result": {
	"job_id": "05958BR",
	"total_candidate_volume": 2913,
	"metrics": [
	{
	"source_name": "LinkedIn",
	"candidate_volume": 519,
	"percentage": 18
	},
	{
	"source_name": "Dice",
	"candidate_volume": 516,
	"percentage": 18
	},
	{
	"source_name": "GitHub",
	"candidate_volume": 468,
	"percentage": 16
	},
	{
	"source_name": "Indeed",
	"candidate_volume": 410,
	"percentage": 14
	},
	{
	"source_name": "Internal",
	"candidate_volume": 400,
	"percentage": 14
	},
	{
	"source_name": "Referral",
	"candidate_volume": 400,
	"percentage": 14
	},
	{
	"source_name": "CyberSec Jobs",
	"candidate_volume": 200,
	"percentage": 7
	}
	],
	"heading": "For requisitions similar to 05958BR, there were 2913 candidates over the past three years. Here's how many candidates came from each source (with percentages from the total number):"
	}
	},
	{
	"name": "candidate_source_source_recommendation_summary",
	"result": {
	"total_requisitions": 40,
	"metrics": [
	{
	"source_name": "CyberSec Jobs",
	"jobs_filled_percentage": 2,
	"first_round_review_percentage": 80,
	"offer_acceptance_rate": 67,
	"total_hires": 3
	},
	{
	"source_name": "Dice",
	"jobs_filled_percentage": 2,
	"first_round_review_percentage": 11,
	"offer_acceptance_rate": 79,
	"total_hires": 11
	},
	{
	"source_name": "GitHub",
	"jobs_filled_percentage": 2,
	"first_round_review_percentage": 76,
	"offer_acceptance_rate": 77,
	"total_hires": 10
	},
	{
	"source_name": "Indeed",
	"jobs_filled_percentage": 0,
	"first_round_review_percentage": 77,
	"offer_acceptance_rate": 0,
	"total_hires": 0
	},
	{
	"source_name": "Internal",
	"jobs_filled_percentage": 2,
	"first_round_review_percentage": 74,
	"offer_acceptance_rate": 70,
	"total_hires": 5
	},
	{
	"source_name": "LinkedIn",
	"jobs_filled_percentage": 2,
	"first_round_review_percentage": 75,
	"offer_acceptance_rate": 70,
	"total_hires": 7
	},
	{
	"source_name": "Referral",
	"jobs_filled_percentage": 2,
	"first_round_review_percentage": 70,
	"offer_acceptance_rate": 62,
	"total_hires": 4
	}
	]
	}
	}
	]
	}
	},
	{
	"name": "task_4",
	"description": "Asks for the missing requisition id. | Explanation: The query lacks a requisition ID which is required for the API call.",
	"intent": "Did Dice provide a good funnel conversion rate?",
	"difficulty": "easy",
	"expected_output": {
	"response": "Can you share the requisition ID (e.g., 05958BR or 05959BR)?",
	"keywords": [
	"requisition|req",
	"ID|id|identifier",
	"missing|without|share|provide|required"
	],
	"tool_calls": []
	}
	},
	{
	"name": "task_5",
	"description": "Asks for the missing requisition id. | Explanation: The query lacks a requisition ID which is required for the API call.",
	"intent": "Should I include the skill Python? What is its impact on SLA, fill rate, and overall relevance?",
	"difficulty": "easy",
	"expected_output": {
	"response": "Can you share the requisition ID (e.g., 05958BR or 05959BR)?",
	"keywords": [
	"requisition|req",
	"ID|id|identifier",
	"missing|without|share|provide|required"
	],
	"tool_calls": []
	}
	},
	{
	"name": "task_6",
	"description": "Recommends top-performing sources by combining SLA success, candidate volume, and funnel effectiveness. | Explanation: Each source received a weighted score (50 % SLA success, 30 % candidate volume share, 20 % offer-conversion rate). Dice and LinkedIn tied for top SLA (100 %) and high volume; GitHub's strong conversion (2.8 %, the highest of the three recommended sources) offset its 80 % SLA. Indeed scored 0 on SLA and offers, so it was excluded. | Note: This benchmark tests multi-criteria decision-making and cross-API synthesis.",
	"intent": "What are the best sources to prioritize for 05959BR?",
	"difficulty": "hard",
	"expected_output": {
	"response": "You should prioritize Dice, GitHub, and LinkedIn. Dice and LinkedIn both met SLA 100% of the time and each brought in 18% of all candidates. Dice had a strong offer conversion rate (2.7%), and GitHub had the highest conversion of the three (2.8%) despite slightly lower SLA. Indeed should be avoided due to 0% SLA and 0% offer conversion.",
	"keywords": [
	"Dice",
	"GitHub",
	"LinkedIn",
	"SLA",
	"Indeed"
	],
	"tool_calls": [
	{
	"name": "candidate_source_sla_per_source",
	"args": {}
	},
	{
	"name": "candidate_source_candidate_volume_by_source",
	"args": {}
	},
	{
	"name": "candidate_source_funnel_conversion_by_source",
	"args": {}
	}
	],
	"tool_call_results": [
	{
	"name": "candidate_source_sla_per_source",
	"result": {
	"metrics": [
	{
	"source_name": "Indeed",
	"sla_percentage": 0
	},
	{
	"source_name": "CyberSec Jobs",
	"sla_percentage": 70
	},
	{
	"source_name": "GitHub",
	"sla_percentage": 80
	},
	{
	"source_name": "Internal",
	"sla_percentage": 85
	},
	{
	"source_name": "Dice",
	"sla_percentage": 100
	},
	{
	"source_name": "LinkedIn",
	"sla_percentage": 100
	},
	{
	"source_name": "Referral",
	"sla_percentage": 100
	}
	]
	}
	},
	{
	"name": "candidate_source_candidate_volume_by_source",
	"result": {
	"job_id": "05959BR",
	"total_candidate_volume": 2913,
	"metrics": [
	{
	"source_name": "Dice",
	"candidate_volume": 525,
	"percentage": 18
	},
	{
	"source_name": "LinkedIn",
	"candidate_volume": 525,
	"percentage": 18
	},
	{
	"source_name": "GitHub",
	"candidate_volume": 465,
	"percentage": 16
	},
	{
	"source_name": "Internal",
	"candidate_volume": 403,
	"percentage": 14
	},
	{
	"source_name": "Indeed",
	"candidate_volume": 400,
	"percentage": 14
	},
	{
	"source_name": "Referral",
	"candidate_volume": 400,
	"percentage": 14
	},
	{
	"source_name": "CyberSec Jobs",
	"candidate_volume": 195,
	"percentage": 7
	}
	],
	"heading": "For requisitions similar to 05959BR, there were 2913 candidates over the past three years. Here's how many candidates came from each source (with percentages from the total number):"
	}
	},
	{
	"name": "candidate_source_funnel_conversion_by_source",
	"result": {
	"job_id": "05959BR",
	"metrics": [
	{
	"source_name": "CyberSec Jobs",
	"first_round_review_percentage": 80.5,
	"interview_rate": 18.5,
	"offer_acceptance_rate": 3.1
	},
	{
	"source_name": "Dice",
	"first_round_review_percentage": 76.0,
	"interview_rate": 9.9,
	"offer_acceptance_rate": 2.7
	},
	{
	"source_name": "GitHub",
	"first_round_review_percentage": 72.0,
	"interview_rate": 16.6,
	"offer_acceptance_rate": 2.8
	},
	{
	"source_name": "Indeed",
	"first_round_review_percentage": 72.2,
	"interview_rate": 14.8,
	"offer_acceptance_rate": 0.0
	},
	{
	"source_name": "Internal",
	"first_round_review_percentage": 76.9,
	"interview_rate": 19.6,
	"offer_acceptance_rate": 2.5
	},
	{
	"source_name": "LinkedIn",
	"first_round_review_percentage": 70.1,
	"interview_rate": 21.0,
	"offer_acceptance_rate": 1.9
	},
	{
	"source_name": "Referral",
	"first_round_review_percentage": 74.5,
	"interview_rate": 20.5,
	"offer_acceptance_rate": 2.0
	}
	]
	}
	}
	]
	}
	},
	{
	"name": "task_7",
	"description": "Asks for the missing requisition id. | Explanation: The query lacks a requisition ID which is required for the API call.",
	"intent": "Out of these skills — Python, Quantum Physics, Cyber Engineering, Risk Analysis, Wireshark — which ones negatively affect SLA performance?",
	"difficulty": "easy",
	"expected_output": {
	"response": "Can you share the requisition ID (e.g., 05958BR or 05959BR)?",
	"keywords": [
	"requisition|req",
	"ID|id|identifier",
	"missing|without|share|provide|required"
	],
	"tool_calls": []
	}
	},
	{
	"name": "task_8",
	"description": "Returns the definition of the SLA metric for the given requisition. | Explanation: The definitions-and-methodology endpoint contains a JSON field \"sla\" holding the textual definition; the agent extracted that string verbatim. | Note: Tests the agent's ability to locate and return a specific definition.",
	"intent": "How is the SLA metric defined for 05958BR?",
	"difficulty": "easy",
	"expected_output": {
	"response": "SLA is defined as 'Percentage of candidates reviewed within the defined SLA window (e.g., 48 hours)'.",
	"keywords": [
	"SLA",
	"Percentage",
	"reviewed",
	"window"
	],
	"tool_calls": [
	{
	"name": "candidate_source_definitions_and_methodology",
	"args": {}
	}
	],
	"tool_call_results": [
	{
	"name": "candidate_source_definitions_and_methodology",
	"result": {
	"job_id": "05958BR",
	"definitions": {
	"sla": "Percentage of candidates reviewed within the defined SLA window (e.g., 48 hours)",
	"time_to_fill": "Average time from job posting to accepted offer",
	"success_rate": "Ratio of candidates who accepted offers out of those interviewed"
	},
	"calculation_notes": "Metrics are computed from 1047 requisitions over the last 1.4 years. Funnel stats are based on system timestamps and recruiter actions in ATS.",
	"top_metrics_considered": [
	"SLA %",
	"First round review %",
	"Offer acceptance rate",
	"Candidate volume",
	"Total hires"
	]
	}
	}
	]
	}
	},
	{
	"name": "task_9",
	"description": "Returns the number of requisitions used to compute the reported metrics. | Explanation: The methodology response's \"calculation_notes\" field states \"Metrics are computed from 1047 requisitions over the last 1.4 years\"; the agent parsed the integer 1047 and returned it. | Note: Tests string parsing / information extraction from notes field.",
	"intent": "How many requisitions were used to compute these metrics for 05958BR?",
	"difficulty": "easy",
	"expected_output": {
	"response": "Metrics are computed from 1047 requisitions.",
	"keywords": [
	"1047",
	"requisitions"
	],
	"tool_calls": [
	{
	"name": "candidate_source_definitions_and_methodology",
	"args": {}
	}
	],
	"tool_call_results": [
	{
	"name": "candidate_source_definitions_and_methodology",
	"result": {
	"job_id": "05958BR",
	"definitions": {
	"sla": "Percentage of candidates reviewed within the defined SLA window (e.g., 48 hours)",
	"time_to_fill": "Average time from job posting to accepted offer",
	"success_rate": "Ratio of candidates who accepted offers out of those interviewed"
	},
	"calculation_notes": "Metrics are computed from 1047 requisitions over the last 1.4 years. Funnel stats are based on system timestamps and recruiter actions in ATS.",
	"top_metrics_considered": [
	"SLA %",
	"First round review %",
	"Offer acceptance rate",
	"Candidate volume",
	"Total hires"
	]
	}
	}
	]
	}
	},
	{
	"name": "task_10",
	"description": "Returns the list of top metrics considered for source evaluation. | Explanation: The agent read the \"top_metrics_considered\" array from the methodology API response and returned the metrics in the same order. | Note: Tests structured list extraction and formatting.",
	"intent": "What are the top metrics considered when evaluating candidate sources for 05958BR?",
	"difficulty": "easy",
	"expected_output": {
	"response": "The top metrics considered are: SLA %, First round review %, Offer acceptance rate, Candidate volume, Total hires.",
	"keywords": [
	"SLA",
	"First round review",
	"Offer acceptance",
	"Candidate volume",
	"Total hires"
	],
	"tool_calls": [
	{
	"name": "candidate_source_definitions_and_methodology",
	"args": {}
	}
	],
	"tool_call_results": [
	{
	"name": "candidate_source_definitions_and_methodology",
	"result": {
	"job_id": "05958BR",
	"definitions": {
	"sla": "Percentage of candidates reviewed within the defined SLA window (e.g., 48 hours)",
	"time_to_fill": "Average time from job posting to accepted offer",
	"success_rate": "Ratio of candidates who accepted offers out of those interviewed"
	},
	"calculation_notes": "Metrics are computed from 1047 requisitions over the last 1.4 years. Funnel stats are based on system timestamps and recruiter actions in ATS.",
	"top_metrics_considered": [
	"SLA %",
	"First round review %",
	"Offer acceptance rate",
	"Candidate volume",
	"Total hires"
	]
	}
	}
	]
	}
	},
	{
	"name": "task_11",
	"description": "Loops through the provided list of models and reports which ones were used. | Explanation: The agent compared each provided model name against the \"models_involved\" array returned by data-sources-used API and reported matches (used) or non-matches (not used). | Note: Tests loop-based reasoning and partial matching for list membership.",
	"intent": "Were the following models used to generate metrics for 05958BR: SLA impact regression model, Candidate ranking model, Skill relevance classifier?",
	"difficulty": "easy",
	"expected_output": {
	"response": "Yes, 'SLA impact regression model' and 'Skill relevance classifier' were used. 'Candidate ranking model' was not listed among the models involved.",
	"keywords": [
	"SLA impact regression model",
	"Skill relevance classifier",
	"Candidate ranking model"
	],
	"tool_calls": [
	{
	"name": "skills_data_sources_used",
	"args": {}
	}
	],
	"tool_call_results": [
	{
	"name": "skills_data_sources_used",
	"result": {
	"requisition_id": "05958BR",
	"datasets_used": [
	"Historical hiring success data",
	"Requisition skill tagging",
	"Funnel conversion metrics",
	"Candidate quality feedback"
	],
	"models_involved": [
	"Skill relevance classifier",
	"SLA impact regression model",
	"Funnel conversion recommender"
	]
	}
	}
	]
	}
	},
	{
	"name": "task_12",
	"description": "Loops through the provided list of data sources and reports which ones were used. | Explanation: Each candidate data source was checked against the \"datasets_used\" array from data-sources-used API; two matched and one did not, which the agent reported accordingly. | Note: Tests loop-based reasoning and partial matching for list membership.",
	"intent": "Were the following data sources used to compute the metrics for 05958BR: Historical hiring success data, Job description embeddings, Funnel conversion metrics?",
	"difficulty": "easy",
	"expected_output": {
	"response": "Yes, 'Historical hiring success data' and 'Funnel conversion metrics' were used. 'Job description embeddings' was not listed among the data sources.",
	"keywords": [
	"Historical hiring success data",
	"Funnel conversion metrics",
	"Job description embeddings"
	],
	"tool_calls": [
	{
	"name": "skills_data_sources_used",
	"args": {}
	}
	],
	"tool_call_results": [
	{
	"name": "skills_data_sources_used",
	"result": {
	"requisition_id": "05958BR",
	"datasets_used": [
	"Historical hiring success data",
	"Requisition skill tagging",
	"Funnel conversion metrics",
	"Candidate quality feedback"
	],
	"models_involved": [
	"Skill relevance classifier",
	"SLA impact regression model",
	"Funnel conversion recommender"
	]
	}
	}
	]
	}
	},
	{
	"name": "task_13",
	"description": "Combines model lookup, retrieves actual SLA delta, and returns SLA definition. | Explanation: The SLA-impact API showed a 0 % delta for Python; data-sources-used API confirmed the 'SLA impact regression model' was involved; the methodology API supplied the formal SLA definition. These three pieces were combined into one coherent answer. | Note: Agent must combine numerical result (delta), model lookup, and formal definition into unified answer.",
	"intent": "For 05958BR, when evaluating the SLA impact of Python, which models were used, what was the SLA delta, and what is the system definition of SLA?",
	"difficulty": "hard",
	"expected_output": {
	"response": "'SLA impact regression model' was used. The SLA delta for Python was 0.0%. SLA is defined as 'Percentage of candidates reviewed within the defined SLA window (e.g., 48 hours)'.",
	"keywords": [
	"SLA impact regression model",
	"0.0%|0.0 %|0.0|0%|0 %|0",
	"SLA",
	"Percentage",
	"reviewed",
	"window"
	],
	"tool_calls": [
	{
	"name": "skills_skill_impact_sla",
	"args": {}
	},
	{
	"name": "skills_data_sources_used",
	"args": {}
	},
	{
	"name": "candidate_source_definitions_and_methodology",
	"args": {}
	}
	],
	"tool_call_results": [
	{
	"name": "skills_skill_impact_sla",
	"result": {
	"requisition_id": "05958BR",
	"skill_name": "Python",
	"sla_achievement_with_skill": 90,
	"sla_achievement_without_skill": 90,
	"delta": 0
	}
	},
	{
	"name": "skills_data_sources_used",
	"result": {
	"requisition_id": "05958BR",
	"datasets_used": [
	"Historical hiring success data",
	"Requisition skill tagging",
	"Funnel conversion metrics",
	"Candidate quality feedback"
	],
	"models_involved": [
	"Skill relevance classifier",
	"SLA impact regression model",
	"Funnel conversion recommender"
	]
	}
	},
	{
	"name": "candidate_source_definitions_and_methodology",
	"result": {
	"job_id": "05958BR",
	"definitions": {
	"sla": "Percentage of candidates reviewed within the defined SLA window (e.g., 48 hours)",
	"time_to_fill": "Average time from job posting to accepted offer",
	"success_rate": "Ratio of candidates who accepted offers out of those interviewed"
	},
	"calculation_notes": "Metrics are computed from 1047 requisitions over the last 1.4 years. Funnel stats are based on system timestamps and recruiter actions in ATS.",
	"top_metrics_considered": [
	"SLA %",
	"First round review %",
	"Offer acceptance rate",
	"Candidate volume",
	"Total hires"
	]
	}
	}
	]
	}
	},
	{
	"name": "task_14",
	"description": "States that Risk Analysis negatively affects SLA and lists the datasets that informed the analysis. | Explanation: The skill-analysis API flagged Risk Analysis as negatively correlated with SLA. The data-sources-used API listed the four datasets underpinning the evaluation, and both results were consolidated in the response. | Note: Correlation wording corrected to match API ('highly negative impact on SLA').",
	"intent": "Was 'Risk Analysis' considered historically effective, and what data sources informed this analysis for 05958BR?",
	"difficulty": "medium",
	"expected_output": {
	"response": "'Risk Analysis' is not considered effective: historical analysis shows it is correlated with a highly negative impact on SLA. The evaluation used these data sources: Historical hiring success data, Requisition skill tagging, Funnel conversion metrics, and Candidate quality feedback.",
	"keywords": [
	"Risk Analysis",
	"not",
	"effective",
	"highly negative impact on SLA",
	"SLA",
	"Historical hiring success data",
	"Requisition skill tagging"
	],
	"tool_calls": [
	{
	"name": "skills_skill_analysis",
	"args": {}
	},
	{
	"name": "skills_data_sources_used",
	"args": {}
	}
	],
	"tool_call_results": [
	{
	"name": "skills_skill_analysis",
	"result": {
	"historical_jobs": 40,
	"input_skills": [],
	"historical_skills_with_analysis": [
	{
	"name": "AWS",
	"skill_occurrence": 948,
	"correlation": "slightly positive impact on SLA"
	},
	{
	"name": "IT Support",
	"skill_occurrence": 868,
	"correlation": "slightly positive impact on SLA"
	},
	{
	"name": "NIST Cybersecurity Framework",
	"skill_occurrence": 816,
	"correlation": "slightly negative impact on SLA"
	},
	{
	"name": "Incident Management",
	"skill_occurrence": 748,
	"correlation": "slightly negative impact on SLA"
	},
	{
	"name": "Firewalls",
	"skill_occurrence": 744,
	"correlation": "slightly negative impact on SLA"
	},
	{
	"name": "Cloud Security",
	"skill_occurrence": 592,
	"correlation": "slightly negative impact on SLA"
	},
	{
	"name": "Risk Assessment",
	"skill_occurrence": 570,
	"correlation": "slightly negative impact on SLA"
	},
	{
	"name": "Risk Analysis",
	"skill_occurrence": 371,
	"correlation": "highly negative impact on SLA"
	},
	{
	"name": "Microsoft PowerShell",
	"skill_occurrence": 289,
	"correlation": "slightly negative impact on SLA"
	},
	{
	"name": "Python",
	"skill_occurrence": 200,
	"correlation": "slightly negative impact on SLA"
	}
	]
	}
	},
	{
	"name": "skills_data_sources_used",
	"result": {
	"requisition_id": "05958BR",
	"datasets_used": [
	"Historical hiring success data",
	"Requisition skill tagging",
	"Funnel conversion metrics",
	"Candidate quality feedback"
	],
	"models_involved": [
	"Skill relevance classifier",
	"SLA impact regression model",
	"Funnel conversion recommender"
	]
	}
	}
	]
	}
	},
	{
	"name": "task_15",
	"description": "Explains that no provided skills hurt SLA, no provided sources require avoidance, and lists datasets used. | Explanation: The agent looped over each skill, querying SLA-impact and skill-analysis APIs; none yielded a negative delta. It then looped over each source, combining SLA-per-source and funnel-conversion data; all three met both SLA and offer benchmarks, so none were flagged. Finally, the datasets_used array was fetched once and reported. | Note: Fully aligned with current API payloads—no unsupported claims.",
	"intent": "For requisition 05958BR, which of the following skills — Python, Quantum Physics, Wireshark — negatively impact SLA, and which of the following sources — Dice, GitHub, Referral — should be avoided based on low offer conversion or SLA performance? Also, which data sources were used to compute these insights?",
	"difficulty": "hard",
	"expected_output": {
	"response": "None of the listed skills show a negative SLA impact in the available data: Python's SLA delta is 0 %, and Quantum Physics and Wireshark are not present in the analysis. Likewise, Dice (SLA 95 %, offer-acceptance 79 %), GitHub (SLA 90 %, offer-acceptance 77 %), and Referral (SLA 95 %, offer-acceptance 62 %) all perform satisfactorily, so none of them need to be avoided. These insights were produced using the following datasets: Historical hiring success data, Requisition skill tagging, Funnel conversion metrics, and Candidate quality feedback.",
	"keywords": [
	"Python",
	"Quantum Physics",
	"Wireshark",
	"Dice",
	"GitHub",
	"Referral",
	"SLA",
	"0%|0 %|0",
	"95%|95 %|95",
	"79%|79 %|79",
	"90%|90 %|90",
	"77%|77 %|77",
	"62%|62 %|62",
	"Historical hiring success data",
	"Requisition skill tagging",
	"Funnel conversion metrics",
	"Candidate quality feedback"
	],
	"tool_calls": [
	{
	"name": "skills_skill_impact_sla",
	"args": {}
	},
	{
	"name": "skills_skill_analysis",
	"args": {}
	},
	{
	"name": "skills_data_sources_used",
	"args": {}
	},
	{
	"name": "candidate_source_funnel_conversion_by_source",
	"args": {}
	},
	{
	"name": "candidate_source_sla_per_source",
	"args": {}
	}
	],
	"tool_call_results": [
	{
	"name": "skills_skill_impact_sla",
	"result": {
	"requisition_id": "05958BR",
	"skill_name": "Python",
	"sla_achievement_with_skill": 90,
	"sla_achievement_without_skill": 90,
	"delta": 0
	}
	},
	{
	"name": "skills_skill_analysis",
	"result": {
	"historical_jobs": 40,
	"input_skills": [],
	"historical_skills_with_analysis": [
	{
	"name": "AWS",
	"skill_occurrence": 948,
	"correlation": "slightly positive impact on SLA"
	},
	{
	"name": "IT Support",
	"skill_occurrence": 868,
	"correlation": "slightly positive impact on SLA"
	},
	{
	"name": "NIST Cybersecurity Framework",
	"skill_occurrence": 816,
	"correlation": "slightly negative impact on SLA"
	},
	{
	"name": "Incident Management",
	"skill_occurrence": 748,
	"correlation": "slightly negative impact on SLA"
	},
	{
	"name": "Firewalls",
	"skill_occurrence": 744,
	"correlation": "slightly negative impact on SLA"
	},
	{
	"name": "Cloud Security",
	"skill_occurrence": 592,
	"correlation": "slightly negative impact on SLA"
	},
	{
	"name": "Risk Assessment",
	"skill_occurrence": 570,
	"correlation": "slightly negative impact on SLA"
	},
	{
	"name": "Risk Analysis",
	"skill_occurrence": 371,
	"correlation": "highly negative impact on SLA"
	},
	{
	"name": "Microsoft PowerShell",
	"skill_occurrence": 289,
	"correlation": "slightly negative impact on SLA"
	},
	{
	"name": "Python",
	"skill_occurrence": 200,
	"correlation": "slightly negative impact on SLA"
	}
	]
	}
	},
	{
	"name": "skills_data_sources_used",
	"result": {
	"requisition_id": "05958BR",
	"datasets_used": [
	"Historical hiring success data",
	"Requisition skill tagging",
	"Funnel conversion metrics",
	"Candidate quality feedback"
	],
	"models_involved": [
	"Skill relevance classifier",
	"SLA impact regression model",
	"Funnel conversion recommender"
	]
	}
	},
	{
	"name": "candidate_source_funnel_conversion_by_source",
	"result": {
	"job_id": "05958BR",
	"metrics": [
	{
	"source_name": "CyberSec Jobs",
	"first_round_review_percentage": 80.5,
	"interview_rate": 19.0,
	"offer_acceptance_rate": 3.0
	},
	{
	"source_name": "Dice",
	"first_round_review_percentage": 11.0,
	"interview_rate": 6.8,
	"offer_acceptance_rate": 2.7
	},
	{
	"source_name": "GitHub",
	"first_round_review_percentage": 76.1,
	"interview_rate": 23.7,
	"offer_acceptance_rate": 2.8
	},
	{
	"source_name": "Indeed",
	"first_round_review_percentage": 77.1,
	"interview_rate": 22.0,
	"offer_acceptance_rate": 0.0
	},
	{
	"source_name": "Internal",
	"first_round_review_percentage": 74.0,
	"interview_rate": 18.5,
	"offer_acceptance_rate": 2.5
	},
	{
	"source_name": "LinkedIn",
	"first_round_review_percentage": 75.1,
	"interview_rate": 20.4,
	"offer_acceptance_rate": 1.9
	},
	{
	"source_name": "Referral",
	"first_round_review_percentage": 70.0,
	"interview_rate": 17.0,
	"offer_acceptance_rate": 2.0
	}
	]
	}
	},
	{
	"name": "candidate_source_sla_per_source",
	"result": {
	"metrics": [
	{
	"source_name": "CyberSec Jobs",
	"sla_percentage": 67
	},
	{
	"source_name": "Indeed",
	"sla_percentage": 86
	},
	{
	"source_name": "GitHub",
	"sla_percentage": 90
	},
	{
	"source_name": "Dice",
	"sla_percentage": 95
	},
	{
	"source_name": "Internal",
	"sla_percentage": 95
	},
	{
	"source_name": "LinkedIn",
	"sla_percentage": 95
	},
	{
	"source_name": "Referral",
	"sla_percentage": 95
	}
	]
	}
	}
	]
	}
	},
	{
	"name": "task_16",
	"description": "Apologises for missing capability; optionally provides an illustrative optimisation if the feature existed. | Explanation: No endpoint returns or accepts free-text job descriptions, so optimisation is unsupported. The hypothetical section shows what the answer could look like if such an endpoint were added. | Note: Demonstrates graceful degradation plus a forward-looking example.",
	"intent": "Help me optimize the job description for 05959BR.",
	"difficulty": "easy",
	"expected_output": {
	"response": "Sorry — the available APIs do not expose the current job-description text, so I cannot directly optimise it.",
	"keywords": [
	"APIs|API",
	"job-description|job description",
	"cannot|can't"
	],
	"tool_calls": []
	}
	},
	{
	"name": "task_17",
	"description": "Prompts the user for the missing job ID instead of guessing. | Explanation: Illustrates a clarification turn when a required parameter (requisition_id) is missing. | Note: Tests conversational error-handling with zero API usage.",
	"intent": "Which sourcing channel is the most effective for this job?",
	"difficulty": "easy",
	"expected_output": {
	"response": "Can you share the requisition ID (e.g., 05958BR or 05959BR)?",
	"keywords": [
	"requisition|req",
	"ID|id|identifier",
	"missing|without|share|provide|required"
	],
	"tool_calls": []
	}
	},
	{
	"name": "task_18",
	"description": "Explains unsupported request; shows what a successful answer might include. | Explanation: No endpoints expose hiring-manager data; a possible future answer is sketched for context. | Note: Covers gap #3: hiring-manager analytics.",
	"intent": "Who's the hiring manager for 05959BR and how responsive is she?",
	"difficulty": "easy",
	"expected_output": {
	"response": "Sorry — current APIs don't return hiring-manager names or responsiveness metrics, so I can't answer that.",
	"keywords": [
	"APIs",
	"hiring-manager",
	"responsive",
	"can't|cannot"
	],
	"tool_calls": []
	}
	},
	{
	"name": "task_19",
	"description": "States limitation; optional mock table shows desired granularity. | Explanation: The existing funnel-conversion API only returns percentages by source, not absolute counts or durations per stage. | Note: Covers gap #4: full funnel metrics.",
	"intent": "Show me the average candidate counts and days spent in each funnel stage for roles like 05959BR.",
	"difficulty": "easy",
	"expected_output": {
	"response": "I'm sorry, but none of the available APIs provide stage-by-stage candidate counts or time-in-status metrics, so I can't generate a funnel table.",
	"keywords": [
	"APIs|API",
	"stage",
	"candidate counts",
	"time-in-status",
	"funnel"
	],
	"tool_calls": []
	}
	},
	{
	"name": "task_20",
	"description": "Returns start/end dates, last update date, and requisition count. | Explanation: Pulled time_frame_start, time_frame_end, data_last_updated and total_requisitions_analysed from the metadata-and-timeframe endpoint. | Note: Demonstrates a fully supported recency / frequency query (gap #5).",
	"intent": "What's the data timeframe for 05958BR and how many similar requisitions were analysed?",
	"difficulty": "easy",
	"expected_output": {
	"response": "The metrics cover 9 Oct 2023 – 15 Mar 2025 (≈ 17 months) and were last updated on 29 Apr 2025. A total of 40 similar requisitions were analysed.",
	"keywords": [
	"9 Oct 2023|Oct 9, 2023",
	"15 Mar 2025|Mar 15, 2025",
	"29 Apr 2025|Apr 29, 2025",
	"40",
	"requisitions"
	],
	"tool_calls": [
	{
	"name": "candidate_source_metadata_and_timeframe",
	"args": {}
	}
	],
	"tool_call_results": [
	{
	"name": "candidate_source_metadata_and_timeframe",
	"result": {
	"job_id": "05958BR",
	"time_frame_start": "2023-10-09",
	"time_frame_end": "2025-03-15",
	"data_last_updated": "2025-04-29",
	"total_requisitions_analysed": 40
	}
	}
	]
	}
	},
	{
	"name": "task_21",
	"description": "Gracefully declines unsupported analysis; shows sample comparative table. \| Explanation: Only the definition endpoint describes what time-to-fill means; no endpoint provides per-source values to compute deltas. \| Note: Covers gap #6.",
	"intent": "How does average time-to-fill compare to SLA for each sourcing channel?",
	"difficulty": "easy",
	"expected_output": {
	"response": "I'm afraid I can't provide that comparison because the current APIs don't expose time-to-fill broken down by source.",
	"keywords": [
	"APIs",
	"time-to-fill",
	"source"
	],
	"tool_calls": []
	}
	},
	{
	"name": "task_22",
	"description": "Explains unsupported geographic/channel filter and gives mock estimation. \| Explanation: No endpoints accept country/channel parameters; the example illustrates desired behaviour. \| Note: Covers gap #7: region & channel-specific analytics.",
	"intent": "If we posted this role internally in France only, how long would it take to fill?",
	"difficulty": "easy",
	"expected_output": {
	"response": "Current APIs don't allow filtering by geography or posting channel, so I'm unable to estimate a France-only internal posting timeline.",
	"keywords": [
	"Sorry\|no\|unable\|can't\|cannot",
	"APIs\|API\|filtering\|filter",
	"France\|geography\|geographic\|region\|channel",
	"internal\|posting"
	],
	"tool_calls": []
	}
	},
	{
	"name": "task_23",
	"description": "States limitation and shows example list of near-deadline requisitions. \| Explanation: No endpoint surfaces open requisitions with SLA dates. The hypothetical section demonstrates the desired list format. \| Note: Covers gap #8 and introduces a potential future endpoint.",
	"intent": "List all requisitions that are within 30 days of their SLA deadline.",
	"difficulty": "easy",
	"expected_output": {
	"response": "Sorry — the API suite doesn't provide live requisition status or SLA countdowns, so I can't generate that list.",
	"keywords": [
	"API\|APIs",
	"SLA",
	"requisition\|req",
	"status",
	"countdown\|countdowns",
	"deadline\|list"
	],
	"tool_calls": []
	}
	},
	{
	"name": "task_24",
	"description": "Graceful 'ID not found' message with suggested alternatives. \| Explanation: Because 05960BR does not exist, the assistant returns a polite error plus four close-match IDs (simulating fuzzy search in the ATS). No API call is made for a bad ID. \| Note: Error-handling scenario for invalid requisition IDs.",
	"intent": "Show candidate funnel for job id 05960BR",
	"difficulty": "easy",
	"expected_output": {
	"response": "No job can be found with the ID 05960BR.\nDid you want to use one of the following job IDs instead?\n• UZLXBR — Sourcing Manager\n• F50HBR — Offering Manager\n• MJZ1BR — Offering Manager\n• 5TTKBR — Delivery Analyst",
	"keywords": [
	"05960BR",
	"No job",
	"can be found\|not found"
	],
	"tool_calls": []
	}
	},
	{
	"name": "task_25",
	"description": "Explains unsupported request and sketches desired output. \| Explanation: There is no /job-details/ endpoint. The hypothetical section illustrates what the response would look like if such an endpoint became available. \| Note: Completes coverage for full requisition card' requests.",
	"intent": "Show me the details of UZLXBR",
	"difficulty": "easy",
	"expected_output": {
	"response": "Sorry — none of the current APIs provide full job-card details (title, location, hiring-manager email, etc.), so I can't display that information.",
	"keywords": [
	"APIs",
	"job-card",
	"details"
	],
	"tool_calls": []
	}
	},
	{
	"name": "task_26",
	"description": "Returns average candidate count for comparable requisitions. \| Explanation: candidate-volume-by-source returns `total_candidate_volume = 2913`; metadata-and-timeframe shows `total_requisitions_analysed = 40`. Dividing 2913 ÷ 40 ≈ 73 yields the average. \| Note: Covers the repeated average candidate volume questions.",
	"intent": "How many candidates do we usually get for postings similar to 05959BR?",
	"difficulty": "medium",
	"expected_output": {
	"response": "On average, similar postings attract 73 candidates.",
	"keywords": [
	"73",
	"candidates",
	"average"
	],
	"tool_calls": [
	{
	"name": "candidate_source_candidate_volume_by_source",
	"args": {}
	},
	{
	"name": "candidate_source_metadata_and_timeframe",
	"args": {}
	}
	],
	"tool_call_results": [
	{
	"name": "candidate_source_candidate_volume_by_source",
	"result": {
	"job_id": "05959BR",
	"total_candidate_volume": 2913,
	"metrics": [
	{
	"source_name": "Dice",
	"candidate_volume": 525,
	"percentage": 18
	},
	{
	"source_name": "LinkedIn",
	"candidate_volume": 525,
	"percentage": 18
	},
	{
	"source_name": "GitHub",
	"candidate_volume": 465,
	"percentage": 16
	},
	{
	"source_name": "Internal",
	"candidate_volume": 403,
	"percentage": 14
	},
	{
	"source_name": "Indeed",
	"candidate_volume": 400,
	"percentage": 14
	},
	{
	"source_name": "Referral",
	"candidate_volume": 400,
	"percentage": 14
	},
	{
	"source_name": "CyberSec Jobs",
	"candidate_volume": 195,
	"percentage": 7
	}
	],
	"heading": "For requisitions similar to 05959BR, there were 2913 candidates over the past three years. Here's how many candidates came from each source (with percentages from the total number):"
	}
	},
	{
	"name": "candidate_source_metadata_and_timeframe",
	"result": {
	"job_id": "05959BR",
	"time_frame_start": "2023-10-09",
	"time_frame_end": "2025-03-15",
	"data_last_updated": "2025-04-29",
	"total_requisitions_analysed": 40
	}
	}
	]
	}
	}
	]
	}
	]