[ { "name": "bpo-benchmark", "user_info": [], "test_cases": [ { "name": "task_1", "description": "Lists sources ranked by SLA success rate. | Explanation: CyberSec Jobs was identified as the lowest-performing source because its SLA success rate is 67 %, well below Dice (95 %), LinkedIn (95 %), GitHub (90 %), and the other sources returned by the API.", "intent": "For requisition 05958BR, which source has the lowest SLA performance?", "difficulty": "easy", "expected_output": { "response": "CyberSec Jobs with 67%", "keywords": [ "CyberSec Jobs", "67%|67 %|67" ], "tool_calls": [ { "name": "candidate_source_sla_per_source", "args": {} } ], "tool_call_results": [ { "name": "candidate_source_sla_per_source", "result": { "metrics": [ { "source_name": "CyberSec Jobs", "sla_percentage": 67 }, { "source_name": "Indeed", "sla_percentage": 86 }, { "source_name": "GitHub", "sla_percentage": 90 }, { "source_name": "Dice", "sla_percentage": 95 }, { "source_name": "Internal", "sla_percentage": 95 }, { "source_name": "LinkedIn", "sla_percentage": 95 }, { "source_name": "Referral", "sla_percentage": 95 } ] } } ] } }, { "name": "task_2", "description": "Asks for the missing requisition id. | Explanation: The query lacks a requisition ID which is required for the API call.", "intent": "What's the percentage of hires and the total hires per source?", "difficulty": "easy", "expected_output": { "response": "Can you share the requisition ID (e.g., 05958BR or 05959BR)?", "keywords": [ "requisition|req", "ID|id|identifier", "missing|without|share|provide|required" ], "tool_calls": [] } }, { "name": "task_3", "description": "Shows each source's candidate volume and offer/hire success metrics for jobs similar to 05958BR. | Explanation: Candidate counts and percentages were taken from the candidate-volume API; hire counts and offer-acceptance rates were taken from the recommendation-summary API. 
The two tables were joined on \"source_name\", producing a combined view of volume and effectiveness for the three leading sources. | Note: Cross-references performance and volume per source. Requires joining APIs on 'source_name'.", "intent": "For requisitions like 05958BR, which sources provided the most candidates, and how effective were they at converting to hires?", "difficulty": "medium", "expected_output": { "response": "LinkedIn: 519 candidates (18%), 7 hires. Offer acceptance rate: 70%. Dice: 516 candidates (18%), 11 hires. Offer acceptance rate: 79%. GitHub: 468 candidates (16%), 10 hires. Offer acceptance rate: 77%.", "keywords": [ "LinkedIn", "Dice", "GitHub", "Offer acceptance rate", "519", "516", "468", "18%|18 %|18", "70%|70 %|70", "79%|79 %|79", "77%|77 %|77", "hires" ], "tool_calls": [ { "name": "candidate_source_candidate_volume_by_source", "args": {} }, { "name": "candidate_source_source_recommendation_summary", "args": {} } ], "tool_call_results": [ { "name": "candidate_source_candidate_volume_by_source", "result": { "job_id": "05958BR", "total_candidate_volume": 2913, "metrics": [ { "source_name": "LinkedIn", "candidate_volume": 519, "percentage": 18 }, { "source_name": "Dice", "candidate_volume": 516, "percentage": 18 }, { "source_name": "GitHub", "candidate_volume": 468, "percentage": 16 }, { "source_name": "Indeed", "candidate_volume": 410, "percentage": 14 }, { "source_name": "Internal", "candidate_volume": 400, "percentage": 14 }, { "source_name": "Referral", "candidate_volume": 400, "percentage": 14 }, { "source_name": "CyberSec Jobs", "candidate_volume": 200, "percentage": 7 } ], "heading": "For requisitions similar to 05958BR, there were 2913 candidates over the past three years. 
Here's how many candidates came from each source (with percentages from the total number):" } }, { "name": "candidate_source_source_recommendation_summary", "result": { "total_requisitions": 40, "metrics": [ { "source_name": "CyberSec Jobs", "jobs_filled_percentage": 2, "first_round_review_percentage": 80, "offer_acceptance_rate": 67, "total_hires": 3 }, { "source_name": "Dice", "jobs_filled_percentage": 2, "first_round_review_percentage": 11, "offer_acceptance_rate": 79, "total_hires": 11 }, { "source_name": "GitHub", "jobs_filled_percentage": 2, "first_round_review_percentage": 76, "offer_acceptance_rate": 77, "total_hires": 10 }, { "source_name": "Indeed", "jobs_filled_percentage": 0, "first_round_review_percentage": 77, "offer_acceptance_rate": 0, "total_hires": 0 }, { "source_name": "Internal", "jobs_filled_percentage": 2, "first_round_review_percentage": 74, "offer_acceptance_rate": 70, "total_hires": 5 }, { "source_name": "LinkedIn", "jobs_filled_percentage": 2, "first_round_review_percentage": 75, "offer_acceptance_rate": 70, "total_hires": 7 }, { "source_name": "Referral", "jobs_filled_percentage": 2, "first_round_review_percentage": 70, "offer_acceptance_rate": 62, "total_hires": 4 } ] } } ] } }, { "name": "task_4", "description": "Asks for the missing requisition id. | Explanation: The query lacks a requisition ID which is required for the API call.", "intent": "Did Dice provide a good funnel conversion rate?", "difficulty": "easy", "expected_output": { "response": "Can you share the requisition ID (e.g., 05958BR or 05959BR)?", "keywords": [ "requisition|req", "ID|id|identifier", "missing|without|share|provide|required" ], "tool_calls": [] } }, { "name": "task_5", "description": "Asks for the missing requisition id. | Explanation: The query lacks a requisition ID which is required for the API call.", "intent": "Should I include the skill Python? 
What is its impact on SLA, fill rate, and overall relevance?", "difficulty": "easy", "expected_output": { "response": "Can you share the requisition ID (e.g., 05958BR or 05959BR)?", "keywords": [ "requisition|req", "ID|id|identifier", "missing|without|share|provide|required" ], "tool_calls": [] } }, { "name": "task_6", "description": "Recommends top-performing sources by combining SLA success, candidate volume, and funnel effectiveness. | Explanation: Each source received a weighted score (50 % SLA success, 30 % candidate volume share, 20 % offer-conversion rate). Dice and LinkedIn tied for top SLA (100 %) and high volume; GitHub's conversion (2.8 %), the highest among the three recommended sources, offset its 80 % SLA. Indeed scored 0 on SLA and offers, so it was excluded. | Note: This benchmark tests multi-criteria decision-making and cross-API synthesis.", "intent": "What are the best sources to prioritize for 05959BR?", "difficulty": "hard", "expected_output": { "response": "You should prioritize Dice, GitHub, and LinkedIn. Dice and LinkedIn both met SLA 100% of the time and brought in 18% of all candidates. Dice had a strong offer conversion rate (2.7%), and GitHub had the highest conversion of the three (2.8%) despite slightly lower SLA. 
Indeed should be avoided due to 0% SLA and 0% offer conversion.", "keywords": [ "Dice", "GitHub", "LinkedIn", "SLA", "Indeed" ], "tool_calls": [ { "name": "candidate_source_sla_per_source", "args": {} }, { "name": "candidate_source_candidate_volume_by_source", "args": {} }, { "name": "candidate_source_funnel_conversion_by_source", "args": {} } ], "tool_call_results": [ { "name": "candidate_source_sla_per_source", "result": { "metrics": [ { "source_name": "Indeed", "sla_percentage": 0 }, { "source_name": "CyberSec Jobs", "sla_percentage": 70 }, { "source_name": "GitHub", "sla_percentage": 80 }, { "source_name": "Internal", "sla_percentage": 85 }, { "source_name": "Dice", "sla_percentage": 100 }, { "source_name": "LinkedIn", "sla_percentage": 100 }, { "source_name": "Referral", "sla_percentage": 100 } ] } }, { "name": "candidate_source_candidate_volume_by_source", "result": { "job_id": "05959BR", "total_candidate_volume": 2913, "metrics": [ { "source_name": "Dice", "candidate_volume": 525, "percentage": 18 }, { "source_name": "LinkedIn", "candidate_volume": 525, "percentage": 18 }, { "source_name": "GitHub", "candidate_volume": 465, "percentage": 16 }, { "source_name": "Internal", "candidate_volume": 403, "percentage": 14 }, { "source_name": "Indeed", "candidate_volume": 400, "percentage": 14 }, { "source_name": "Referral", "candidate_volume": 400, "percentage": 14 }, { "source_name": "CyberSec Jobs", "candidate_volume": 195, "percentage": 7 } ], "heading": "For requisitions similar to 05959BR, there were 2913 candidates over the past three years. 
Here's how many candidates came from each source (with percentages from the total number):" } }, { "name": "candidate_source_funnel_conversion_by_source", "result": { "job_id": "05959BR", "metrics": [ { "source_name": "CyberSec Jobs", "first_round_review_percentage": 80.5, "interview_rate": 18.5, "offer_acceptance_rate": 3.1 }, { "source_name": "Dice", "first_round_review_percentage": 76.0, "interview_rate": 9.9, "offer_acceptance_rate": 2.7 }, { "source_name": "GitHub", "first_round_review_percentage": 72.0, "interview_rate": 16.6, "offer_acceptance_rate": 2.8 }, { "source_name": "Indeed", "first_round_review_percentage": 72.2, "interview_rate": 14.8, "offer_acceptance_rate": 0.0 }, { "source_name": "Internal", "first_round_review_percentage": 76.9, "interview_rate": 19.6, "offer_acceptance_rate": 2.5 }, { "source_name": "LinkedIn", "first_round_review_percentage": 70.1, "interview_rate": 21.0, "offer_acceptance_rate": 1.9 }, { "source_name": "Referral", "first_round_review_percentage": 74.5, "interview_rate": 20.5, "offer_acceptance_rate": 2.0 } ] } } ] } }, { "name": "task_7", "description": "Asks for the missing requisition id. | Explanation: The query lacks a requisition ID which is required for the API call.", "intent": "Out of these skills — Python, Quantum Physics, Cyber Engineering, Risk Analysis, Wireshark — which ones negatively affect SLA performance?", "difficulty": "easy", "expected_output": { "response": "Can you share the requisition ID (e.g., 05958BR or 05959BR)?", "keywords": [ "requisition|req", "ID|id|identifier", "missing|without|share|provide|required" ], "tool_calls": [] } }, { "name": "task_8", "description": "Returns the definition of the SLA metric for the given requisition. | Explanation: The definitions-and-methodology endpoint contains a JSON field \"sla\" holding the textual definition; the agent extracted that string verbatim. 
| Note: Tests the agent's ability to locate and return a specific definition.", "intent": "How is the SLA metric defined for 05958BR?", "difficulty": "easy", "expected_output": { "response": "SLA is defined as 'Percentage of candidates reviewed within the defined SLA window (e.g., 48 hours)'.", "keywords": [ "SLA", "Percentage", "reviewed", "window" ], "tool_calls": [ { "name": "candidate_source_definitions_and_methodology", "args": {} } ], "tool_call_results": [ { "name": "candidate_source_definitions_and_methodology", "result": { "job_id": "05958BR", "definitions": { "sla": "Percentage of candidates reviewed within the defined SLA window (e.g., 48 hours)", "time_to_fill": "Average time from job posting to accepted offer", "success_rate": "Ratio of candidates who accepted offers out of those interviewed" }, "calculation_notes": "Metrics are computed from 1047 requisitions over the last 1.4 years. Funnel stats are based on system timestamps and recruiter actions in ATS.", "top_metrics_considered": [ "SLA %", "First round review %", "Offer acceptance rate", "Candidate volume", "Total hires" ] } } ] } }, { "name": "task_9", "description": "Returns the number of requisitions used to compute the reported metrics. | Explanation: The methodology response includes a note like \"Metrics calculated over N = 1047 requisitions\"; the agent parsed the integer 1047 and returned it. 
| Note: Tests string parsing / information extraction from notes field.", "intent": "How many requisitions were used to compute these metrics for 05958BR?", "difficulty": "easy", "expected_output": { "response": "Metrics are computed from 1047 requisitions.", "keywords": [ "1047", "requisitions" ], "tool_calls": [ { "name": "candidate_source_definitions_and_methodology", "args": {} } ], "tool_call_results": [ { "name": "candidate_source_definitions_and_methodology", "result": { "job_id": "05958BR", "definitions": { "sla": "Percentage of candidates reviewed within the defined SLA window (e.g., 48 hours)", "time_to_fill": "Average time from job posting to accepted offer", "success_rate": "Ratio of candidates who accepted offers out of those interviewed" }, "calculation_notes": "Metrics are computed from 1047 requisitions over the last 1.4 years. Funnel stats are based on system timestamps and recruiter actions in ATS.", "top_metrics_considered": [ "SLA %", "First round review %", "Offer acceptance rate", "Candidate volume", "Total hires" ] } } ] } }, { "name": "task_10", "description": "Returns the list of top metrics considered for source evaluation. | Explanation: The agent read the \"top_metrics_considered\" array from the methodology API response and returned the metrics in the same order. 
| Note: Tests structured list extraction and formatting.", "intent": "What are the top metrics considered when evaluating candidate sources for 05958BR?", "difficulty": "easy", "expected_output": { "response": "The top metrics considered are: SLA %, First round review %, Offer acceptance rate, Candidate volume, Total hires.", "keywords": [ "SLA", "First round review", "Offer acceptance", "Candidate volume", "Total hires" ], "tool_calls": [ { "name": "candidate_source_definitions_and_methodology", "args": {} } ], "tool_call_results": [ { "name": "candidate_source_definitions_and_methodology", "result": { "job_id": "05958BR", "definitions": { "sla": "Percentage of candidates reviewed within the defined SLA window (e.g., 48 hours)", "time_to_fill": "Average time from job posting to accepted offer", "success_rate": "Ratio of candidates who accepted offers out of those interviewed" }, "calculation_notes": "Metrics are computed from 1047 requisitions over the last 1.4 years. Funnel stats are based on system timestamps and recruiter actions in ATS.", "top_metrics_considered": [ "SLA %", "First round review %", "Offer acceptance rate", "Candidate volume", "Total hires" ] } } ] } }, { "name": "task_11", "description": "Loops through the provided list of models and reports which ones were used. | Explanation: The agent compared each provided model name against the \"models_involved\" array returned by data-sources-used API and reported matches (used) or non-matches (not used). | Note: Tests loop-based reasoning and partial matching for list membership.", "intent": "Were the following models used to generate metrics for 05958BR: SLA impact regression model, Candidate ranking model, Skill relevance classifier?", "difficulty": "easy", "expected_output": { "response": "Yes, 'SLA impact regression model' and 'Skill relevance classifier' were used. 
'Candidate ranking model' was not listed among the models involved.", "keywords": [ "SLA impact regression model", "Skill relevance classifier", "Candidate ranking model" ], "tool_calls": [ { "name": "skills_data_sources_used", "args": {} } ], "tool_call_results": [ { "name": "skills_data_sources_used", "result": { "requisition_id": "05958BR", "datasets_used": [ "Historical hiring success data", "Requisition skill tagging", "Funnel conversion metrics", "Candidate quality feedback" ], "models_involved": [ "Skill relevance classifier", "SLA impact regression model", "Funnel conversion recommender" ] } } ] } }, { "name": "task_12", "description": "Loops through the provided list of data sources and reports which ones were used. | Explanation: Each candidate data source was checked against the \"datasets_used\" array from data-sources-used API; two matched and one did not, which the agent reported accordingly. | Note: Tests loop-based reasoning and partial matching for list membership.", "intent": "Were the following data sources used to compute the metrics for 05958BR: Historical hiring success data, Job description embeddings, Funnel conversion metrics?", "difficulty": "easy", "expected_output": { "response": "Yes, 'Historical hiring success data' and 'Funnel conversion metrics' were used. 
'Job description embeddings' was not listed among the data sources.", "keywords": [ "Historical hiring success data", "Funnel conversion metrics", "Job description embeddings" ], "tool_calls": [ { "name": "skills_data_sources_used", "args": {} } ], "tool_call_results": [ { "name": "skills_data_sources_used", "result": { "requisition_id": "05958BR", "datasets_used": [ "Historical hiring success data", "Requisition skill tagging", "Funnel conversion metrics", "Candidate quality feedback" ], "models_involved": [ "Skill relevance classifier", "SLA impact regression model", "Funnel conversion recommender" ] } } ] } }, { "name": "task_13", "description": "Combines model lookup, retrieves actual SLA delta, and returns SLA definition. | Explanation: The SLA-impact API showed a 0 % delta for Python; data-sources-used API confirmed the 'SLA impact regression model' was involved; the methodology API supplied the formal SLA definition. These three pieces were combined into one coherent answer. | Note: Agent must combine numerical result (delta), model lookup, and formal definition into unified answer.", "intent": "For 05958BR, when evaluating the SLA impact of Python, which models were used, what was the SLA delta, and what is the system definition of SLA?", "difficulty": "hard", "expected_output": { "response": "'SLA impact regression model' was used. The SLA delta for Python was 0.0%. 
SLA is defined as 'Percentage of candidates reviewed within the defined SLA window (e.g., 48 hours)'.", "keywords": [ "SLA impact regression model", "0.0%|0.0 %|0.0|0%|0 %|0", "SLA", "Percentage", "reviewed", "window" ], "tool_calls": [ { "name": "skills_skill_impact_sla", "args": {} }, { "name": "skills_data_sources_used", "args": {} }, { "name": "candidate_source_definitions_and_methodology", "args": {} } ], "tool_call_results": [ { "name": "skills_skill_impact_sla", "result": { "requisition_id": "05958BR", "skill_name": "Python", "sla_achievement_with_skill": 90, "sla_achievement_without_skill": 90, "delta": 0 } }, { "name": "skills_data_sources_used", "result": { "requisition_id": "05958BR", "datasets_used": [ "Historical hiring success data", "Requisition skill tagging", "Funnel conversion metrics", "Candidate quality feedback" ], "models_involved": [ "Skill relevance classifier", "SLA impact regression model", "Funnel conversion recommender" ] } }, { "name": "candidate_source_definitions_and_methodology", "result": { "job_id": "05958BR", "definitions": { "sla": "Percentage of candidates reviewed within the defined SLA window (e.g., 48 hours)", "time_to_fill": "Average time from job posting to accepted offer", "success_rate": "Ratio of candidates who accepted offers out of those interviewed" }, "calculation_notes": "Metrics are computed from 1047 requisitions over the last 1.4 years. Funnel stats are based on system timestamps and recruiter actions in ATS.", "top_metrics_considered": [ "SLA %", "First round review %", "Offer acceptance rate", "Candidate volume", "Total hires" ] } } ] } }, { "name": "task_14", "description": "States that Risk Analysis negatively affects SLA and lists the datasets that informed the analysis. | Explanation: The skill-analysis API flagged Risk Analysis as negatively correlated with SLA. The data-sources-used API listed the four datasets underpinning the evaluation, and both results were consolidated in the response. 
| Note: Correlation wording corrected to match API ('highly negative impact on SLA').", "intent": "Was 'Risk Analysis' considered historically effective, and what data sources informed this analysis for 05958BR?", "difficulty": "medium", "expected_output": { "response": "'Risk Analysis' is **not** considered effective: historical analysis shows it is correlated with a **highly negative impact on SLA**. The evaluation used these data sources: Historical hiring success data, Requisition skill tagging, Funnel conversion metrics, and Candidate quality feedback.", "keywords": [ "Risk Analysis", "not", "effective", "highly negative impact on SLA", "SLA", "Historical hiring success data", "Requisition skill tagging" ], "tool_calls": [ { "name": "skills_skill_analysis", "args": {} }, { "name": "skills_data_sources_used", "args": {} } ], "tool_call_results": [ { "name": "skills_skill_analysis", "result": { "historical_jobs": 40, "input_skills": [], "historical_skills_with_analysis": [ { "name": "AWS", "skill_occurrence": 948, "correlation": "slightly positive impact on SLA" }, { "name": "IT Support", "skill_occurrence": 868, "correlation": "slightly positive impact on SLA" }, { "name": "NIST Cybersecurity Framework", "skill_occurrence": 816, "correlation": "slightly negative impact on SLA" }, { "name": "Incident Management", "skill_occurrence": 748, "correlation": "slightly negative impact on SLA" }, { "name": "Firewalls", "skill_occurrence": 744, "correlation": "slightly negative impact on SLA" }, { "name": "Cloud Security", "skill_occurrence": 592, "correlation": "slightly negative impact on SLA" }, { "name": "Risk Assessment", "skill_occurrence": 570, "correlation": "slightly negative impact on SLA" }, { "name": "Risk Analysis", "skill_occurrence": 371, "correlation": "highly negative impact on SLA" }, { "name": "Microsoft PowerShell", "skill_occurrence": 289, "correlation": "slightly negative impact on SLA" }, { "name": "Python", "skill_occurrence": 200, "correlation": 
"slightly negative impact on SLA" } ] } }, { "name": "skills_data_sources_used", "result": { "requisition_id": "05958BR", "datasets_used": [ "Historical hiring success data", "Requisition skill tagging", "Funnel conversion metrics", "Candidate quality feedback" ], "models_involved": [ "Skill relevance classifier", "SLA impact regression model", "Funnel conversion recommender" ] } } ] } }, { "name": "task_15", "description": "Explains that no provided skills hurt SLA, no provided sources require avoidance, and lists datasets used. | Explanation: The agent looped over each skill, querying SLA-impact and skill-analysis APIs; none yielded a negative delta. It then looped over each source, combining SLA-per-source and funnel-conversion data; all three met both SLA and offer benchmarks, so none were flagged. Finally, the datasets_used array was fetched once and reported. | Note: Fully aligned with current API payloads—no unsupported claims.", "intent": "For requisition 05958BR, which of the following skills — Python, Quantum Physics, Wireshark — negatively impact SLA, and which of the following sources — Dice, GitHub, Referral — should be avoided based on low offer conversion or SLA performance? Also, which data sources were used to compute these insights?", "difficulty": "hard", "expected_output": { "response": "None of the listed skills show a negative SLA impact in the available data: Python's SLA delta is 0 %, and Quantum Physics and Wireshark are not present in the analysis. Likewise, Dice (SLA 95 %, offer-acceptance 79 %), GitHub (SLA 90 %, offer-acceptance 77 %), and Referral (SLA 95 %, offer-acceptance 62 %) all perform satisfactorily, so none of them need to be avoided. 
These insights were produced using the following datasets: Historical hiring success data, Requisition skill tagging, Funnel conversion metrics, and Candidate quality feedback.", "keywords": [ "Python", "Quantum Physics", "Wireshark", "Dice", "GitHub", "Referral", "SLA", "0%|0 %|0", "95%|95 %|95", "79%|79 %|79", "90%|90 %|90", "77%|77 %|77", "62%|62 %|62", "Historical hiring success data", "Requisition skill tagging", "Funnel conversion metrics", "Candidate quality feedback" ], "tool_calls": [ { "name": "skills_skill_impact_sla", "args": {} }, { "name": "skills_skill_analysis", "args": {} }, { "name": "skills_data_sources_used", "args": {} }, { "name": "candidate_source_funnel_conversion_by_source", "args": {} }, { "name": "candidate_source_sla_per_source", "args": {} } ], "tool_call_results": [ { "name": "skills_skill_impact_sla", "result": { "requisition_id": "05958BR", "skill_name": "Python", "sla_achievement_with_skill": 90, "sla_achievement_without_skill": 90, "delta": 0 } }, { "name": "skills_skill_analysis", "result": { "historical_jobs": 40, "input_skills": [], "historical_skills_with_analysis": [ { "name": "AWS", "skill_occurrence": 948, "correlation": "slightly positive impact on SLA" }, { "name": "IT Support", "skill_occurrence": 868, "correlation": "slightly positive impact on SLA" }, { "name": "NIST Cybersecurity Framework", "skill_occurrence": 816, "correlation": "slightly negative impact on SLA" }, { "name": "Incident Management", "skill_occurrence": 748, "correlation": "slightly negative impact on SLA" }, { "name": "Firewalls", "skill_occurrence": 744, "correlation": "slightly negative impact on SLA" }, { "name": "Cloud Security", "skill_occurrence": 592, "correlation": "slightly negative impact on SLA" }, { "name": "Risk Assessment", "skill_occurrence": 570, "correlation": "slightly negative impact on SLA" }, { "name": "Risk Analysis", "skill_occurrence": 371, "correlation": "highly negative impact on SLA" }, { "name": "Microsoft PowerShell", 
"skill_occurrence": 289, "correlation": "slightly negative impact on SLA" }, { "name": "Python", "skill_occurrence": 200, "correlation": "slightly negative impact on SLA" } ] } }, { "name": "skills_data_sources_used", "result": { "requisition_id": "05958BR", "datasets_used": [ "Historical hiring success data", "Requisition skill tagging", "Funnel conversion metrics", "Candidate quality feedback" ], "models_involved": [ "Skill relevance classifier", "SLA impact regression model", "Funnel conversion recommender" ] } }, { "name": "candidate_source_funnel_conversion_by_source", "result": { "job_id": "05958BR", "metrics": [ { "source_name": "CyberSec Jobs", "first_round_review_percentage": 80.5, "interview_rate": 19.0, "offer_acceptance_rate": 3.0 }, { "source_name": "Dice", "first_round_review_percentage": 11.0, "interview_rate": 6.8, "offer_acceptance_rate": 2.7 }, { "source_name": "GitHub", "first_round_review_percentage": 76.1, "interview_rate": 23.7, "offer_acceptance_rate": 2.8 }, { "source_name": "Indeed", "first_round_review_percentage": 77.1, "interview_rate": 22.0, "offer_acceptance_rate": 0.0 }, { "source_name": "Internal", "first_round_review_percentage": 74.0, "interview_rate": 18.5, "offer_acceptance_rate": 2.5 }, { "source_name": "LinkedIn", "first_round_review_percentage": 75.1, "interview_rate": 20.4, "offer_acceptance_rate": 1.9 }, { "source_name": "Referral", "first_round_review_percentage": 70.0, "interview_rate": 17.0, "offer_acceptance_rate": 2.0 } ] } }, { "name": "candidate_source_sla_per_source", "result": { "metrics": [ { "source_name": "CyberSec Jobs", "sla_percentage": 67 }, { "source_name": "Indeed", "sla_percentage": 86 }, { "source_name": "GitHub", "sla_percentage": 90 }, { "source_name": "Dice", "sla_percentage": 95 }, { "source_name": "Internal", "sla_percentage": 95 }, { "source_name": "LinkedIn", "sla_percentage": 95 }, { "source_name": "Referral", "sla_percentage": 95 } ] } } ] } }, { "name": "task_16", "description": "Apologises for 
missing capability; optionally provides an illustrative optimisation if the feature existed. | Explanation: No endpoint returns or accepts free-text job descriptions, so optimisation is unsupported. The hypothetical section shows what the answer could look like if such an endpoint were added. | Note: Demonstrates graceful degradation plus a forward-looking example.", "intent": "Help me optimize the job description for 05959BR.", "difficulty": "easy", "expected_output": { "response": "Sorry — the available APIs do not expose the current job-description text, so I cannot directly optimise it.", "keywords": [ "APIs|API", "job-description|job description", "cannot|can't" ], "tool_calls": [] } }, { "name": "task_17", "description": "Prompts the user for the missing job ID instead of guessing. | Explanation: Illustrates a clarification turn when a required parameter (requisition_id) is missing. | Note: Tests conversational error-handling with zero API usage.", "intent": "Which sourcing channel is the most effective for this job?", "difficulty": "easy", "expected_output": { "response": "Can you share the requisition ID (e.g., 05958BR or 05959BR)?", "keywords": [ "requisition|req", "ID|id|identifier", "missing|without|share|provide|required" ], "tool_calls": [] } }, { "name": "task_18", "description": "Explains unsupported request; shows what a successful answer might include. | Explanation: No endpoints expose hiring-manager data; a possible future answer is sketched for context. | Note: Covers gap #3: hiring-manager analytics.", "intent": "Who's the hiring manager for 05959BR and how responsive is she?", "difficulty": "easy", "expected_output": { "response": "Sorry — current APIs don't return hiring-manager names or responsiveness metrics, so I can't answer that.", "keywords": [ "APIs", "hiring-manager", "responsive", "can't|cannot" ], "tool_calls": [] } }, { "name": "task_19", "description": "States limitation; optional mock table shows desired granularity. 
| Explanation: The existing funnel-conversion API only returns percentages by source, not absolute counts or durations per stage. | Note: Covers gap #4: full funnel metrics.", "intent": "Show me the average candidate counts and days spent in each funnel stage for roles like 05959BR.", "difficulty": "easy", "expected_output": { "response": "I'm sorry, but none of the available APIs provide stage-by-stage candidate counts or time-in-status metrics, so I can't generate a funnel table.", "keywords": [ "APIs|API", "stage", "candidate counts", "time-in-status", "funnel" ], "tool_calls": [] } }, { "name": "task_20", "description": "Returns start/end dates, last update date, and requisition count. | Explanation: Pulled time_frame_start, time_frame_end, data_last_updated and total_requisitions_analysed from the metadata-and-timeframe endpoint. | Note: Demonstrates a fully supported recency / frequency query (gap #5).", "intent": "What's the data timeframe for 05958BR and how many similar requisitions were analysed?", "difficulty": "easy", "expected_output": { "response": "The metrics cover 9 Oct 2023 – 15 Mar 2025 (≈ 18 months) and were last updated on 29 Apr 2025. A total of 40 similar requisitions were analysed.", "keywords": [ "9 Oct 2023|Oct 9, 2023", "15 Mar 2025|Mar 15, 2025", "29 Apr 2025|Apr 29, 2025", "40", "requisitions" ], "tool_calls": [ { "name": "candidate_source_metadata_and_timeframe", "args": {} } ], "tool_call_results": [ { "name": "candidate_source_metadata_and_timeframe", "result": { "job_id": "05958BR", "time_frame_start": "2023-10-09", "time_frame_end": "2025-03-15", "data_last_updated": "2025-04-29", "total_requisitions_analysed": 40 } } ] } }, { "name": "task_21", "description": "Gracefully declines unsupported analysis; shows sample comparative table. | Explanation: Only the definition endpoint describes *what* time-to-fill means; no endpoint provides per-source values to compute deltas. 
| Note: Covers gap #6.", "intent": "How does average time-to-fill compare to SLA for each sourcing channel?", "difficulty": "easy", "expected_output": { "response": "I'm afraid I can't provide that comparison because the current APIs don't expose time-to-fill broken down by source.", "keywords": [ "APIs", "time-to-fill", "source" ], "tool_calls": [] } }, { "name": "task_22", "description": "Explains unsupported geographic/channel filter and gives mock estimation. | Explanation: No endpoints accept country/channel parameters; the example illustrates desired behaviour. | Note: Covers gap #7: region & channel-specific analytics.", "intent": "If we posted this role internally in France only, how long would it take to fill?", "difficulty": "easy", "expected_output": { "response": "Current APIs don't allow filtering by geography or posting channel, so I'm unable to estimate a France-only internal posting timeline.", "keywords": [ "Sorry|no|unable|can't|cannot", "APIs|API|filtering|filter", "France|geography|geographic|region|channel", "internal|posting" ], "tool_calls": [] } }, { "name": "task_23", "description": "States limitation and shows example list of near-deadline requisitions. | Explanation: No endpoint surfaces open requisitions with SLA dates. The hypothetical section demonstrates the desired list format. | Note: Covers gap #8 and introduces a potential future endpoint.", "intent": "List all requisitions that are within 30 days of their SLA deadline.", "difficulty": "easy", "expected_output": { "response": "Sorry — the API suite doesn't provide live requisition status or SLA countdowns, so I can't generate that list.", "keywords": [ "API|APIs", "SLA", "requisition|req", "status", "countdown|countdowns", "deadline|list" ], "tool_calls": [] } }, { "name": "task_24", "description": "Graceful 'ID not found' message with suggested alternatives. 
| Explanation: Because 05960BR does not exist, the assistant returns a polite error plus four close-match IDs (simulating fuzzy search in the ATS). No API call is made for a bad ID. | Note: Error-handling scenario for invalid requisition IDs.", "intent": "Show candidate funnel for job id 05960BR", "difficulty": "easy", "expected_output": { "response": "No job can be found with the ID 05960BR.\nDid you want to use one of the following job IDs instead?\n• UZLXBR — Sourcing Manager\n• F50HBR — Offering Manager\n• MJZ1BR — Offering Manager\n• 5TTKBR — Delivery Analyst", "keywords": [ "05960BR", "No job", "can be found|not found" ], "tool_calls": [] } }, { "name": "task_25", "description": "Explains unsupported request and sketches desired output. | Explanation: There is no /job-details/ endpoint. The hypothetical section illustrates what the response would look like if such an endpoint became available. | Note: Completes coverage for 'full requisition card' requests.", "intent": "Show me the details of UZLXBR", "difficulty": "easy", "expected_output": { "response": "Sorry — none of the current APIs provide full job-card details (title, location, hiring-manager email, etc.), so I can't display that information.", "keywords": [ "APIs", "job-card", "details" ], "tool_calls": [] } }, { "name": "task_26", "description": "Returns average candidate count for comparable requisitions. | Explanation: candidate-volume-by-source returns `total_candidate_volume = 2913`; metadata-and-timeframe shows `total_requisitions_analysed = 40`. Dividing 2913 ÷ 40 ≈ 73 yields the average. 
| Note: Covers the repeated average candidate volume questions.", "intent": "How many candidates do we usually get for postings similar to 05959BR?", "difficulty": "medium", "expected_output": { "response": "On average, similar postings attract **73 candidates**.", "keywords": [ "73", "candidates", "average" ], "tool_calls": [ { "name": "candidate_source_candidate_volume_by_source", "args": {} }, { "name": "candidate_source_metadata_and_timeframe", "args": {} } ], "tool_call_results": [ { "name": "candidate_source_candidate_volume_by_source", "result": { "job_id": "05959BR", "total_candidate_volume": 2913, "metrics": [ { "source_name": "Dice", "candidate_volume": 525, "percentage": 18 }, { "source_name": "LinkedIn", "candidate_volume": 525, "percentage": 18 }, { "source_name": "GitHub", "candidate_volume": 465, "percentage": 16 }, { "source_name": "Internal", "candidate_volume": 403, "percentage": 14 }, { "source_name": "Indeed", "candidate_volume": 400, "percentage": 14 }, { "source_name": "Referral", "candidate_volume": 400, "percentage": 14 }, { "source_name": "CyberSec Jobs", "candidate_volume": 195, "percentage": 7 } ], "heading": "For requisitions similar to 05959BR, there were 2913 candidates over the past three years. Here's how many candidates came from each source (with percentages from the total number):" } }, { "name": "candidate_source_metadata_and_timeframe", "result": { "job_id": "05959BR", "time_frame_start": "2023-10-09", "time_frame_end": "2025-03-15", "data_last_updated": "2025-04-29", "total_requisitions_analysed": 40 } } ] } } ] } ]