Spaces:

ibm-research
/

BPO-Bench

Running

App Files Files Community

BPO-Bench / data /tasks_undocumented.json

haroldshipibm

Upload folder using huggingface_hub

d075a5b verified 7 days ago

raw

history blame contribute delete

6.24 kB

	[
	{
	"name": "bpo-benchmark",
	"user_info": [],
	"test_cases": [
	{
	"name": "task_43",
	"description": "Tests agent handling of non-standard error format. The candidate_source_requisition_details tool returns {\"err\": \"not_found\"} instead of standard RequisitionNotFoundResponse when given an invalid ID.",
	"intent": "Get detailed info for requisition 99999BR",
	"difficulty": "medium",
	"expected_output": {
	"response": "Requisition 99999BR was not found. The system returned a non-standard error indicating the requisition does not exist.",
	"keywords": [
	"99999BR",
	"not found\|not_found\|doesn't exist\|does not exist\|invalid"
	],
	"tool_calls": [
	{
	"name": "candidate_source_requisition_details",
	"args": {
	"requisition_id": "99999BR"
	}
	}
	],
	"error_handling": {
	"error_type": "undocumented_error_format",
	"should_report_error": true,
	"should_retry": false,
	"expected_behavior": "Agent should parse non-standard error format and report not found"
	},
	"tool_call_results": [
	{
	"name": "candidate_source_requisition_details",
	"result": {
	"err": "not_found",
	"req": "99999BR"
	}
	}
	]
	}
	},
	{
	"name": "task_44",
	"description": "Tests agent handling of undocumented pagination. The candidate_source_list_all_sources tool returns only a page of results with a next_page token not described in any schema.",
	"intent": "List all available sourcing channels for 05958BR",
	"difficulty": "hard",
	"expected_output": {
	"response": "The available sourcing channels for 05958BR include multiple sources. The results show a total count of all channels, though the response is paginated.",
	"keywords": [
	"source\|sources\|channels",
	"05958BR"
	],
	"tool_calls": [
	{
	"name": "candidate_source_list_all_sources",
	"args": {
	"requisition_id": "05958BR"
	}
	}
	],
	"error_handling": {
	"error_type": "undocumented_pagination",
	"should_report_error": false,
	"should_retry": false,
	"expected_behavior": "Agent should detect and handle pagination, noting there are more results"
	},
	"tool_call_results": [
	{
	"name": "candidate_source_list_all_sources",
	"result": {
	"requisition_id": "05958BR",
	"sources": [
	{
	"name": "CyberSec Jobs",
	"index": 0
	},
	{
	"name": "Dice",
	"index": 1
	},
	{
	"name": "GitHub",
	"index": 2
	}
	],
	"total_count": 7,
	"page_size": 3,
	"page": 1,
	"next_page": "eyJvZmZzZXQiOjMsInJlcV9pZCI6IjA1OTU4QlIifQ==",
	"has_more": true
	}
	}
	]
	}
	},
	{
	"name": "task_45",
	"description": "Tests agent handling of undocumented rate limiting info in response body. The candidate_source_batch_metrics tool includes X-RateLimit headers embedded in the JSON response.",
	"intent": "Fetch batch metrics for all sources on 05958BR",
	"difficulty": "medium",
	"expected_output": {
	"response": "Batch metrics for 05958BR show candidate counts and hire data across all sourcing channels including LinkedIn, Dice, Indeed, GitHub, Referral, CyberSec Jobs, and TechCareers.",
	"keywords": [
	"metrics\|batch",
	"candidates\|hires\|sources",
	"05958BR"
	],
	"tool_calls": [
	{
	"name": "candidate_source_batch_metrics",
	"args": {
	"requisition_id": "05958BR"
	}
	}
	],
	"error_handling": {
	"error_type": "undocumented_rate_limiting",
	"should_report_error": false,
	"should_retry": false,
	"expected_behavior": "Agent should process the metrics data and optionally note rate limit information"
	},
	"tool_call_results": [
	{
	"name": "candidate_source_batch_metrics",
	"result": {
	"requisition_id": "05958BR",
	"metrics": {
	"CyberSec Jobs": {
	"candidates": 200,
	"hires": 3,
	"reviewed": 161
	},
	"Dice": {
	"candidates": 516,
	"hires": 11,
	"reviewed": 57
	},
	"GitHub": {
	"candidates": 468,
	"hires": 10,
	"reviewed": 356
	},
	"Indeed": {
	"candidates": 410,
	"hires": 0,
	"reviewed": 316
	},
	"Internal": {
	"candidates": 400,
	"hires": 5,
	"reviewed": 296
	},
	"LinkedIn": {
	"candidates": 519,
	"hires": 7,
	"reviewed": 390
	},
	"Referral": {
	"candidates": 400,
	"hires": 4,
	"reviewed": 280
	}
	},
	"X-RateLimit-Limit": 100,
	"X-RateLimit-Remaining": 97,
	"X-RateLimit-Reset": "2025-05-01T00:00:00Z",
	"X-RateLimit-Window": "1h"
	}
	}
	]
	}
	}
	]
	}
	]