BPO-Bench / data /tasks_undocumented.json
haroldshipibm's picture
Upload folder using huggingface_hub
d075a5b verified
[
{
"name": "bpo-benchmark",
"user_info": [],
"test_cases": [
{
"name": "task_43",
"description": "Tests agent handling of non-standard error format. The candidate_source_requisition_details tool returns {\"err\": \"not_found\"} instead of standard RequisitionNotFoundResponse when given an invalid ID.",
"intent": "Get detailed info for requisition 99999BR",
"difficulty": "medium",
"expected_output": {
"response": "Requisition 99999BR was not found. The system returned a non-standard error indicating the requisition does not exist.",
"keywords": [
"99999BR",
"not found|not_found|doesn't exist|does not exist|invalid"
],
"tool_calls": [
{
"name": "candidate_source_requisition_details",
"args": {
"requisition_id": "99999BR"
}
}
],
"error_handling": {
"error_type": "undocumented_error_format",
"should_report_error": true,
"should_retry": false,
"expected_behavior": "Agent should parse non-standard error format and report not found"
},
"tool_call_results": [
{
"name": "candidate_source_requisition_details",
"result": {
"err": "not_found",
"req": "99999BR"
}
}
]
}
},
{
"name": "task_44",
"description": "Tests agent handling of undocumented pagination. The candidate_source_list_all_sources tool returns only a page of results with a next_page token not described in any schema.",
"intent": "List all available sourcing channels for 05958BR",
"difficulty": "hard",
"expected_output": {
"response": "The available sourcing channels for 05958BR include multiple sources. The results show a total count of all channels, though the response is paginated.",
"keywords": [
"source|sources|channels",
"05958BR"
],
"tool_calls": [
{
"name": "candidate_source_list_all_sources",
"args": {
"requisition_id": "05958BR"
}
}
],
"error_handling": {
"error_type": "undocumented_pagination",
"should_report_error": false,
"should_retry": false,
"expected_behavior": "Agent should detect and handle pagination, noting there are more results"
},
"tool_call_results": [
{
"name": "candidate_source_list_all_sources",
"result": {
"requisition_id": "05958BR",
"sources": [
{
"name": "CyberSec Jobs",
"index": 0
},
{
"name": "Dice",
"index": 1
},
{
"name": "GitHub",
"index": 2
}
],
"total_count": 7,
"page_size": 3,
"page": 1,
"next_page": "eyJvZmZzZXQiOjMsInJlcV9pZCI6IjA1OTU4QlIifQ==",
"has_more": true
}
}
]
}
},
{
"name": "task_45",
"description": "Tests agent handling of undocumented rate limiting info in response body. The candidate_source_batch_metrics tool includes X-RateLimit headers embedded in the JSON response.",
"intent": "Fetch batch metrics for all sources on 05958BR",
"difficulty": "medium",
"expected_output": {
"response": "Batch metrics for 05958BR show candidate counts and hire data across all sourcing channels including LinkedIn, Dice, Indeed, GitHub, Referral, CyberSec Jobs, and TechCareers.",
"keywords": [
"metrics|batch",
"candidates|hires|sources",
"05958BR"
],
"tool_calls": [
{
"name": "candidate_source_batch_metrics",
"args": {
"requisition_id": "05958BR"
}
}
],
"error_handling": {
"error_type": "undocumented_rate_limiting",
"should_report_error": false,
"should_retry": false,
"expected_behavior": "Agent should process the metrics data and optionally note rate limit information"
},
"tool_call_results": [
{
"name": "candidate_source_batch_metrics",
"result": {
"requisition_id": "05958BR",
"metrics": {
"CyberSec Jobs": {
"candidates": 200,
"hires": 3,
"reviewed": 161
},
"Dice": {
"candidates": 516,
"hires": 11,
"reviewed": 57
},
"GitHub": {
"candidates": 468,
"hires": 10,
"reviewed": 356
},
"Indeed": {
"candidates": 410,
"hires": 0,
"reviewed": 316
},
"Internal": {
"candidates": 400,
"hires": 5,
"reviewed": 296
},
"LinkedIn": {
"candidates": 519,
"hires": 7,
"reviewed": 390
},
"Referral": {
"candidates": 400,
"hires": 4,
"reviewed": 280
}
},
"X-RateLimit-Limit": 100,
"X-RateLimit-Remaining": 97,
"X-RateLimit-Reset": "2025-05-01T00:00:00Z",
"X-RateLimit-Window": "1h"
}
}
]
}
}
]
}
]