[ { "name": "bpo-benchmark", "user_info": [], "test_cases": [ { "name": "task_43", "description": "Tests agent handling of non-standard error format. The candidate_source_requisition_details tool returns {\"err\": \"not_found\"} instead of standard RequisitionNotFoundResponse when given an invalid ID.", "intent": "Get detailed info for requisition 99999BR", "difficulty": "medium", "expected_output": { "response": "Requisition 99999BR was not found. The system returned a non-standard error indicating the requisition does not exist.", "keywords": [ "99999BR", "not found|not_found|doesn't exist|does not exist|invalid" ], "tool_calls": [ { "name": "candidate_source_requisition_details", "args": { "requisition_id": "99999BR" } } ], "error_handling": { "error_type": "undocumented_error_format", "should_report_error": true, "should_retry": false, "expected_behavior": "Agent should parse non-standard error format and report not found" }, "tool_call_results": [ { "name": "candidate_source_requisition_details", "result": { "err": "not_found", "req": "99999BR" } } ] } }, { "name": "task_44", "description": "Tests agent handling of undocumented pagination. The candidate_source_list_all_sources tool returns only a page of results with a next_page token not described in any schema.", "intent": "List all available sourcing channels for 05958BR", "difficulty": "hard", "expected_output": {
"response": "The available sourcing channels for 05958BR include multiple sources. The results show a total count of all channels, though the response is paginated.", "keywords": [ "source|sources|channels", "05958BR" ], "tool_calls": [ { "name": "candidate_source_list_all_sources", "args": { "requisition_id": "05958BR" } } ], "error_handling": { "error_type": "undocumented_pagination", "should_report_error": false, "should_retry": false, "expected_behavior": "Agent should detect and handle pagination, noting there are more results" }, "tool_call_results": [ { "name": "candidate_source_list_all_sources", "result": { "requisition_id": "05958BR", "sources": [ { "name": "CyberSec Jobs", "index": 0 }, { "name": "Dice", "index": 1 }, { "name": "GitHub", "index": 2 } ], "total_count": 7, "page_size": 3, "page": 1, "next_page": "eyJvZmZzZXQiOjMsInJlcV9pZCI6IjA1OTU4QlIifQ==", "has_more": true } } ] } }, { "name": "task_45", "description": "Tests agent handling of undocumented rate limiting info in response body. The candidate_source_batch_metrics tool includes X-RateLimit headers embedded in the JSON response.", "intent": "Fetch batch metrics for all sources on 05958BR", "difficulty": "medium", "expected_output": { "response": "Batch metrics for 05958BR show candidate counts and hire data across all sourcing channels including LinkedIn, Dice, Indeed, GitHub, Referral, CyberSec Jobs, and Internal.", "keywords": [ "metrics|batch", "candidates|hires|sources", "05958BR" ], "tool_calls": [ { "name": "candidate_source_batch_metrics", "args": { "requisition_id": "05958BR" } } ], "error_handling": { "error_type": "undocumented_rate_limiting", "should_report_error": false, "should_retry": false, "expected_behavior": "Agent should process the metrics data and optionally note rate limit information" }, "tool_call_results": [ { "name": "candidate_source_batch_metrics", "result": { "requisition_id": "05958BR", "metrics": { "CyberSec Jobs": { "candidates": 200, "hires": 3, "reviewed": 161 }, "Dice": { "candidates": 516, "hires": 11, "reviewed": 57 }, "GitHub": {
"candidates": 468, "hires": 10, "reviewed": 356 }, "Indeed": { "candidates": 410, "hires": 0, "reviewed": 316 }, "Internal": { "candidates": 400, "hires": 5, "reviewed": 296 }, "LinkedIn": { "candidates": 519, "hires": 7, "reviewed": 390 }, "Referral": { "candidates": 400, "hires": 4, "reviewed": 280 } }, "X-RateLimit-Limit": 100, "X-RateLimit-Remaining": 97, "X-RateLimit-Reset": "2025-05-01T00:00:00Z", "X-RateLimit-Window": "1h" } } ] } } ] } ]