# BPO-Bench / api_candidate_source_error.py
# haroldshipibm's picture
# Upload folder using huggingface_hub
# d075a5b verified
"""
Error-prone candidate source API variants for testing agent resilience.
Each function has a unique, plausible intent and embeds a specific error behavior.
Completely independent from original APIs — accesses DataLoader directly.
AUTO-GENERATED by scripts/generate_hf.sh - DO NOT EDIT DIRECTLY
Edit candidate_source_error.py in main repo and regenerate.
"""
import json
import random
from pathlib import Path
from typing import Any, Dict, List, Optional
from data_loader import get_data_loader
# Seeded RNG for reproducible probabilistic behavior: the fixed seed (42)
# means a fresh import of this module always produces the same sequence of
# draws for the functions that consult it.
_rng = random.Random(42)
# Track call counts for rate-limiting behavior, keyed by function name.
# This is module-level state; nothing in this section resets it, so counts
# accumulate across calls for as long as the module stays loaded.
_call_counts: Dict[str, int] = {}
def _check_requisition(requisition_id: str) -> Optional[Dict[str, Any]]:
    """Validate a requisition ID against the shared data loader.

    Args:
        requisition_id: Identifier to look up.

    Returns:
        ``None`` when the loader reports the requisition as valid; otherwise
        a ``requisition_not_found`` error dict that callers return unchanged.
    """
    if get_data_loader().is_valid_requisition(requisition_id):
        return None
    return {
        "error": "requisition_not_found",
        "message": f"Requisition {requisition_id} not found",
    }
# ── Test 28: Type mismatch — int instead of float ───────────────────────────
def get_source_sla_score(requisition_id: str, source_name: str = "Dice") -> Any:
    """Get the SLA score for a specific sourcing channel.

    Computes the mean of ``sla_met`` over reviewed candidates from
    ``source_name`` and scales it to a whole-number percentage.

    ERROR BEHAVIOR: Returns a bare int (e.g., 80) instead of a float or a
    dict such as {"sla_score": 80.0}. Tests type handling for numeric values.

    Args:
        requisition_id: Requisition whose similar requisitions are analysed.
        source_name: Sourcing channel to score.

    Returns:
        The rounded percentage as an ``int``; a ``requisition_not_found``
        error dict for an invalid requisition; or a ``no_data`` error dict
        when no reviewed candidates exist for the source.
    """
    problem = _check_requisition(requisition_id)
    if problem:
        return problem

    candidates = get_data_loader().get_similar_requisitions(requisition_id)
    is_reviewed = candidates["reviewed"]
    from_source = candidates["source_name"] == source_name
    matching = candidates[is_reviewed & from_source]

    if len(matching) == 0:
        return {"error": "no_data", "message": f"No reviewed candidates from {source_name}"}

    # Intentionally a plain int with no wrapping dict -- this is the behaviour under test.
    mean_rate = matching["sla_met"].mean()
    return int(round(mean_rate * 100))
# ── Test 29: Type mismatch — None instead of empty list ─────────────────────
def get_inactive_sources(requisition_id: str) -> Any:
    """Show any inactive sourcing channels with no candidates.

    A source counts as inactive when it appears in the loader's full dataset
    but not among the similar requisitions for ``requisition_id``.

    ERROR BEHAVIOR: Returns None instead of an empty list when no inactive
    sources exist. Tests null handling.

    Args:
        requisition_id: Requisition whose similar requisitions define the
            set of active sources.

    Returns:
        A sorted list of inactive source names; ``None`` when there are none;
        or a ``requisition_not_found`` error dict for an invalid requisition.
    """
    err = _check_requisition(requisition_id)
    if err:
        return err

    loader = get_data_loader()
    similar = loader.get_similar_requisitions(requisition_id)
    active_sources = set(similar["source_name"].unique())
    # Use all sources from the full dataset as the reference set.
    all_possible = set(loader.data["source_name"].unique())
    inactive = all_possible - active_sources

    if not inactive:
        return None  # Deliberate: None instead of [] is the behaviour under test.

    # Set iteration order for strings varies between processes (hash
    # randomization), so sort to keep the response reproducible. key=str
    # keeps the sort total even if a non-string value (e.g. NaN) is present.
    return sorted(inactive, key=str)
# ── Test 30: HTTP 404 (probabilistic, 20% chance) ───────────────────────────
def get_candidate_pipeline_status(requisition_id: str) -> Dict[str, Any]:
    """Get candidate pipeline status for a requisition.

    Reports how many candidates each source contributed across the similar
    requisitions, plus the overall total.

    ERROR BEHAVIOR: 20% chance of returning a 404-style error dict. The draw
    comes from the module's seeded RNG and happens before the requisition is
    validated. Tests retry logic and error recovery.

    Args:
        requisition_id: Requisition to report on.

    Returns:
        A dict with ``requisition_id``, ``pipeline`` (source -> count) and
        ``total_candidates``; or a 404-style dict; or a
        ``requisition_not_found`` error dict.
    """
    simulated_failure = _rng.random() < 0.2
    if simulated_failure:
        return {
            "status_code": 404,
            "error": True,
            "message": "Resource temporarily unavailable",
        }

    problem = _check_requisition(requisition_id)
    if problem:
        return problem

    candidates = get_data_loader().get_similar_requisitions(requisition_id)
    counts = candidates.groupby("source_name").size().to_dict()

    # Coerce every count to a built-in int.
    pipeline = {}
    for source in counts:
        pipeline[source] = int(counts[source])

    return {
        "requisition_id": requisition_id,
        "pipeline": pipeline,
        "total_candidates": int(len(candidates)),
    }
# ── Test 31: HTTP 500 with valid body ────────────────────────────────────────
def get_source_sla_check(requisition_id: str) -> Dict[str, Any]:
    """Run a quick SLA status check across all sourcing channels.

    For each source, computes the mean of ``sla_met`` over reviewed
    candidates as a rounded whole-number percentage.

    ERROR BEHAVIOR: Always wraps the result in an HTTP 500-style error dict
    while still including the valid data under ``body``. Tests whether the
    agent can use the response body despite the error status code.

    Args:
        requisition_id: Requisition whose similar requisitions are analysed.

    Returns:
        A 500-style dict whose ``body.metrics`` lists one entry per source;
        or a ``requisition_not_found`` error dict.
    """
    problem = _check_requisition(requisition_id)
    if problem:
        return problem

    candidates = get_data_loader().get_similar_requisitions(requisition_id)
    reviewed_only = candidates[candidates["reviewed"]]

    per_source = [
        {
            "source_name": source,
            "sla_percentage": int(round(group["sla_met"].mean() * 100)),
        }
        for source, group in reviewed_only.groupby("source_name")
    ]

    return {
        "status_code": 500,
        "error": True,
        "message": "Internal server error",
        "body": {"metrics": per_source},
    }
# ── Test 32: HTTP 503 Service Unavailable ────────────────────────────────────
def get_funnel_status(requisition_id: str) -> Dict[str, Any]:
    """Get the current funnel status for a requisition.

    ERROR BEHAVIOR: Always returns a 503-style dict with retry-after info,
    regardless of input. Tests service unavailable handling.

    Args:
        requisition_id: Accepted for interface parity; not inspected.

    Returns:
        A fixed 503 "service unavailable" payload.
    """
    # The argument is never read: this endpoint simulates a permanent outage.
    return dict(
        status_code=503,
        error=True,
        message="Service temporarily unavailable. The funnel analytics engine is undergoing maintenance.",
        retry_after_seconds=300,
        expected_recovery="2025-05-01T12:00:00Z",
    )
# ── Test 33: HTTP 429 Rate Limited ──────────────────────────────────────────
def get_bulk_source_data(requisition_id: str) -> Dict[str, Any]:
    """Pull bulk source data for a requisition's similar requisitions.

    Summarises candidates, hires and reviewed counts per source.

    ERROR BEHAVIOR: Returns a 429-style dict from the 4th call onward. Calls
    are tracked in the module-level counter, which is incremented before the
    requisition is validated, so invalid calls also count toward the limit.
    Tests rate limit handling.

    Args:
        requisition_id: Requisition to summarise.

    Returns:
        A dict with ``requisition_id``, ``sources`` and ``call_number``; or a
        429-style dict; or a ``requisition_not_found`` error dict.
    """
    counter_key = "get_bulk_source_data"
    calls_so_far = _call_counts.get(counter_key, 0) + 1
    _call_counts[counter_key] = calls_so_far

    if calls_so_far > 3:
        return {
            "status_code": 429,
            "error": True,
            "message": "Rate limit exceeded. Maximum 3 calls per session.",
            "retry_after_seconds": 60,
            "limit": 3,
            "remaining": 0,
        }

    problem = _check_requisition(requisition_id)
    if problem:
        return problem

    candidates = get_data_loader().get_similar_requisitions(requisition_id)
    per_source = {
        source: {
            "total_candidates": int(len(group)),
            "total_hires": int(group["hired"].sum()),
            "reviewed": int(group["reviewed"].sum()),
        }
        for source, group in candidates.groupby("source_name")
    }

    return {
        "requisition_id": requisition_id,
        "sources": per_source,
        "call_number": _call_counts[counter_key],
    }
# ── Test 36: Missing required fields ────────────────────────────────────────
def get_source_metrics_lite(requisition_id: str) -> Dict[str, Any]:
    """Get a lightweight summary of source metrics.

    Produces one metrics entry per source found in the similar requisitions:
    candidate count, hire count, and the number of reviewed candidates whose
    ``sla_met`` flag is set.

    ERROR BEHAVIOR: Entries are missing the `source_name` field, so they
    cannot be attributed to a source. Tests agent handling of
    incomplete/partial data.

    Args:
        requisition_id: Requisition whose similar requisitions are analysed.

    Returns:
        A dict with ``requisition_id``, ``metrics`` (list of anonymous
        entries) and a ``note``; or a ``requisition_not_found`` error dict.
    """
    err = _check_requisition(requisition_id)
    if err:
        return err

    loader = get_data_loader()
    data = loader.get_similar_requisitions(requisition_id)

    metrics = []
    # The group key is discarded on purpose: omitting source_name is the
    # behaviour under test.
    for _source, group in data.groupby("source_name"):
        metrics.append({
            "candidate_count": int(len(group)),
            "hire_count": int(group["hired"].sum()),
            "sla_met_count": int(group[group["reviewed"]]["sla_met"].sum()),
        })

    return {
        "requisition_id": requisition_id,
        "metrics": metrics,
        # Em dash written as an escape so the literal survives re-encoding;
        # the previous text carried a mis-decoded (mojibake) sequence here.
        "note": "Lightweight view \u2014 some fields may be omitted for performance.",
    }
# ── Test 37: Wrong field types in response ──────────────────────────────────
def get_volume_report(requisition_id: str) -> Dict[str, Any]:
    """Generate a volume report for a requisition.

    Breaks candidate volume down by source across the similar requisitions.

    ERROR BEHAVIOR: `candidate_count`, `hire_count` and `total_candidates`
    are returned as strings instead of ints, and `review_rate` is a
    formatted percentage string. Tests type coercion handling.

    Args:
        requisition_id: Requisition to report on.

    Returns:
        A dict with ``requisition_id``, ``metrics`` (one entry per source)
        and ``total_candidates``; or a ``requisition_not_found`` error dict.
    """
    problem = _check_requisition(requisition_id)
    if problem:
        return problem

    candidates = get_data_loader().get_similar_requisitions(requisition_id)

    # Numeric values are stringified on purpose -- that is the behaviour under test.
    rows = [
        {
            "source_name": source,
            "candidate_count": str(len(group)),
            "hire_count": str(int(group["hired"].sum())),
            "review_rate": f"{group['reviewed'].mean() * 100:.1f}%",
        }
        for source, group in candidates.groupby("source_name")
    ]

    return {
        "requisition_id": requisition_id,
        "metrics": rows,
        "total_candidates": str(len(candidates)),
    }
# ── Test 38: Large response (1000 records) ──────────────────────────────────
def get_full_candidate_details(requisition_id: str) -> Dict[str, Any]:
    """Get full candidate details for a requisition.

    Serves candidate records from a pre-generated JSON fixture. Two
    locations are tried in order: ``data/large_response_fixture.json`` three
    directories above this file, then the same relative path under the
    current working directory.

    ERROR BEHAVIOR: Response contains the pre-generated candidate records
    (1000 per the fixture's intent). Tests agent handling of large payloads.

    Args:
        requisition_id: Requisition to validate and echo back; the fixture
            contents do not depend on it.

    Returns:
        A dict with ``requisition_id``, ``total_records`` and ``candidates``.
        When no fixture file is found, ``candidates`` is empty and a
        ``warning`` key is added. An invalid requisition yields a
        ``requisition_not_found`` error dict.
    """
    err = _check_requisition(requisition_id)
    if err:
        return err

    # Load pre-generated fixture from the first location that exists.
    fixture_paths = [
        Path(__file__).parent.parent.parent / "data" / "large_response_fixture.json",
        Path("./data/large_response_fixture.json"),
    ]
    for path in fixture_paths:
        if path.exists():
            # JSON is UTF-8 by specification; be explicit so decoding does
            # not depend on the platform's locale encoding.
            with open(path, "r", encoding="utf-8") as f:
                records = json.load(f)
            return {
                "requisition_id": requisition_id,
                "total_records": len(records),
                "candidates": records,
            }

    # Fallback: no fixture available, so return an empty result with a warning.
    return {
        "requisition_id": requisition_id,
        "total_records": 0,
        "candidates": [],
        "warning": "Large response fixture not found",
    }
# ── Test 39: Unicode and special characters ─────────────────────────────────
def get_source_directory(requisition_id: str) -> Dict[str, Any]:
    """Show the source directory for a requisition.

    Returns a fixed directory of sourcing channels with region and status.

    ERROR BEHAVIOR: Source names contain emoji, CJK characters, Arabic text
    and other non-ASCII symbols. Tests unicode handling.

    Args:
        requisition_id: Requisition to validate and echo back; the listing
            itself is static.

    Returns:
        A dict with ``requisition_id``, ``sources`` and ``total_sources``; or
        a ``requisition_not_found`` error dict.
    """
    problem = _check_requisition(requisition_id)
    if problem:
        return problem

    # (name, region, status) rows; expanded into dicts below.
    directory_rows = [
        ("LinkedIn \U0001F4BC", "Global", "active"),
        ("Dice \U0001F3B2", "North America", "active"),
        ("\u62db\u8058\u7f51 (Zhaopin)", "\u4e2d\u56fd", "active"),
        ("\u0628\u064a\u062a.\u0643\u0648\u0645 (Bayt)", "\u0627\u0644\u0634\u0631\u0642 \u0627\u0644\u0623\u0648\u0633\u0637", "active"),
        ("GitHub \U0001F431\u200D\U0001F4BB", "Global", "active"),
        ("R\u00e9f\u00e9rence\u2122", "Europe", "inactive"),
        ("\u2605 Top Talent \u2605", "APAC", "active"),
    ]
    sources = [
        {"name": name, "region": region, "status": status}
        for name, region, status in directory_rows
    ]

    return {
        "requisition_id": requisition_id,
        "sources": sources,
        "total_sources": len(sources),
    }
# ── Test 41: Extra undocumented fields (20 extra fields) ─────────────────────
def get_sla_extended(requisition_id: str, source_name: str = "Dice") -> Dict[str, Any]:
    """Get extended SLA data for a specific sourcing channel.

    Computes the rounded SLA percentage over reviewed candidates from
    ``source_name`` (0 when there are none) and returns it alongside a block
    of extra fields.

    ERROR BEHAVIOR: Response includes 20 undocumented extra fields beyond
    what the schema describes. Tests agent ability to ignore noise and
    extract relevant data.

    Args:
        requisition_id: Requisition whose similar requisitions are analysed.
        source_name: Sourcing channel to report on.

    Returns:
        A dict with ``requisition_id``, ``source_name``, ``sla_percentage``
        and the underscore-prefixed extras; or a ``requisition_not_found``
        error dict.
    """
    problem = _check_requisition(requisition_id)
    if problem:
        return problem

    candidates = get_data_loader().get_similar_requisitions(requisition_id)
    is_reviewed = candidates["reviewed"]
    from_source = candidates["source_name"] == source_name
    source_rows = candidates[is_reviewed & from_source]

    if len(source_rows) > 0:
        sla_pct = int(round(source_rows["sla_met"].mean() * 100))
    else:
        sla_pct = 0

    documented = {
        "requisition_id": requisition_id,
        "source_name": source_name,
        "sla_percentage": sla_pct,
    }
    # Undocumented extra fields: mostly fixed values; only the confidence
    # interval and sample size are derived from the data.
    undocumented = {
        "_internal_id": "sla-ext-7f3a9b2c",
        "_cache_ttl": 3600,
        "_version": "2.3.1",
        "_debug_query_ms": 42,
        "_shard_id": 3,
        "_region": "us-east-1",
        "_feature_flags": ["sla_v2", "extended_metrics"],
        "_experiment_group": "control",
        "_sampling_rate": 0.95,
        "_data_quality_score": 0.98,
        "_last_recomputed": "2025-04-29T03:00:00Z",
        "_computation_engine": "spark-3.5",
        "_model_version": "sla-impact-v1.4.2",
        "_confidence_interval": [sla_pct - 3, sla_pct + 3],
        "_p_value": 0.023,
        "_sample_size": int(len(source_rows)),
        "_outliers_removed": 2,
        "_normalization_method": "min-max",
        "_correlation_with_hires": 0.67,
        "_seasonal_adjustment": True,
    }
    return {**documented, **undocumented}
# ── Test 43: Undocumented error format ──────────────────────────────────────
def get_requisition_details(requisition_id: str) -> Dict[str, Any]:
    """Get detailed information for a specific requisition.

    Reads department and seniority from the first row of the requisition's
    data, and reports the candidate count and distinct sources used.

    ERROR BEHAVIOR: Returns the non-standard error format
    `{"err": "not_found"}` (plus a `req` key) instead of the standard
    `RequisitionNotFoundResponse`. Tests non-standard error parsing.

    Args:
        requisition_id: Requisition to describe.

    Returns:
        A dict with ``requisition_id``, ``department``, ``seniority_level``,
        ``total_candidates`` and ``sources_used``; or the non-standard
        not-found dict.
    """
    loader = get_data_loader()
    # Validated inline rather than through the shared helper so the
    # non-standard error shape can be returned.
    if not loader.is_valid_requisition(requisition_id):
        return {"err": "not_found", "req": requisition_id}

    rows = loader.get_by_requisition(requisition_id)
    first_row = rows.iloc[0]
    department = str(first_row.get("department", "Unknown"))
    seniority = str(first_row.get("seniority_level", "Unknown"))

    return {
        "requisition_id": requisition_id,
        "department": department,
        "seniority_level": seniority,
        "total_candidates": int(len(rows)),
        "sources_used": list(rows["source_name"].unique()),
    }
# ── Test 44: Undocumented pagination ─────────────────────────────────────────
def list_all_sources(requisition_id: str) -> Dict[str, Any]:
    """List all available sourcing channels.

    Returns the first page (3 entries) of the alphabetically sorted source
    names found in the similar requisitions.

    ERROR BEHAVIOR: Response includes a `next_page` token not described in
    any schema whenever more than one page exists. The token is a fixed
    string. Tests pagination detection and handling.

    Args:
        requisition_id: Requisition whose similar requisitions are listed.

    Returns:
        A dict with ``requisition_id``, ``sources``, ``total_count``,
        ``page_size``, ``page`` and ``has_more`` (plus ``next_page`` when
        more results exist); or a ``requisition_not_found`` error dict.
    """
    problem = _check_requisition(requisition_id)
    if problem:
        return problem

    candidates = get_data_loader().get_similar_requisitions(requisition_id)
    all_sources = sorted(candidates["source_name"].unique())

    # Return first 3 with pagination token.
    page_size = 3
    first_page = all_sources[:page_size]
    more_available = len(all_sources) > page_size

    response: Dict[str, Any] = {
        "requisition_id": requisition_id,
        "sources": [{"name": name, "index": idx} for idx, name in enumerate(first_page)],
        "total_count": len(all_sources),
        "page_size": page_size,
        "page": 1,
    }
    if more_available:
        response["next_page"] = "eyJvZmZzZXQiOjMsInJlcV9pZCI6IjA1OTU4QlIifQ=="
    response["has_more"] = more_available
    return response
# ── Test 45: Undocumented rate limiting headers ──────────────────────────────
def get_batch_metrics(requisition_id: str) -> Dict[str, Any]:
    """Fetch batch metrics for all sourcing channels.

    Aggregates candidates, hires and reviewed counts per source across the
    similar requisitions.

    ERROR BEHAVIOR: Response includes X-RateLimit style headers embedded in
    the response body as fixed values. Tests rate limit awareness.

    Args:
        requisition_id: Requisition whose similar requisitions are analysed.

    Returns:
        A dict with ``requisition_id``, ``metrics`` (source -> counts) and
        four ``X-RateLimit-*`` keys; or a ``requisition_not_found`` error
        dict.
    """
    problem = _check_requisition(requisition_id)
    if problem:
        return problem

    candidates = get_data_loader().get_similar_requisitions(requisition_id)
    per_source = {
        source: {
            "candidates": int(len(group)),
            "hires": int(group["hired"].sum()),
            "reviewed": int(group["reviewed"].sum()),
        }
        for source, group in candidates.groupby("source_name")
    }

    payload = {
        "requisition_id": requisition_id,
        "metrics": per_source,
    }
    # Rate limit info embedded in response body.
    rate_limit_fields = {
        "X-RateLimit-Limit": 100,
        "X-RateLimit-Remaining": 97,
        "X-RateLimit-Reset": "2025-05-01T00:00:00Z",
        "X-RateLimit-Window": "1h",
    }
    return {**payload, **rate_limit_fields}