# BPO-Bench / api_skills_error.py
# haroldshipibm's picture
# Upload folder using huggingface_hub
# d075a5b verified
"""
Error-prone skills API variants for testing agent resilience.
Each function has a unique, plausible intent and embeds a specific error behavior.
Completely independent from original APIs — accesses DataLoader directly.
AUTO-GENERATED by scripts/generate_hf.sh - DO NOT EDIT DIRECTLY
Edit skills_error.py in main repo and regenerate.
"""
import json
import random
from typing import Any, Dict, Optional
from data_loader import get_data_loader
# Seeded RNG for reproducible probabilistic behavior.
# The fixed seed (42) makes the generated sequence identical on every import.
# NOTE(review): no function visible in this section references `_rng` —
# confirm it is used elsewhere before treating it as dead code.
_rng = random.Random(42)
def _check_requisition(requisition_id: str) -> Optional[Dict[str, Any]]:
    """Validate a requisition ID against the shared data loader.

    Args:
        requisition_id: Identifier of the requisition to validate.

    Returns:
        ``None`` when the loader reports the requisition as valid; otherwise
        an error payload with ``error`` and ``message`` keys.
    """
    # Guard clause: a known requisition carries no error payload.
    if get_data_loader().is_valid_requisition(requisition_id):
        return None

    not_found: Dict[str, Any] = {
        "error": "requisition_not_found",
        "message": f"Requisition {requisition_id} not found",
    }
    return not_found
# ── Test 27: Type mismatch — string instead of structured list ──────────────
def get_skill_summary(requisition_id: str) -> str:
    """Summarise the skills seen across similar requisitions as plain text.

    ERROR BEHAVIOR: Deliberately returns a bare comma-separated string
    instead of a structured SkillAnalysisResponse, to exercise type-mismatch
    handling in the caller.

    Args:
        requisition_id: Identifier of the requisition to summarise.

    Returns:
        The de-duplicated skill names in sorted order, joined by ``", "``.
        For an invalid requisition, the error payload serialised as a JSON
        string (still a ``str``, in keeping with the type mismatch).
    """
    failure = _check_requisition(requisition_id)
    if failure:
        return json.dumps(failure)

    frame = get_data_loader().get_similar_requisitions(requisition_id)
    parsed_column = frame["skills_parsed"].dropna()

    # Only list-valued cells contribute skills; any other cell is ignored.
    distinct_skills = {
        skill
        for entry in parsed_column
        if isinstance(entry, list)
        for skill in entry
    }

    ordered = list(distinct_skills)
    ordered.sort()
    return ", ".join(ordered)
# ── Test 34: Missing output schema — untyped dict ───────────────────────────
def get_model_registry(requisition_id: str) -> Dict[str, Any]:
    """Report the ML models registered for a requisition.

    ERROR BEHAVIOR: There is no Pydantic output schema; the result is a plain
    dict whose model entries carry differently named metric fields
    (``accuracy``, ``r_squared``, ``precision``). Tests schema inference.

    Args:
        requisition_id: Identifier of the requisition to look up.

    Returns:
        The error payload for an invalid requisition. Otherwise a dict with
        ``requisition_id``, a ``models`` list and ``registry_updated``. The
        registry contents are hard-coded and do not depend on the requisition.
    """
    failure = _check_requisition(requisition_id)
    if failure:
        return failure

    # (name, version, status, last_trained, metric field, metric value)
    catalogue = (
        ("Skill relevance classifier", "2.1.0", "active",
         "2024-11-15", "accuracy", 0.87),
        ("SLA impact regression model", "1.4.2", "active",
         "2024-10-01", "r_squared", 0.72),
        ("Funnel conversion recommender", "3.0.0-beta", "staging",
         "2025-01-20", "precision", 0.81),
    )

    # The metric field goes last so key order matches the documented layout.
    models = [
        {
            "name": name,
            "version": version,
            "status": status,
            "last_trained": trained_on,
            metric_field: metric_value,
        }
        for name, version, status, trained_on, metric_field, metric_value
        in catalogue
    ]

    registry: Dict[str, Any] = {"requisition_id": requisition_id}
    registry["models"] = models
    registry["registry_updated"] = "2025-04-29"
    return registry
# ── Test 35: Missing input schema — undocumented params ─────────────────────
def get_skill_lookup(requisition_id: str, skill_name: Optional[str] = None,
                     include_history: bool = False,
                     format: str = "json") -> Dict[str, Any]:
    """Look up a specific skill and its metrics for a requisition.

    ERROR BEHAVIOR: Accepts undocumented parameters (include_history, format)
    not described in the tool schema. Tests agent handling of extra params.

    Args:
        requisition_id: Identifier of the requisition to inspect.
        skill_name: Skill to count. With the default ``None`` no list entry
            is expected to match, so the count is normally zero.
        include_history: When true, adds a ``history`` section to the result.
        format: Accepted for interface compatibility but never read; the
            result is always a dict. The name shadows the ``format`` builtin
            and is kept only because callers may pass it by keyword.

    Returns:
        The error payload for an invalid requisition. Otherwise a dict with
        ``requisition_id``, ``skill_name``, ``occurrence_count``,
        ``total_candidates`` (rows returned by the loader) and
        ``occurrence_rate`` (a percentage rounded to one decimal, or ``0``
        when there are no rows), plus ``history`` when requested.
    """
    err = _check_requisition(requisition_id)
    if err:
        return err

    data = get_data_loader().get_similar_requisitions(requisition_id)
    candidate_count = len(data)

    # Count rows whose parsed skill list contains the requested skill;
    # missing cells and non-list cells never match.
    occurrences = sum(
        1
        for skills in data["skills_parsed"].dropna()
        if isinstance(skills, list) and skill_name in skills
    )

    result: Dict[str, Any] = {
        "requisition_id": requisition_id,
        "skill_name": skill_name,
        "occurrence_count": occurrences,
        "total_candidates": candidate_count,
        "occurrence_rate": (
            round(occurrences / candidate_count * 100, 1)
            if candidate_count > 0 else 0
        ),
    }

    if include_history:
        # Synthetic history: the total is spread evenly over four quarters
        # using floor division, so the quarters may not sum to the total.
        result["history"] = {
            "first_seen": "2023-10-09",
            "trend": "stable",
            "quarterly_counts": [occurrences // 4] * 4,
        }
    return result
# ── Test 40: Deeply nested JSON (15 levels) ─────────────────────────────────
def get_skill_deep_analysis(requisition_id: str) -> Dict[str, Any]:
    """Return the five most frequent skills, each wrapped in 15 nested layers.

    ERROR BEHAVIOR: The response is nested 15 levels deep to test an agent's
    ability to navigate deeply nested structures.

    Args:
        requisition_id: Identifier of the requisition to analyse.

    Returns:
        The error payload for an invalid requisition. Otherwise a dict with
        ``requisition_id``, ``analysis_version`` and ``results``, where
        ``results["nested_skills"]`` holds one nested structure per top skill
        and ``results["total_depth"]`` is 15. Each structure has ``level``,
        ``metadata`` and ``data`` keys; the innermost ``data`` is
        ``{"skill": ..., "count": ...}``.
    """
    failure = _check_requisition(requisition_id)
    if failure:
        return failure

    frame = get_data_loader().get_similar_requisitions(requisition_id)

    from collections import Counter

    # Tally every skill mention across list-valued cells, in row order.
    tally = Counter()
    for entry in frame["skills_parsed"].dropna():
        if isinstance(entry, list):
            tally.update(entry)
    leaders = tally.most_common(5)

    depth_limit = 15

    def wrap(skill_name: str, count: int) -> Dict[str, Any]:
        # Build from the innermost leaf outwards so the outermost layer
        # ends up carrying the highest level number.
        node: Dict[str, Any] = {"skill": skill_name, "count": count}
        for level in range(1, depth_limit + 1):
            node = {
                "level": level,
                "metadata": {"type": f"analysis_layer_{level}"},
                "data": node,
            }
        return node

    nested = []
    for skill_name, count in leaders:
        nested.append(wrap(skill_name, count))

    return {
        "requisition_id": requisition_id,
        "analysis_version": "3.0",
        "results": {
            "nested_skills": nested,
            "total_depth": depth_limit,
        },
    }
# ── Test 42: Input schema mismatch — expects skill_id but docs say skill_name
def analyze_skill_match(requisition_id: str, skill_id: str) -> Dict[str, Any]:
    """Check if a skill is a good match for a requisition.

    ERROR BEHAVIOR: Function signature says `skill_id` but tool description
    and documentation say `skill_name`. Tests agent adaptation to mismatched
    parameter names.

    Args:
        requisition_id: The job requisition ID.
        skill_id: The skill identifier to check. It is compared directly
            against the entries of each row's parsed skill list.

    Returns:
        The error payload for an invalid requisition. Otherwise a dict with
        ``requisition_id``, ``skill_id``, ``match_score`` (capped at 100),
        ``sla_delta`` (SLA-met percentage of reviewed rows with the skill
        minus that of reviewed rows without it), ``occurrence_rate``
        (percentage of all rows listing the skill) and ``recommendation``
        (``"good match"`` when the score is at least 50, else
        ``"weak match"``).
    """
    err = _check_requisition(requisition_id)
    if err:
        return err

    # Treat skill_id as skill_name (the mismatch)
    skill_name = skill_id

    def lists_skill(entry: Any) -> bool:
        # Only list-valued cells can contain the skill. Missing (NaN) cells
        # count as "without skill" instead of raising TypeError, and string
        # cells no longer match by substring.
        return isinstance(entry, list) and skill_name in entry

    data = get_data_loader().get_similar_requisitions(requisition_id)
    candidate_count = len(data)

    reviewed = data[data["reviewed"]]
    # One boolean mask serves both partitions; astype(bool) keeps it a valid
    # boolean indexer even when `reviewed` is empty.
    skill_mask = reviewed["skills_parsed"].apply(lists_skill).astype(bool)
    has_skill = reviewed[skill_mask]
    no_skill = reviewed[~skill_mask]

    sla_with = round(has_skill["sla_met"].mean() * 100) if len(has_skill) > 0 else 0
    sla_without = round(no_skill["sla_met"].mean() * 100) if len(no_skill) > 0 else 0
    sla_delta = sla_with - sla_without

    total_with_skill = sum(
        1 for entry in data["skills_parsed"] if lists_skill(entry)
    )

    # Score = up to 50 points for prevalence + any positive SLA uplift.
    prevalence_points = (
        total_with_skill / candidate_count * 50 if candidate_count > 0 else 0
    )
    match_score = min(100, int(prevalence_points + max(0, sla_delta)))

    return {
        "requisition_id": requisition_id,
        "skill_id": skill_name,
        "match_score": match_score,
        "sla_delta": sla_delta,
        "occurrence_rate": (
            round(total_with_skill / candidate_count * 100, 1)
            if candidate_count > 0 else 0
        ),
        "recommendation": "good match" if match_score >= 50 else "weak match",
    }