"""
Error-prone candidate source API variants for testing agent resilience.
Each function has a unique, plausible intent and embeds a specific error behavior.
Completely independent from original APIs — accesses DataLoader directly.
AUTO-GENERATED by scripts/generate_hf.sh - DO NOT EDIT DIRECTLY
Edit candidate_source_error.py in main repo and regenerate.
"""
import json
import random
from pathlib import Path
from typing import Any, Dict, List, Optional
from data_loader import get_data_loader
# Seeded RNG so probabilistic failures (e.g. the 20% 404 in
# get_candidate_pipeline_status) replay identically across runs.
_rng = random.Random(42)
# Per-function call counters backing the simulated rate limits (Test 33).
_call_counts: Dict[str, int] = {}
def _check_requisition(requisition_id: str) -> Optional[Dict[str, Any]]:
    """Validate a requisition id against the data loader.

    Returns the standard not-found error payload for unknown ids,
    or None when the requisition exists and the caller may proceed.
    """
    if get_data_loader().is_valid_requisition(requisition_id):
        return None
    return {
        "error": "requisition_not_found",
        "message": f"Requisition {requisition_id} not found",
    }
# ── Test 28: Type mismatch — int instead of float ────────────────────────────
def get_source_sla_score(requisition_id: str, source_name: str = "Dice") -> Any:
    """Get the SLA score for a specific sourcing channel.

    Computes the SLA achievement percentage over reviewed candidates
    from the given source among similar requisitions.

    ERROR BEHAVIOR (intentional): returns a bare int (e.g. 80) instead of
    a float wrapped in a dict. Tests type handling for numeric values.
    """
    error = _check_requisition(requisition_id)
    if error is not None:
        return error
    frame = get_data_loader().get_similar_requisitions(requisition_id)
    subset = frame[frame["reviewed"] & (frame["source_name"] == source_name)]
    if subset.empty:
        return {"error": "no_data", "message": f"No reviewed candidates from {source_name}"}
    # Intentionally a bare int rather than {"sla_score": 80.0}.
    return int(round(subset["sla_met"].mean() * 100))
# ── Test 29: Type mismatch — None instead of empty list ──────────────────────
def get_inactive_sources(requisition_id: str) -> Any:
    """Show any inactive sourcing channels with no candidates.

    Compares the sources seen for similar requisitions against every
    source present in the full dataset.

    ERROR BEHAVIOR (intentional): returns None instead of [] when all
    sources are active. Tests null handling.
    """
    error = _check_requisition(requisition_id)
    if error is not None:
        return error
    loader = get_data_loader()
    seen = set(loader.get_similar_requisitions(requisition_id)["source_name"].unique())
    # The full dataset defines the reference universe of sources.
    universe = set(loader.data["source_name"].unique())
    dormant = universe - seen
    # None (not an empty list) is the documented faulty behavior under test.
    return list(dormant) if dormant else None
# ── Test 30: HTTP 404 (probabilistic, 20% chance) ────────────────────────────
def get_candidate_pipeline_status(requisition_id: str) -> Dict[str, Any]:
    """Get candidate pipeline status for a requisition.

    Reports the current candidate distribution per sourcing channel.

    ERROR BEHAVIOR (intentional): roughly one call in five returns a
    404-style error dict, driven by the seeded module RNG. Tests retry
    logic and error recovery.
    """
    # Simulated transient failure path (seeded, so reproducible).
    if _rng.random() < 0.2:
        return {
            "status_code": 404,
            "error": True,
            "message": "Resource temporarily unavailable",
        }
    error = _check_requisition(requisition_id)
    if error is not None:
        return error
    frame = get_data_loader().get_similar_requisitions(requisition_id)
    counts = frame.groupby("source_name").size()
    return {
        "requisition_id": requisition_id,
        "pipeline": {name: int(n) for name, n in counts.items()},
        "total_candidates": int(len(frame)),
    }
# ── Test 31: HTTP 500 with valid body ────────────────────────────────────────
def get_source_sla_check(requisition_id: str) -> Dict[str, Any]:
    """Run a quick SLA status check across all sourcing channels.

    Computes per-source SLA percentages over reviewed candidates.

    ERROR BEHAVIOR (intentional): wraps the valid metrics in an HTTP
    500-style envelope. Tests whether the agent can still use the
    response body despite the error status code.
    """
    error = _check_requisition(requisition_id)
    if error is not None:
        return error
    frame = get_data_loader().get_similar_requisitions(requisition_id)
    metrics = [
        {"source_name": name, "sla_percentage": int(round(grp["sla_met"].mean() * 100))}
        for name, grp in frame[frame["reviewed"]].groupby("source_name")
    ]
    return {
        "status_code": 500,
        "error": True,
        "message": "Internal server error",
        "body": {"metrics": metrics},
    }
# ── Test 32: HTTP 503 Service Unavailable ────────────────────────────────────
def get_funnel_status(requisition_id: str) -> Dict[str, Any]:
    """Get the current funnel status for a requisition.

    ERROR BEHAVIOR (intentional): unconditionally returns a 503 envelope
    with retry-after information. Tests service-unavailable handling; the
    requisition id is never consulted.
    """
    response: Dict[str, Any] = {
        "status_code": 503,
        "error": True,
        "message": "Service temporarily unavailable. The funnel analytics engine is undergoing maintenance.",
    }
    response["retry_after_seconds"] = 300
    response["expected_recovery"] = "2025-05-01T12:00:00Z"
    return response
# ── Test 33: HTTP 429 Rate Limited ───────────────────────────────────────────
def get_bulk_source_data(requisition_id: str) -> Dict[str, Any]:
    """Pull bulk source data for all requisitions.

    Aggregates candidate/hire/review counts per sourcing channel.

    ERROR BEHAVIOR (intentional): the module-level counter trips a
    429-style response from the 4th call onward. Tests rate-limit
    handling.
    """
    key = "get_bulk_source_data"
    calls = _call_counts.get(key, 0) + 1
    _call_counts[key] = calls
    if calls > 3:
        return {
            "status_code": 429,
            "error": True,
            "message": "Rate limit exceeded. Maximum 3 calls per session.",
            "retry_after_seconds": 60,
            "limit": 3,
            "remaining": 0,
        }
    error = _check_requisition(requisition_id)
    if error is not None:
        return error
    frame = get_data_loader().get_similar_requisitions(requisition_id)
    summary = {
        name: {
            "total_candidates": int(len(grp)),
            "total_hires": int(grp["hired"].sum()),
            "reviewed": int(grp["reviewed"].sum()),
        }
        for name, grp in frame.groupby("source_name")
    }
    return {
        "requisition_id": requisition_id,
        "sources": summary,
        "call_number": calls,
    }
# ── Test 36: Missing required fields ─────────────────────────────────────────
def get_source_metrics_lite(requisition_id: str) -> Dict[str, Any]:
    """Get a lightweight summary of source metrics.

    Emits one compact metrics entry per sourcing channel.

    ERROR BEHAVIOR (intentional): every metrics entry omits the
    `source_name` field. Tests agent handling of incomplete data.
    """
    error = _check_requisition(requisition_id)
    if error is not None:
        return error
    frame = get_data_loader().get_similar_requisitions(requisition_id)
    # `source_name` is deliberately left out of each entry.
    metrics = [
        {
            "candidate_count": int(len(grp)),
            "hire_count": int(grp["hired"].sum()),
            "sla_met_count": int(grp.loc[grp["reviewed"], "sla_met"].sum()),
        }
        for _, grp in frame.groupby("source_name")
    ]
    return {
        "requisition_id": requisition_id,
        "metrics": metrics,
        "note": "Lightweight view β some fields may be omitted for performance.",
    }
# ── Test 37: Wrong field types in response ───────────────────────────────────
def get_volume_report(requisition_id: str) -> Dict[str, Any]:
    """Generate a volume report for a requisition.

    Breaks candidate volume down per sourcing channel.

    ERROR BEHAVIOR (intentional): numeric counts are serialized as
    strings rather than ints. Tests type-coercion handling.
    """
    error = _check_requisition(requisition_id)
    if error is not None:
        return error
    frame = get_data_loader().get_similar_requisitions(requisition_id)
    metrics = [
        {
            "source_name": name,
            "candidate_count": str(len(grp)),  # string, not int (intentional)
            "hire_count": str(int(grp["hired"].sum())),  # string, not int (intentional)
            "review_rate": f"{grp['reviewed'].mean() * 100:.1f}%",
        }
        for name, grp in frame.groupby("source_name")
    ]
    return {
        "requisition_id": requisition_id,
        "metrics": metrics,
        "total_candidates": str(len(frame)),  # string, not int (intentional)
    }
# ── Test 38: Large response (1000 records) ───────────────────────────────────
def get_full_candidate_details(requisition_id: str) -> Dict[str, Any]:
    """Get full candidate details for a requisition.

    Serves a pre-generated fixture of candidate records.

    ERROR BEHAVIOR (intentional): the fixture holds 1000 candidate
    records, producing an oversized payload. Tests large-payload
    handling. Falls back to an empty list (with a warning) when the
    fixture file cannot be located.
    """
    error = _check_requisition(requisition_id)
    if error is not None:
        return error
    # Probe the known fixture locations in order.
    candidates = [
        Path(__file__).parent.parent.parent / "data" / "large_response_fixture.json",
        Path("./data/large_response_fixture.json"),
    ]
    for fixture in candidates:
        if not fixture.exists():
            continue
        with open(fixture, "r") as handle:
            records = json.load(handle)
        return {
            "requisition_id": requisition_id,
            "total_records": len(records),
            "candidates": records,
        }
    # Fixture missing everywhere: degrade gracefully.
    return {
        "requisition_id": requisition_id,
        "total_records": 0,
        "candidates": [],
        "warning": "Large response fixture not found",
    }
# ── Test 39: Unicode and special characters ──────────────────────────────────
def get_source_directory(requisition_id: str) -> Dict[str, Any]:
    """Show the source directory for a requisition.

    Lists all sourcing channels with their region and status metadata.

    ERROR BEHAVIOR (intentional): source names mix emoji, CJK, Arabic,
    accented Latin, and symbol characters. Tests unicode handling.
    """
    error = _check_requisition(requisition_id)
    if error is not None:
        return error
    listings = [
        ("LinkedIn \U0001F4BC", "Global", "active"),
        ("Dice \U0001F3B2", "North America", "active"),
        ("\u62db\u8058\u7f51 (Zhaopin)", "\u4e2d\u56fd", "active"),
        ("\u0628\u064a\u062a.\u0643\u0648\u0645 (Bayt)", "\u0627\u0644\u0634\u0631\u0642 \u0627\u0644\u0623\u0648\u0633\u0637", "active"),
        ("GitHub \U0001F431\u200D\U0001F4BB", "Global", "active"),
        ("R\u00e9f\u00e9rence\u2122", "Europe", "inactive"),
        ("\u2605 Top Talent \u2605", "APAC", "active"),
    ]
    return {
        "requisition_id": requisition_id,
        "sources": [
            {"name": name, "region": region, "status": status}
            for name, region, status in listings
        ],
        "total_sources": 7,
    }
# ── Test 41: Extra undocumented fields (20 extra fields) ─────────────────────
def get_sla_extended(requisition_id: str, source_name: str = "Dice") -> Dict[str, Any]:
    """Get extended SLA data for a specific sourcing channel.

    Computes the SLA percentage for the source, then pads the response
    with noise.

    ERROR BEHAVIOR (intentional): appends 20 undocumented fields beyond
    the schema. Tests the agent's ability to ignore noise and extract
    the relevant data.
    """
    error = _check_requisition(requisition_id)
    if error is not None:
        return error
    frame = get_data_loader().get_similar_requisitions(requisition_id)
    subset = frame[frame["reviewed"] & (frame["source_name"] == source_name)]
    sla_pct = 0 if subset.empty else int(round(subset["sla_met"].mean() * 100))
    payload: Dict[str, Any] = {
        "requisition_id": requisition_id,
        "source_name": source_name,
        "sla_percentage": sla_pct,
    }
    # The 20 undocumented extra fields, appended in a fixed order.
    payload.update({
        "_internal_id": "sla-ext-7f3a9b2c",
        "_cache_ttl": 3600,
        "_version": "2.3.1",
        "_debug_query_ms": 42,
        "_shard_id": 3,
        "_region": "us-east-1",
        "_feature_flags": ["sla_v2", "extended_metrics"],
        "_experiment_group": "control",
        "_sampling_rate": 0.95,
        "_data_quality_score": 0.98,
        "_last_recomputed": "2025-04-29T03:00:00Z",
        "_computation_engine": "spark-3.5",
        "_model_version": "sla-impact-v1.4.2",
        "_confidence_interval": [sla_pct - 3, sla_pct + 3],
        "_p_value": 0.023,
        "_sample_size": int(len(subset)),
        "_outliers_removed": 2,
        "_normalization_method": "min-max",
        "_correlation_with_hires": 0.67,
        "_seasonal_adjustment": True,
    })
    return payload
# ── Test 43: Undocumented error format ───────────────────────────────────────
def get_requisition_details(requisition_id: str) -> Dict[str, Any]:
    """Get detailed information for a specific requisition.

    Returns requisition metadata taken from the first matching row.

    ERROR BEHAVIOR (intentional): unknown ids yield the non-standard
    shape `{"err": "not_found", "req": ...}` instead of the standard
    `RequisitionNotFoundResponse`. Tests non-standard error parsing.
    """
    loader = get_data_loader()
    if not loader.is_valid_requisition(requisition_id):
        # Deliberately NOT the standard error envelope.
        return {"err": "not_found", "req": requisition_id}
    rows = loader.get_by_requisition(requisition_id)
    first = rows.iloc[0]
    return {
        "requisition_id": requisition_id,
        "department": str(first.get("department", "Unknown")),
        "seniority_level": str(first.get("seniority_level", "Unknown")),
        "total_candidates": int(len(rows)),
        "sources_used": list(rows["source_name"].unique()),
    }
# ── Test 44: Undocumented pagination ─────────────────────────────────────────
def list_all_sources(requisition_id: str) -> Dict[str, Any]:
    """List all available sourcing channels.

    Returns the first page (3 entries) of the sorted source list.

    ERROR BEHAVIOR (intentional): when more sources exist, the response
    carries an undocumented `next_page` token. Tests pagination
    detection and handling.
    """
    error = _check_requisition(requisition_id)
    if error is not None:
        return error
    names = sorted(
        get_data_loader().get_similar_requisitions(requisition_id)["source_name"].unique()
    )
    page_size = 3
    result: Dict[str, Any] = {
        "requisition_id": requisition_id,
        "sources": [{"name": n, "index": i} for i, n in enumerate(names[:page_size])],
        "total_count": len(names),
        "page_size": page_size,
        "page": 1,
    }
    if len(names) > page_size:
        # Opaque base64 continuation token not described in any schema.
        result["next_page"] = "eyJvZmZzZXQiOjMsInJlcV9pZCI6IjA1OTU4QlIifQ=="
        result["has_more"] = True
    else:
        result["has_more"] = False
    return result
# ── Test 45: Undocumented rate limiting headers ──────────────────────────────
def get_batch_metrics(requisition_id: str) -> Dict[str, Any]:
    """Fetch batch metrics for all sourcing channels.

    Aggregates candidate/hire/review counts per sourcing channel.

    ERROR BEHAVIOR (intentional): X-RateLimit-* header-style keys are
    embedded directly in the response body. Tests rate-limit awareness.
    """
    error = _check_requisition(requisition_id)
    if error is not None:
        return error
    frame = get_data_loader().get_similar_requisitions(requisition_id)
    metrics = {
        name: {
            "candidates": int(len(grp)),
            "hires": int(grp["hired"].sum()),
            "reviewed": int(grp["reviewed"].sum()),
        }
        for name, grp in frame.groupby("source_name")
    }
    return {
        "requisition_id": requisition_id,
        "metrics": metrics,
        # Header-style rate-limit keys deliberately placed in the body.
        "X-RateLimit-Limit": 100,
        "X-RateLimit-Remaining": 97,
        "X-RateLimit-Reset": "2025-05-01T00:00:00Z",
        "X-RateLimit-Window": "1h",
    }
|