Spaces:
Running
Running
| """ | |
| Error-prone candidate source API variants for testing agent resilience. | |
| Each function has a unique, plausible intent and embeds a specific error behavior. | |
| Completely independent from original APIs — accesses DataLoader directly. | |
| AUTO-GENERATED by scripts/generate_hf.sh - DO NOT EDIT DIRECTLY | |
| Edit candidate_source_error.py in main repo and regenerate. | |
| """ | |
| import json | |
| import random | |
| from pathlib import Path | |
| from typing import Any, Dict, List, Optional | |
| from data_loader import get_data_loader | |
# Module-level RNG with a fixed seed (42) so that probabilistic error
# behavior is reproducible for a given sequence of calls.
_rng = random.Random(42)
# Per-function call counters keyed by function name; consulted by the
# rate-limiting variant to decide when to start returning 429 responses.
_call_counts: Dict[str, int] = {}
def _check_requisition(requisition_id: str) -> Optional[Dict[str, Any]]:
    """Validate a requisition ID against the shared data loader.

    Args:
        requisition_id: Identifier of the requisition to look up.

    Returns:
        ``None`` when the loader reports the requisition as valid; otherwise
        a ``requisition_not_found`` error dict with a human-readable message.
    """
    if get_data_loader().is_valid_requisition(requisition_id):
        return None
    return {
        "error": "requisition_not_found",
        "message": f"Requisition {requisition_id} not found",
    }
| # ── Test 28: Type mismatch — int instead of float ─────────────────────────── | |
def get_source_sla_score(requisition_id: str, source_name: str = "Dice") -> Any:
    """Get the SLA score for a specific sourcing channel.

    Takes the mean of the ``sla_met`` column over reviewed candidates from
    ``source_name`` and scales it to a whole-number percentage.

    ERROR BEHAVIOR: Returns a bare int (e.g., 80) instead of a float or a
    ``{"sla_score": 80.0}`` payload. Tests type handling for numeric values.

    Args:
        requisition_id: Requisition whose similar-requisition data is used.
        source_name: Sourcing channel to score.

    Returns:
        The requisition-not-found error dict, a ``no_data`` error dict when
        no reviewed candidates exist for the source, or the int percentage.
    """
    invalid = _check_requisition(requisition_id)
    if invalid:
        return invalid

    candidates = get_data_loader().get_similar_requisitions(requisition_id)
    is_reviewed = candidates["reviewed"]
    from_source = candidates["source_name"] == source_name
    matching = candidates[is_reviewed & from_source]
    if len(matching) == 0:
        return {"error": "no_data", "message": f"No reviewed candidates from {source_name}"}

    sla_fraction = matching["sla_met"].mean()
    # Deliberately a bare int, not a dict or float: this is the error under test.
    return int(round(sla_fraction * 100))
| # ── Test 29: Type mismatch — None instead of empty list ───────────────────── | |
def get_inactive_sources(requisition_id: str) -> Any:
    """Show any inactive sourcing channels with no candidates.

    A source is treated as inactive when it appears in the loader's full
    dataset but not in the similar-requisition data for ``requisition_id``.

    ERROR BEHAVIOR: Returns None instead of an empty list when no inactive
    sources exist. Tests null handling.

    Args:
        requisition_id: Requisition whose similar-requisition data is used.

    Returns:
        The requisition-not-found error dict, ``None`` when every known
        source is active, or a sorted list of the inactive source names.
    """
    err = _check_requisition(requisition_id)
    if err:
        return err

    loader = get_data_loader()
    data = loader.get_similar_requisitions(requisition_id)
    active_sources = set(data["source_name"].unique())
    # Use all sources from the full dataset as the reference set.
    all_possible = set(loader.data["source_name"].unique())
    inactive = all_possible - active_sources
    if not inactive:
        return None  # Intentional: None instead of [] is the error under test.
    # Set iteration order for strings varies between interpreter runs (hash
    # randomization); sort so the result is reproducible. ``key=str`` keeps
    # the sort safe if a non-string value (e.g. NaN) is present.
    return sorted(inactive, key=str)
| # ── Test 30: HTTP 404 (probabilistic, 20% chance) ─────────────────────────── | |
def get_candidate_pipeline_status(requisition_id: str) -> Dict[str, Any]:
    """Get candidate pipeline status for a requisition.

    Reports how many candidates each source contributed, plus the total.

    ERROR BEHAVIOR: 20% chance of returning a 404-style error dict, decided
    by the module-level RNG before the requisition is even validated.
    Tests retry logic and error recovery.

    Args:
        requisition_id: Requisition whose similar-requisition data is used.

    Returns:
        A 404-style error dict, the requisition-not-found error dict, or a
        dict with ``requisition_id``, ``pipeline`` and ``total_candidates``.
    """
    # The draw happens first, so every call consumes one RNG value.
    roll = _rng.random()
    if roll < 0.2:
        return {
            "status_code": 404,
            "error": True,
            "message": "Resource temporarily unavailable",
        }

    invalid = _check_requisition(requisition_id)
    if invalid:
        return invalid

    candidates = get_data_loader().get_similar_requisitions(requisition_id)
    counts_by_source = candidates.groupby("source_name").size().to_dict()
    pipeline = {name: int(count) for name, count in counts_by_source.items()}
    return {
        "requisition_id": requisition_id,
        "pipeline": pipeline,
        "total_candidates": int(len(candidates)),
    }
| # ── Test 31: HTTP 500 with valid body ─────────────────────────────────────── | |
def get_source_sla_check(requisition_id: str) -> Dict[str, Any]:
    """Run a quick SLA status check across all sourcing channels.

    For each source, computes the mean of ``sla_met`` over reviewed
    candidates as a whole-number percentage.

    ERROR BEHAVIOR: Returns an HTTP 500-style error dict but includes the
    valid metrics under ``body``. Tests agent ability to use the response
    body despite the error status code.

    Args:
        requisition_id: Requisition whose similar-requisition data is used.

    Returns:
        The requisition-not-found error dict, or a 500-style dict whose
        ``body["metrics"]`` lists ``source_name``/``sla_percentage`` pairs.
    """
    invalid = _check_requisition(requisition_id)
    if invalid:
        return invalid

    candidates = get_data_loader().get_similar_requisitions(requisition_id)
    reviewed_only = candidates[candidates["reviewed"]]
    per_source = [
        {
            "source_name": name,
            "sla_percentage": int(round(rows["sla_met"].mean() * 100)),
        }
        for name, rows in reviewed_only.groupby("source_name")
    ]
    return {
        "status_code": 500,
        "error": True,
        "message": "Internal server error",
        "body": {"metrics": per_source},
    }
| # ── Test 32: HTTP 503 Service Unavailable ─────────────────────────────────── | |
def get_funnel_status(requisition_id: str) -> Dict[str, Any]:
    """Get the current funnel status for a requisition.

    ERROR BEHAVIOR: Always returns a 503-style payload with retry-after
    information. Tests service-unavailable handling.

    Args:
        requisition_id: Accepted for interface parity; not consulted.

    Returns:
        A fresh dict with ``status_code`` 503, an ``error`` flag, a
        maintenance message, ``retry_after_seconds`` and a fixed
        ``expected_recovery`` timestamp.
    """
    unavailable: Dict[str, Any] = {}
    unavailable["status_code"] = 503
    unavailable["error"] = True
    unavailable["message"] = "Service temporarily unavailable. The funnel analytics engine is undergoing maintenance."
    unavailable["retry_after_seconds"] = 300
    unavailable["expected_recovery"] = "2025-05-01T12:00:00Z"
    return unavailable
| # ── Test 33: HTTP 429 Rate Limited ────────────────────────────────────────── | |
def get_bulk_source_data(requisition_id: str) -> Dict[str, Any]:
    """Pull bulk per-source data for a requisition.

    Summarizes candidate, hire and reviewed counts for each source in the
    similar-requisition data.

    ERROR BEHAVIOR: Every call increments a module-level counter; from the
    4th call onward a 429-style error dict is returned. Tests rate limit
    handling.

    Args:
        requisition_id: Requisition whose similar-requisition data is used.

    Returns:
        A 429-style error dict, the requisition-not-found error dict, or a
        dict with ``requisition_id``, ``sources`` and ``call_number``.
    """
    counter_key = "get_bulk_source_data"
    call_number = _call_counts.get(counter_key, 0) + 1
    # Counted before validation, so invalid requisitions use up quota too.
    _call_counts[counter_key] = call_number
    if call_number > 3:
        return {
            "status_code": 429,
            "error": True,
            "message": "Rate limit exceeded. Maximum 3 calls per session.",
            "retry_after_seconds": 60,
            "limit": 3,
            "remaining": 0,
        }

    invalid = _check_requisition(requisition_id)
    if invalid:
        return invalid

    candidates = get_data_loader().get_similar_requisitions(requisition_id)
    per_source = {
        name: {
            "total_candidates": int(len(rows)),
            "total_hires": int(rows["hired"].sum()),
            "reviewed": int(rows["reviewed"].sum()),
        }
        for name, rows in candidates.groupby("source_name")
    }
    return {
        "requisition_id": requisition_id,
        "sources": per_source,
        "call_number": call_number,
    }
| # ── Test 36: Missing required fields ──────────────────────────────────────── | |
def get_source_metrics_lite(requisition_id: str) -> Dict[str, Any]:
    """Get a lightweight summary of source metrics.

    Produces one metrics entry per source in the similar-requisition data.

    ERROR BEHAVIOR: Metrics entries omit the ``source_name`` field, so
    entries cannot be attributed to a source. Tests agent handling of
    incomplete/partial data.

    Args:
        requisition_id: Requisition whose similar-requisition data is used.

    Returns:
        The requisition-not-found error dict, or a dict with
        ``requisition_id``, ``metrics`` (a list of count dicts) and ``note``.
    """
    err = _check_requisition(requisition_id)
    if err:
        return err

    loader = get_data_loader()
    data = loader.get_similar_requisitions(requisition_id)
    # The group key is discarded on purpose: omitting source_name is the
    # error behavior under test.
    metrics = [
        {
            "candidate_count": int(len(group)),
            "hire_count": int(group["hired"].sum()),
            "sla_met_count": int(group[group["reviewed"]]["sla_met"].sum()),
        }
        for _, group in data.groupby("source_name")
    ]
    return {
        "requisition_id": requisition_id,
        "metrics": metrics,
        # Em dash written as an escape so the note survives re-encoding.
        "note": "Lightweight view \u2014 some fields may be omitted for performance.",
    }
| # ── Test 37: Wrong field types in response ────────────────────────────────── | |
def get_volume_report(requisition_id: str) -> Dict[str, Any]:
    """Generate a volume report for a requisition.

    Breaks candidate volume down by source.

    ERROR BEHAVIOR: ``candidate_count``, ``hire_count`` and
    ``total_candidates`` are returned as strings instead of ints, and
    ``review_rate`` is a formatted percentage string. Tests type coercion
    handling.

    Args:
        requisition_id: Requisition whose similar-requisition data is used.

    Returns:
        The requisition-not-found error dict, or a dict with
        ``requisition_id``, ``metrics`` and ``total_candidates``.
    """
    invalid = _check_requisition(requisition_id)
    if invalid:
        return invalid

    candidates = get_data_loader().get_similar_requisitions(requisition_id)
    rows_out = []
    for name, rows in candidates.groupby("source_name"):
        hires = int(rows["hired"].sum())
        review_pct = rows["reviewed"].mean() * 100
        entry = {
            "source_name": name,
            # Counts are stringified on purpose (the error under test).
            "candidate_count": str(len(rows)),
            "hire_count": str(hires),
            "review_rate": f"{review_pct:.1f}%",
        }
        rows_out.append(entry)

    return {
        "requisition_id": requisition_id,
        "metrics": rows_out,
        "total_candidates": str(len(candidates)),  # String on purpose.
    }
| # ── Test 38: Large response (1000 records) ────────────────────────────────── | |
def get_full_candidate_details(requisition_id: str) -> Dict[str, Any]:
    """Get full candidate details for a requisition.

    Loads candidate records from the first ``large_response_fixture.json``
    found among the known fixture locations.

    ERROR BEHAVIOR: The response carries the whole pre-generated fixture
    (described as 1000 candidate records). Tests agent handling of large
    payloads.

    Args:
        requisition_id: Requisition to validate; the fixture content itself
            does not depend on it.

    Returns:
        The requisition-not-found error dict; a dict with ``requisition_id``,
        ``total_records`` and ``candidates`` loaded from the fixture; or, if
        no fixture file is found, an empty result carrying a ``warning``.
    """
    err = _check_requisition(requisition_id)
    if err:
        return err

    # Candidate fixture locations: relative to this file, then relative to
    # the current working directory.
    fixture_paths = [
        Path(__file__).parent.parent.parent / "data" / "large_response_fixture.json",
        Path("./data/large_response_fixture.json"),
    ]
    for path in fixture_paths:
        # is_file() also skips a directory that happens to share the name.
        if path.is_file():
            # JSON is UTF-8 by specification; do not rely on the locale default.
            with path.open("r", encoding="utf-8") as f:
                records = json.load(f)
            return {
                "requisition_id": requisition_id,
                "total_records": len(records),
                "candidates": records,
            }

    # Fallback: no fixture available, so return an empty result with a warning.
    return {
        "requisition_id": requisition_id,
        "total_records": 0,
        "candidates": [],
        "warning": "Large response fixture not found",
    }
| # ── Test 39: Unicode and special characters ───────────────────────────────── | |
def get_source_directory(requisition_id: str) -> Dict[str, Any]:
    """Show the source directory for a requisition.

    Returns a fixed directory listing of sourcing channels; the listing
    does not depend on the requisition's data.

    ERROR BEHAVIOR: Source names and regions contain emoji, CJK characters,
    Arabic text and other non-ASCII symbols. Tests unicode handling.

    Args:
        requisition_id: Requisition to validate and echo back.

    Returns:
        The requisition-not-found error dict, or a dict with
        ``requisition_id``, ``sources`` and ``total_sources``.
    """
    invalid = _check_requisition(requisition_id)
    if invalid:
        return invalid

    # (name, region, status) triples, kept as escapes so the file stays ASCII.
    listing = [
        ("LinkedIn \U0001F4BC", "Global", "active"),
        ("Dice \U0001F3B2", "North America", "active"),
        ("\u62db\u8058\u7f51 (Zhaopin)", "\u4e2d\u56fd", "active"),
        ("\u0628\u064a\u062a.\u0643\u0648\u0645 (Bayt)", "\u0627\u0644\u0634\u0631\u0642 \u0627\u0644\u0623\u0648\u0633\u0637", "active"),
        ("GitHub \U0001F431\u200D\U0001F4BB", "Global", "active"),
        ("R\u00e9f\u00e9rence\u2122", "Europe", "inactive"),
        ("\u2605 Top Talent \u2605", "APAC", "active"),
    ]
    directory = [
        {"name": name, "region": region, "status": status}
        for name, region, status in listing
    ]
    return {
        "requisition_id": requisition_id,
        "sources": directory,
        "total_sources": len(directory),
    }
| # ── Test 41: Extra undocumented fields (20 extra fields) ──────────────────── | |
def get_sla_extended(requisition_id: str, source_name: str = "Dice") -> Dict[str, Any]:
    """Get extended SLA data for a specific sourcing channel.

    Computes the mean of ``sla_met`` over reviewed candidates from
    ``source_name`` as a whole-number percentage (0 when there are none).

    ERROR BEHAVIOR: Response includes 20 undocumented, underscore-prefixed
    extra fields beyond the three documented ones. Tests agent ability to
    ignore noise and extract relevant data.

    Args:
        requisition_id: Requisition whose similar-requisition data is used.
        source_name: Sourcing channel to report on.

    Returns:
        The requisition-not-found error dict, or a dict with
        ``requisition_id``, ``source_name``, ``sla_percentage`` and the
        extra fields.
    """
    invalid = _check_requisition(requisition_id)
    if invalid:
        return invalid

    candidates = get_data_loader().get_similar_requisitions(requisition_id)
    mask = (candidates["reviewed"]) & (candidates["source_name"] == source_name)
    reviewed_rows = candidates[mask]

    if len(reviewed_rows) > 0:
        sla_pct = int(round(reviewed_rows["sla_met"].mean() * 100))
    else:
        sla_pct = 0

    documented = {
        "requisition_id": requisition_id,
        "source_name": source_name,
        "sla_percentage": sla_pct,
    }
    # Noise fields; only the interval and sample size derive from the data.
    undocumented = {
        "_internal_id": "sla-ext-7f3a9b2c",
        "_cache_ttl": 3600,
        "_version": "2.3.1",
        "_debug_query_ms": 42,
        "_shard_id": 3,
        "_region": "us-east-1",
        "_feature_flags": ["sla_v2", "extended_metrics"],
        "_experiment_group": "control",
        "_sampling_rate": 0.95,
        "_data_quality_score": 0.98,
        "_last_recomputed": "2025-04-29T03:00:00Z",
        "_computation_engine": "spark-3.5",
        "_model_version": "sla-impact-v1.4.2",
        "_confidence_interval": [sla_pct - 3, sla_pct + 3],
        "_p_value": 0.023,
        "_sample_size": int(len(reviewed_rows)),
        "_outliers_removed": 2,
        "_normalization_method": "min-max",
        "_correlation_with_hires": 0.67,
        "_seasonal_adjustment": True,
    }
    return {**documented, **undocumented}
| # ── Test 43: Undocumented error format ────────────────────────────────────── | |
def get_requisition_details(requisition_id: str) -> Dict[str, Any]:
    """Get detailed information for a specific requisition.

    Department and seniority level are read from the first row the loader
    returns for the requisition, falling back to ``"Unknown"`` when the
    field is absent.

    ERROR BEHAVIOR: For an unknown requisition, returns the non-standard
    error format ``{"err": "not_found", "req": ...}`` instead of the
    standard not-found response. Tests non-standard error parsing.

    Args:
        requisition_id: Requisition to describe.

    Returns:
        The non-standard error dict, or a dict with ``requisition_id``,
        ``department``, ``seniority_level``, ``total_candidates`` and
        ``sources_used``.
    """
    loader = get_data_loader()
    known = loader.is_valid_requisition(requisition_id)
    if not known:
        return {"err": "not_found", "req": requisition_id}

    rows = loader.get_by_requisition(requisition_id)
    first_row = rows.iloc[0]
    department = str(first_row.get("department", "Unknown"))
    seniority = str(first_row.get("seniority_level", "Unknown"))
    distinct_sources = list(rows["source_name"].unique())
    return {
        "requisition_id": requisition_id,
        "department": department,
        "seniority_level": seniority,
        "total_candidates": int(len(rows)),
        "sources_used": distinct_sources,
    }
| # ── Test 44: Undocumented pagination ──────────────────────────────────────── | |
def list_all_sources(requisition_id: str) -> Dict[str, Any]:
    """List all available sourcing channels.

    Returns the first page (up to three entries) of the alphabetically
    sorted source names found in the similar-requisition data.

    ERROR BEHAVIOR: When more than one page exists, the response includes a
    hard-coded ``next_page`` token that no schema describes. Tests
    pagination detection and handling.

    Args:
        requisition_id: Requisition whose similar-requisition data is used.

    Returns:
        The requisition-not-found error dict, or a dict with
        ``requisition_id``, ``sources``, ``total_count``, ``page_size``,
        ``page``, ``has_more`` and, only when more results exist,
        ``next_page``.
    """
    invalid = _check_requisition(requisition_id)
    if invalid:
        return invalid

    candidates = get_data_loader().get_similar_requisitions(requisition_id)
    channel_names = sorted(candidates["source_name"].unique())

    page_size = 3
    first_page = [
        {"name": name, "index": position}
        for position, name in enumerate(channel_names[:page_size])
    ]
    response: Dict[str, Any] = {
        "requisition_id": requisition_id,
        "sources": first_page,
        "total_count": len(channel_names),
        "page_size": page_size,
        "page": 1,
    }

    has_more = len(channel_names) > page_size
    if has_more:
        # The token is a fixed string; it is not derived from the request.
        response["next_page"] = "eyJvZmZzZXQiOjMsInJlcV9pZCI6IjA1OTU4QlIifQ=="
    response["has_more"] = has_more
    return response
| # ── Test 45: Undocumented rate limiting headers ───────────────────────────── | |
def get_batch_metrics(requisition_id: str) -> Dict[str, Any]:
    """Fetch batch metrics for all sourcing channels.

    Aggregates candidate, hire and reviewed counts per source from the
    similar-requisition data.

    ERROR BEHAVIOR: Response includes fixed ``X-RateLimit-*`` style header
    values embedded in the response body. Tests rate limit awareness.

    Args:
        requisition_id: Requisition whose similar-requisition data is used.

    Returns:
        The requisition-not-found error dict, or a dict with
        ``requisition_id``, ``metrics`` and the four ``X-RateLimit-*`` keys.
    """
    invalid = _check_requisition(requisition_id)
    if invalid:
        return invalid

    candidates = get_data_loader().get_similar_requisitions(requisition_id)
    per_source = {
        name: {
            "candidates": int(len(rows)),
            "hires": int(rows["hired"].sum()),
            "reviewed": int(rows["reviewed"].sum()),
        }
        for name, rows in candidates.groupby("source_name")
    }

    response: Dict[str, Any] = {
        "requisition_id": requisition_id,
        "metrics": per_source,
    }
    # Static rate-limit values; they are not tied to any real call counter.
    response.update({
        "X-RateLimit-Limit": 100,
        "X-RateLimit-Remaining": 97,
        "X-RateLimit-Reset": "2025-05-01T00:00:00Z",
        "X-RateLimit-Window": "1h",
    })
    return response