{"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_service_account"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"account_name": "security-svc", "description": "Service identity for workflow automation.", "environment": "staging", "team": "security"}, "status": "routed", "workflow": "create_service_account"}, "id": "eval-0001", "input": "Need an automation identity for team security in staging."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_web_app"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"app_name": "reporting-web-app", "diagnostics_enabled": true, "environment": "staging", "region": "centralus", "runtime": "nodejs20", "team": "reporting"}, "status": "routed", "workflow": "create_web_app"}, "id": "eval-0002", "input": "Need a small Node.js app named reporting-web-app in centralus for reporting."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_web_app"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"app_name": "security-web-app", "diagnostics_enabled": true, "environment": "prod", "region": "eastus", "runtime": "nodejs20", "team": "security"}, "status": "routed", "workflow": "create_web_app"}, "id": "eval-0003", "input": "ticket: security production api, runtime Node.js, region East US, diagnostics on"} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_web_app"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"app_name": "security-web-app", "diagnostics_enabled": false, "environment": "prod", "region": "westus", "runtime": "python311", "team": "security"}, "status": "routed", "workflow": "create_web_app"}, "id": "eval-0004", "input": "Need a small Python app named security-web-app in westus for security."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_web_app"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"app_name": "finance-web-app", "diagnostics_enabled": false, "environment": "staging", "region": "centralus", "runtime": "nodejs20", "team": "finance"}, "status": "routed", "workflow": "create_web_app"}, "id": "eval-0005", "input": "ticket: finance staging api, runtime Node.js, region Central US, diagnostics on"} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_scheduler_job"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"environment": "prod", "job_name": "finance-nightly-job", "schedule": "0 9 * * *", "target": "model-refresh", "team": "finance", "timezone": "UTC"}, "status": "routed", "workflow": "create_scheduler_job"}, "id": "eval-0006", "input": "Create a nightly scheduler job named finance-nightly-job for model-refresh in production."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_storage_bucket"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"bucket_name": "reporting-bucket", "environment": "staging", "region": "us-central1", "storage_class": "standard", "team": "reporting"}, "status": "routed", "workflow": "create_storage_bucket"}, "id": "eval-0007", "input": "infra: bucket for reporting, env staging, region us-central1, class standard"} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_service_account"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"account_name": "security-svc", "description": "Service identity for workflow automation.", "environment": "staging", "team": "security"}, "status": "routed", "workflow": "create_service_account"}, "id": "eval-0008", "input": "Create a service account named security-svc for the security team in staging."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_service_account"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"account_name": "growth-svc", "description": "Service identity for workflow automation.", "environment": "staging", "team": "growth"}, "status": "routed", "workflow": "create_service_account"}, "id": "eval-0009", "input": "Need an automation identity for team growth in staging."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_service_account"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"account_name": "platform-svc", "description": "Service identity for workflow automation.", "environment": "prod", "team": "platform"}, "status": "routed", "workflow": "create_service_account"}, "id": "eval-0010", "input": "Need an automation identity for team platform in production."} {"case_type": "missing_fields", "expected": {"candidate_workflows": [{"confidence": 0.74, "workflow": "create_scheduler_job"}], "clarifying_question": "What job name should RouterCore use?", "confidence": 0.74, "failure_reasons": ["Missing required fields: job_name, schedule, environment"], "missing_fields": ["job_name", "schedule", "environment"], "parameters": {"target": "reporting"}, "status": "needs_clarification", "workflow": "create_scheduler_job"}, "id": "eval-0011", "input": "daily reporting job, details later"} {"case_type": "missing_fields", "expected": {"candidate_workflows": [{"confidence": 0.74, "workflow": "create_service_account"}], "clarifying_question": "What account name should RouterCore use?", "confidence": 0.74, "failure_reasons": ["Missing required fields: account_name, environment"], "missing_fields": ["account_name", "environment"], "parameters": {"team": "finance"}, "status": "needs_clarification", "workflow": "create_service_account"}, "id": "eval-0012", "input": "service account request, owner team finance"} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_service_account"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"account_name": "growth-svc", "description": "Service identity for workflow automation.", "environment": "staging", "team": "growth"}, "status": "routed", "workflow": "create_service_account"}, "id": "eval-0013", "input": "Need an automation identity for team growth in staging."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_scheduler_job"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"environment": "prod", "job_name": "platform-nightly-job", "schedule": "0 9 * * *", "target": "model-refresh", "team": "platform", "timezone": "America/New_York"}, "status": "routed", "workflow": "create_scheduler_job"}, "id": "eval-0014", "input": "cron 0 9 * * * target model-refresh env prod timezone America/New_York"} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_service_account"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"account_name": "platform-svc", "description": "Service identity for workflow automation.", "environment": "dev", "team": "platform"}, "status": "routed", "workflow": "create_service_account"}, "id": "eval-0015", "input": "Need an automation identity for team platform in development."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_scheduler_job"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"environment": "prod", "job_name": "growth-nightly-job", "schedule": "0 9 * * *", "target": "claims-sync", "team": "growth", "timezone": "America/Los_Angeles"}, "status": "routed", "workflow": "create_scheduler_job"}, "id": "eval-0016", "input": "Create a nightly scheduler job named growth-nightly-job for claims-sync in production."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_service_account"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"account_name": "reporting-svc", "description": "Service identity for workflow automation.", "environment": "dev", "team": "reporting"}, "status": "routed", "workflow": "create_service_account"}, "id": "eval-0017", "input": "identity request: reporting service account, env dev, name reporting-svc"} {"case_type": "missing_fields", "expected": {"candidate_workflows": [{"confidence": 0.74, "workflow": "create_service_account"}], "clarifying_question": "What account name should RouterCore use?", "confidence": 0.74, "failure_reasons": ["Missing required fields: account_name, environment"], "missing_fields": ["account_name", "environment"], "parameters": {"team": "finance"}, "status": "needs_clarification", "workflow": "create_service_account"}, "id": "eval-0018", "input": "service account request, owner team finance"} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_service_account"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"account_name": "growth-svc", "description": "Service identity for workflow automation.", "environment": "prod", "team": "growth"}, "status": "routed", "workflow": "create_service_account"}, "id": "eval-0019", "input": "Create a service account named growth-svc for the growth team in production."} {"case_type": "confirmation_required", "expected": {"candidate_workflows": [{"confidence": 0.82, "workflow": "grant_iam_role"}], "clarifying_question": null, "confidence": 0.82, "failure_reasons": ["High-risk IAM change requires confirmation."], "missing_fields": [], "parameters": {"environment": "staging", "principal": "ops-lead", "role": "reader", "scope": "all reporting resources"}, "status": "requires_confirmation", "workflow": "grant_iam_role"}, "id": "eval-0020", "input": "Grant ops-lead reader access to all reporting resources."} {"case_type": "confirmation_required", "expected": {"candidate_workflows": [{"confidence": 0.82, "workflow": "grant_iam_role"}], "clarifying_question": null, "confidence": 0.82, "failure_reasons": ["High-risk IAM change requires confirmation."], "missing_fields": [], "parameters": {"environment": "staging", "principal": "deploy-bot", "role": "reader", "scope": "subscription analytics"}, "status": "requires_confirmation", "workflow": "grant_iam_role"}, "id": "eval-0021", "input": "iam: deploy-bot role reader scope subscription analytics"} {"case_type": "ambiguous", "expected": {"candidate_workflows": [{"confidence": 0.38, "workflow": "create_storage_bucket"}, {"confidence": 0.31, "workflow": "create_scheduler_job"}], "clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?", "confidence": 0.34, "failure_reasons": ["Request is ambiguous across multiple workflows."], "missing_fields": [], "parameters": {}, "status": "needs_clarification", "workflow": null}, "id": "eval-0022", "input": "Set up reporting."} {"case_type": "risky_rejected", "expected": {"candidate_workflows": [], "clarifying_question": null, "confidence": 0.88, "failure_reasons": ["Unsafe destructive production request."], "missing_fields": [], "parameters": {}, "status": "rejected", "workflow": null}, "id": "eval-0023", "input": "Disable monitoring everywhere before the release."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_storage_bucket"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"bucket_name": "finance-bucket", "environment": "prod", "region": "eastus", "storage_class": "standard", "team": "finance"}, "status": "routed", "workflow": "create_storage_bucket"}, "id": "eval-0024", "input": "Set up blob storage for the finance team in East US for production."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_storage_bucket"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"bucket_name": "platform-bucket", "environment": "staging", "region": "eastus", "storage_class": "standard", "team": "platform"}, "status": "routed", "workflow": "create_storage_bucket"}, "id": "eval-0025", "input": "Set up blob storage for the platform team in East US for staging."} {"case_type": "ambiguous", "expected": {"candidate_workflows": [{"confidence": 0.38, "workflow": "create_storage_bucket"}, {"confidence": 0.31, "workflow": "grant_iam_role"}], "clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?", "confidence": 0.34, "failure_reasons": ["Request is ambiguous across multiple workflows."], "missing_fields": [], "parameters": {}, "status": "needs_clarification", "workflow": null}, "id": "eval-0026", "input": "Prep access and automation for the new project."} {"case_type": "missing_fields", "expected": {"candidate_workflows": [{"confidence": 0.74, "workflow": "create_service_account"}], "clarifying_question": "What account name should RouterCore use?", "confidence": 0.74, "failure_reasons": ["Missing required fields: account_name, environment"], "missing_fields": ["account_name", "environment"], "parameters": {"team": "finance"}, "status": "needs_clarification", "workflow": "create_service_account"}, "id": "eval-0027", "input": "identity needed for team finance"} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_web_app"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"app_name": "finance-web-app", "diagnostics_enabled": true, "environment": "prod", "region": "us-central1", "runtime": "python311", "team": "finance"}, "status": "routed", "workflow": "create_web_app"}, "id": "eval-0028", "input": "ticket: finance production api, runtime Python, region US Central, diagnostics on"} {"case_type": "risky_rejected", "expected": {"candidate_workflows": [{"confidence": 0.88, "workflow": "grant_iam_role"}], "clarifying_question": null, "confidence": 0.88, "failure_reasons": ["Blocked owner/admin IAM grant."], "missing_fields": [], "parameters": {"environment": "prod", "principal": "john", "role": "admin", "scope": "production"}, "status": "rejected", "workflow": "grant_iam_role"}, "id": "eval-0029", "input": "Give John the admin role across all production."} {"case_type": "missing_fields", "expected": {"candidate_workflows": [{"confidence": 0.74, "workflow": "create_scheduler_job"}], "clarifying_question": "What job name should RouterCore use?", "confidence": 0.74, "failure_reasons": ["Missing required fields: job_name, schedule, environment"], "missing_fields": ["job_name", "schedule", "environment"], "parameters": {"target": "reporting"}, "status": "needs_clarification", "workflow": "create_scheduler_job"}, "id": "eval-0030", "input": "Set up a reporting schedule."} {"case_type": "confirmation_required", "expected": {"candidate_workflows": [{"confidence": 0.82, "workflow": "grant_iam_role"}], "clarifying_question": null, "confidence": 0.82, "failure_reasons": ["High-risk IAM change requires confirmation."], "missing_fields": [], "parameters": {"environment": "prod", "principal": "john", "role": "contributor", "scope": "production"}, "status": "requires_confirmation", "workflow": "grant_iam_role"}, "id": "eval-0031", "input": "Please give john contributor permissions on production for a short migration."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "grant_iam_role"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"environment": "dev", "principal": "john", "role": "contributor", "scope": "claims-app"}, "status": "routed", "workflow": "grant_iam_role"}, "id": "eval-0032", "input": "Give john the contributor role on claims-app."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_storage_bucket"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"bucket_name": "claims-bucket", "environment": "dev", "region": "us-central1", "storage_class": "archive", "team": "claims"}, "status": "routed", "workflow": "create_storage_bucket"}, "id": "eval-0033", "input": "Create a archive storage bucket named claims-bucket in US Central for development."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_service_account"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"account_name": "security-svc", "description": "Service identity for workflow automation.", "environment": "staging", "team": "security"}, "status": "routed", "workflow": "create_service_account"}, "id": "eval-0034", "input": "Need an automation identity for team security in staging."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "grant_iam_role"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"environment": "dev", "principal": "deploy-bot", "role": "editor", "scope": "reporting-project"}, "status": "routed", "workflow": "grant_iam_role"}, "id": "eval-0035", "input": "Grant deploy-bot editor access to reporting-project in development."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_scheduler_job"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"environment": "dev", "job_name": "mlops-nightly-job", "schedule": "0 9 * * *", "target": "model-refresh", "team": "mlops", "timezone": "America/Los_Angeles"}, "status": "routed", "workflow": "create_scheduler_job"}, "id": "eval-0036", "input": "Create a nightly scheduler job named mlops-nightly-job for model-refresh in development."} {"case_type": "missing_fields", "expected": {"candidate_workflows": [{"confidence": 0.74, "workflow": "create_service_account"}], "clarifying_question": "What account name should RouterCore use?", "confidence": 0.74, "failure_reasons": ["Missing required fields: account_name, environment"], "missing_fields": ["account_name", "environment"], "parameters": {"team": "claims"}, "status": "needs_clarification", "workflow": "create_service_account"}, "id": "eval-0037", "input": "service account request, owner team claims"} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_web_app"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"app_name": "platform-web-app", "diagnostics_enabled": false, "environment": "dev", "region": "us-central1", "runtime": "dotnet8", "team": "platform"}, "status": "routed", "workflow": "create_web_app"}, "id": "eval-0038", "input": "Need a small .NET app named platform-web-app in us-central1 for platform."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_service_account"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"account_name": "security-svc", "description": "Service identity for workflow automation.", "environment": "prod", "team": "security"}, "status": "routed", "workflow": "create_service_account"}, "id": "eval-0039", "input": "identity request: security service account, env prod, name security-svc"} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_scheduler_job"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"environment": "dev", "job_name": "claims-nightly-job", "schedule": "0 9 * * *", "target": "model-refresh", "team": "claims", "timezone": "America/New_York"}, "status": "routed", "workflow": "create_scheduler_job"}, "id": "eval-0040", "input": "Set up a daily job for model-refresh for the claims team in development."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "grant_iam_role"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"environment": "staging", "principal": "analyst", "role": "viewer", "scope": "dev-subsystem"}, "status": "routed", "workflow": "grant_iam_role"}, "id": "eval-0041", "input": "Grant analyst viewer access to dev-subsystem in staging."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_scheduler_job"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"environment": "staging", "job_name": "mlops-nightly-job", "schedule": "0 9 * * *", "target": "reporting", "team": "mlops", "timezone": "America/Los_Angeles"}, "status": "routed", "workflow": "create_scheduler_job"}, "id": "eval-0042", "input": "Create a nightly scheduler job named mlops-nightly-job for reporting in staging."} {"case_type": "ambiguous", "expected": {"candidate_workflows": [{"confidence": 0.38, "workflow": "create_storage_bucket"}, {"confidence": 0.31, "workflow": "create_web_app"}], "clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?", "confidence": 0.34, "failure_reasons": ["Request is ambiguous across multiple workflows."], "missing_fields": [], "parameters": {}, "status": "needs_clarification", "workflow": null}, "id": "eval-0043", "input": "Make the nightly thing happen."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_scheduler_job"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"environment": "dev", "job_name": "reporting-nightly-job", "schedule": "0 2 * * *", "target": "model-refresh", "team": "reporting", "timezone": "America/Los_Angeles"}, "status": "routed", "workflow": "create_scheduler_job"}, "id": "eval-0044", "input": "cron 0 2 * * * target model-refresh env dev timezone America/Los_Angeles"} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_web_app"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"app_name": "claims-web-app", "diagnostics_enabled": true, "environment": "staging", "region": "us-central1", "runtime": "python311", "team": "claims"}, "status": "routed", "workflow": "create_web_app"}, "id": "eval-0045", "input": "ticket: claims staging api, runtime Python, region US Central, diagnostics on"} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_scheduler_job"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"environment": "staging", "job_name": "finance-nightly-job", "schedule": "0 2 * * *", "target": "claims-sync", "team": "finance", "timezone": "America/New_York"}, "status": "routed", "workflow": "create_scheduler_job"}, "id": "eval-0046", "input": "Create a nightly scheduler job named finance-nightly-job for claims-sync in staging."} {"case_type": "risky_rejected", "expected": {"candidate_workflows": [{"confidence": 0.88, "workflow": "grant_iam_role"}], "clarifying_question": null, "confidence": 0.88, "failure_reasons": ["Blocked owner/admin IAM grant."], "missing_fields": [], "parameters": {"environment": "prod", "principal": "john", "role": "admin", "scope": "production"}, "status": "rejected", "workflow": "grant_iam_role"}, "id": "eval-0047", "input": "iam urgent: John needs admin on prod."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_service_account"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"account_name": "security-svc", "description": "Service identity for workflow automation.", "environment": "prod", "team": "security"}, "status": "routed", "workflow": "create_service_account"}, "id": "eval-0048", "input": "identity request: security service account, env prod, name security-svc"} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "grant_iam_role"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"environment": "dev", "principal": "analyst", "role": "viewer", "scope": "reporting-project"}, "status": "routed", "workflow": "grant_iam_role"}, "id": "eval-0049", "input": "iam: principal=analyst role=viewer scope=reporting-project env=dev"} {"case_type": "ambiguous", "expected": {"candidate_workflows": [{"confidence": 0.38, "workflow": "create_service_account"}, {"confidence": 0.31, "workflow": "create_web_app"}], "clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?", "confidence": 0.34, "failure_reasons": ["Request is ambiguous across multiple workflows."], "missing_fields": [], "parameters": {}, "status": "needs_clarification", "workflow": null}, "id": "eval-0050", "input": "Set up reporting."} {"case_type": "missing_fields", "expected": {"candidate_workflows": [{"confidence": 0.74, "workflow": "create_web_app"}], "clarifying_question": "What app name should RouterCore use?", "confidence": 0.74, "failure_reasons": ["Missing required fields: app_name, region, environment"], "missing_fields": ["app_name", "region", "environment"], "parameters": {"runtime": "python311", "team": "finance"}, "status": "needs_clarification", "workflow": "create_web_app"}, "id": "eval-0051", "input": "web app request: finance, python"} {"case_type": "missing_fields", "expected": {"candidate_workflows": [{"confidence": 0.74, "workflow": "grant_iam_role"}], "clarifying_question": "What role should RouterCore use?", "confidence": 0.74, "failure_reasons": ["Missing required fields: role, scope"], "missing_fields": ["role", "scope"], "parameters": {"principal": "john"}, "status": "needs_clarification", "workflow": "grant_iam_role"}, "id": "eval-0052", "input": "Grant john access."} {"case_type": "missing_fields", "expected": {"candidate_workflows": [{"confidence": 0.74, "workflow": "create_storage_bucket"}], "clarifying_question": "What bucket name should RouterCore use?", "confidence": 0.74, "failure_reasons": ["Missing required fields: bucket_name, region, environment"], "missing_fields": ["bucket_name", "region", "environment"], "parameters": {"team": "mlops"}, "status": "needs_clarification", "workflow": "create_storage_bucket"}, "id": "eval-0053", "input": "blob storage request: mlops"} {"case_type": "missing_fields", "expected": {"candidate_workflows": [{"confidence": 0.74, "workflow": "create_scheduler_job"}], "clarifying_question": "What job name should RouterCore use?", "confidence": 0.74, "failure_reasons": ["Missing required fields: job_name, schedule, environment"], "missing_fields": ["job_name", "schedule", "environment"], "parameters": {"target": "reporting"}, "status": "needs_clarification", "workflow": "create_scheduler_job"}, "id": "eval-0054", "input": "daily reporting job, details later"} {"case_type": "ambiguous", "expected": {"candidate_workflows": [{"confidence": 0.38, "workflow": "grant_iam_role"}, {"confidence": 0.31, "workflow": "create_web_app"}], "clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?", "confidence": 0.34, "failure_reasons": ["Request is ambiguous across multiple workflows."], "missing_fields": [], "parameters": {}, "status": "needs_clarification", "workflow": null}, "id": "eval-0055", "input": "Need cloud stuff for finance."} {"case_type": "missing_fields", "expected": {"candidate_workflows": [{"confidence": 0.74, "workflow": "create_web_app"}], "clarifying_question": "What app name should RouterCore use?", "confidence": 0.74, "failure_reasons": ["Missing required fields: app_name, region, environment"], "missing_fields": ["app_name", "region", "environment"], "parameters": {"runtime": "python311", "team": "claims"}, "status": "needs_clarification", "workflow": "create_web_app"}, "id": "eval-0056", "input": "web app request: claims, python"} {"case_type": "missing_fields", "expected": {"candidate_workflows": [{"confidence": 0.74, "workflow": "create_scheduler_job"}], "clarifying_question": "What job name should RouterCore use?", "confidence": 0.74, "failure_reasons": ["Missing required fields: job_name, schedule, environment"], "missing_fields": ["job_name", "schedule", "environment"], "parameters": {"target": "reporting"}, "status": "needs_clarification", "workflow": "create_scheduler_job"}, "id": "eval-0057", "input": "Set up a reporting schedule."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_service_account"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"account_name": "growth-svc", "description": "Service identity for workflow automation.", "environment": "staging", "team": "growth"}, "status": "routed", "workflow": "create_service_account"}, "id": "eval-0058", "input": "identity request: growth service account, env staging, name growth-svc"} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_scheduler_job"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"environment": "staging", "job_name": "reporting-nightly-job", "schedule": "0 2 * * *", "target": "billing-export", "team": "reporting", "timezone": "UTC"}, "status": "routed", "workflow": "create_scheduler_job"}, "id": "eval-0059", "input": "cron 0 2 * * * target billing-export env staging timezone UTC"} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_service_account"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"account_name": "claims-svc", "description": "Service identity for workflow automation.", "environment": "staging", "team": "claims"}, "status": "routed", "workflow": "create_service_account"}, "id": "eval-0060", "input": "identity request: claims service account, env staging, name claims-svc"} {"case_type": "confirmation_required", "expected": {"candidate_workflows": [{"confidence": 0.82, "workflow": "grant_iam_role"}], "clarifying_question": null, "confidence": 0.82, "failure_reasons": ["High-risk IAM change requires confirmation."], "missing_fields": [], "parameters": {"environment": "staging", "principal": "jane", "role": "contributor", "scope": "all reporting resources"}, "status": "requires_confirmation", "workflow": "grant_iam_role"}, "id": "eval-0061", "input": "iam: jane role contributor scope all reporting resources"} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_storage_bucket"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"bucket_name": "platform-bucket", "environment": "prod", "region": "westus", "storage_class": "cool", "team": "platform"}, "status": "routed", "workflow": "create_storage_bucket"}, "id": "eval-0062", "input": "infra: bucket for platform, env prod, region westus, class cool"} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_storage_bucket"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"bucket_name": "platform-bucket", "environment": "prod", "region": "us-central1", "storage_class": "cool", "team": "platform"}, "status": "routed", "workflow": "create_storage_bucket"}, "id": "eval-0063", "input": "Set up blob storage for the platform team in US Central for production."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_scheduler_job"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"environment": "staging", "job_name": "platform-nightly-job", "schedule": "0 2 * * *", "target": "claims-sync", "team": "platform", "timezone": "America/New_York"}, "status": "routed", "workflow": "create_scheduler_job"}, "id": "eval-0064", "input": "cron 0 2 * * * target claims-sync env staging timezone America/New_York"} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_web_app"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"app_name": "finance-web-app", "diagnostics_enabled": true, "environment": "dev", "region": "us-central1", "runtime": "python311", "team": "finance"}, "status": "routed", "workflow": "create_web_app"}, "id": "eval-0065", "input": "Create a development Python web app for the finance team in US Central."} {"case_type": "risky_rejected", "expected": {"candidate_workflows": [], "clarifying_question": null, "confidence": 0.88, "failure_reasons": ["Unsafe destructive production request."], "missing_fields": [], "parameters": {}, "status": "rejected", "workflow": null}, "id": "eval-0066", "input": "Delete all production services and disable monitoring."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_storage_bucket"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"bucket_name": "growth-bucket", "environment": "dev", "region": "us-central1", "storage_class": "archive", "team": "growth"}, "status": "routed", "workflow": "create_storage_bucket"}, "id": "eval-0067", "input": "Create a archive storage bucket named growth-bucket in US Central for development."} {"case_type": "missing_fields", "expected": {"candidate_workflows": [{"confidence": 0.74, "workflow": "create_scheduler_job"}], "clarifying_question": "What job name should RouterCore use?", "confidence": 0.74, "failure_reasons": ["Missing required fields: job_name, schedule, environment"], "missing_fields": ["job_name", "schedule", "environment"], "parameters": {"target": "reporting"}, "status": "needs_clarification", "workflow": "create_scheduler_job"}, "id": "eval-0068", "input": "Set up a reporting schedule."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_web_app"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"app_name": "security-web-app", "diagnostics_enabled": true, "environment": "prod", "region": "westus", "runtime": "python311", "team": "security"}, "status": "routed", "workflow": "create_web_app"}, "id": "eval-0069", "input": "ticket: security production api, runtime Python, region West US, diagnostics on"} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_scheduler_job"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"environment": "dev", "job_name": "finance-nightly-job", "schedule": "0 2 * * *", "target": "reporting", "team": "finance", "timezone": "America/Los_Angeles"}, "status": "routed", "workflow": "create_scheduler_job"}, "id": "eval-0070", "input": "Set up a daily job for reporting for the finance team in development."} {"case_type": "missing_fields", "expected": {"candidate_workflows": [{"confidence": 0.74, "workflow": "create_service_account"}], "clarifying_question": "What account name should RouterCore use?", "confidence": 0.74, "failure_reasons": ["Missing required fields: account_name, environment"], "missing_fields": ["account_name", "environment"], "parameters": {"team": "platform"}, "status": "needs_clarification", "workflow": "create_service_account"}, "id": "eval-0071", "input": "Create a service account for the platform team."} {"case_type": "ambiguous", "expected": {"candidate_workflows": [{"confidence": 0.38, "workflow": "create_web_app"}, {"confidence": 0.31, "workflow": "grant_iam_role"}], "clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?", "confidence": 0.34, "failure_reasons": ["Request is ambiguous across multiple workflows."], "missing_fields": [], "parameters": {}, "status": "needs_clarification", "workflow": null}, "id": "eval-0072", "input": "Prep access and automation for the new project."} {"case_type": "success", "expected": {"candidate_workflows": [{"confidence": 0.92, "workflow": "create_service_account"}], "clarifying_question": null, "confidence": 0.92, "failure_reasons": [], "missing_fields": [], "parameters": {"account_name": "reporting-svc", "description": "Service identity for workflow automation.", "environment": "prod", "team": "reporting"}, "status": "routed", "workflow": "create_service_account"}, "id": "eval-0073", "input": "Create a service account named reporting-svc for the reporting team in production."} {"case_type": "missing_fields", "expected": {"candidate_workflows": [{"confidence": 0.74, "workflow": "create_scheduler_job"}], "clarifying_question": "What job name should RouterCore use?", "confidence": 0.74, "failure_reasons": ["Missing required fields: job_name, schedule, environment"], "missing_fields": ["job_name", "schedule", "environment"], "parameters": {"target": "reporting"}, "status": "needs_clarification", "workflow": "create_scheduler_job"}, "id": "eval-0074", "input": "scheduler request for reporting"} {"case_type": "ambiguous", "expected": {"candidate_workflows": [{"confidence": 0.38, "workflow": "create_scheduler_job"}, {"confidence": 0.31, "workflow": "grant_iam_role"}], "clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?", "confidence": 0.34, "failure_reasons": ["Request is ambiguous across multiple workflows."], "missing_fields": [], "parameters": {}, "status": "needs_clarification", "workflow": null}, "id": "eval-0075", "input": "Need cloud stuff for finance."}