routercore / eval /results /fakerouter_eval.json
Jayteare's picture
Deploy RouterCore Gradio demo
1137e50 verified
{
"summary_metrics": {
"json_validity_rate": 1.0,
"workflow_accuracy": 0.9701492537313433,
"status_accuracy": 0.5733333333333334,
"required_field_presence_accuracy": 0.2857142857142857,
"unsafe_rejection_accuracy": 1.0,
"false_route_rate": 0.0
},
"per_example_results": [
{
"id": "eval-0001",
"case_type": "success",
"input": "cron 0 2 * * * target claims-sync env dev timezone America/New_York",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_scheduler_job"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "dev",
"job_name": "finance-nightly-job",
"schedule": "0 2 * * *",
"target": "claims-sync",
"team": "finance",
"timezone": "America/New_York"
},
"status": "routed",
"workflow": "create_scheduler_job"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"parameters": {
"job_name": "claims-sync-scheduled-job",
"schedule": "0 2 * * *",
"target": "claims-sync",
"environment": "dev"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "Please confirm the selected workflow and parameters."
},
"validation_result": {
"valid": true,
"workflow": "create_scheduler_job",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "requires_confirmation",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": true,
"execution_allowed": false,
"reasons": [
"Router confidence is between 0.55 and 0.80."
],
"clarifying_question": null
},
"actual": {
"status": "requires_confirmation",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"parameters": {
"job_name": "claims-sync-scheduled-job",
"schedule": "0 2 * * *",
"target": "claims-sync",
"environment": "dev"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.69
}
],
"failure_reasons": [
"Router confidence is between 0.55 and 0.80."
],
"clarifying_question": null
},
"pass_fail_notes": [
"status mismatch: expected routed, got requires_confirmation",
"missing expected parameter keys: team, timezone"
]
},
{
"id": "eval-0002",
"case_type": "success",
"input": "ticket: mlops production api, runtime .NET, region Central US, diagnostics on",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_web_app"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"app_name": "mlops-web-app",
"diagnostics_enabled": true,
"environment": "prod",
"region": "centralus",
"runtime": "dotnet8",
"team": "mlops"
},
"status": "routed",
"workflow": "create_web_app"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_web_app",
"confidence": 0.69,
"parameters": {
"region": "centralus",
"runtime": "dotnet8",
"environment": "prod",
"diagnostics_enabled": true
},
"missing_fields": [
"app_name"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What app name should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_web_app",
"missing_fields": [
"app_name"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: app_name"
],
"clarifying_question": "What app name should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: app_name"
],
"clarifying_question": "What app name should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.69,
"parameters": {
"region": "centralus",
"runtime": "dotnet8",
"environment": "prod",
"diagnostics_enabled": true
},
"missing_fields": [
"app_name"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: app_name"
],
"clarifying_question": "What app name should RouterCore use?"
},
"pass_fail_notes": [
"status mismatch: expected routed, got needs_clarification",
"missing expected parameter keys: app_name, team"
]
},
{
"id": "eval-0003",
"case_type": "success",
"input": "Create a nightly scheduler job named reporting-nightly-job for claims-sync in production.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_scheduler_job"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "prod",
"job_name": "reporting-nightly-job",
"schedule": "0 9 * * *",
"target": "claims-sync",
"team": "reporting",
"timezone": "America/Los_Angeles"
},
"status": "routed",
"workflow": "create_scheduler_job"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.95,
"parameters": {
"job_name": "reporting-nightly-job",
"schedule": "0 2 * * *",
"target": "claims-sync",
"environment": "prod"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.95
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "create_scheduler_job",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.95,
"accepted": true,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"actual": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.95,
"parameters": {
"job_name": "reporting-nightly-job",
"schedule": "0 2 * * *",
"target": "claims-sync",
"environment": "prod"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.95
}
],
"failure_reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"pass_fail_notes": [
"missing expected parameter keys: team, timezone"
]
},
{
"id": "eval-0004",
"case_type": "success",
"input": "identity request: growth service account, env staging, name growth-svc",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_service_account"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"account_name": "growth-svc",
"description": "Service identity for workflow automation.",
"environment": "staging",
"team": "growth"
},
"status": "routed",
"workflow": "create_service_account"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_service_account",
"confidence": 0.93,
"parameters": {
"environment": "staging",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [
"account_name",
"team"
],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": "What account name should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_service_account",
"missing_fields": [
"account_name",
"team"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: account_name, team"
],
"clarifying_question": "What account name should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_service_account",
"confidence": 0.93,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: account_name, team"
],
"clarifying_question": "What account name should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_service_account",
"confidence": 0.93,
"parameters": {
"environment": "staging",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [
"account_name",
"team"
],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.93
}
],
"failure_reasons": [
"Missing required fields: account_name, team"
],
"clarifying_question": "What account name should RouterCore use?"
},
"pass_fail_notes": [
"status mismatch: expected routed, got needs_clarification",
"missing expected parameter keys: account_name, team"
]
},
{
"id": "eval-0005",
"case_type": "missing_fields",
"input": "daily reporting job, details later",
"expected": {
"candidate_workflows": [
{
"confidence": 0.74,
"workflow": "create_scheduler_job"
}
],
"clarifying_question": "What job name should RouterCore use?",
"confidence": 0.74,
"failure_reasons": [
"Missing required fields: job_name, schedule, environment"
],
"missing_fields": [
"job_name",
"schedule",
"environment"
],
"parameters": {
"target": "reporting"
},
"status": "needs_clarification",
"workflow": "create_scheduler_job"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"parameters": {
"schedule": "0 9 * * *"
},
"missing_fields": [
"job_name",
"target",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What job name should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_scheduler_job",
"missing_fields": [
"job_name",
"target",
"environment"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: job_name, target, environment"
],
"clarifying_question": "What job name should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: job_name, target, environment"
],
"clarifying_question": "What job name should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"parameters": {
"schedule": "0 9 * * *"
},
"missing_fields": [
"job_name",
"target",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: job_name, target, environment"
],
"clarifying_question": "What job name should RouterCore use?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0006",
"case_type": "success",
"input": "Create a cool storage bucket named platform-bucket in West US for development.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_storage_bucket"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"bucket_name": "platform-bucket",
"environment": "dev",
"region": "westus",
"storage_class": "cool",
"team": "platform"
},
"status": "routed",
"workflow": "create_storage_bucket"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_storage_bucket",
"confidence": 0.93,
"parameters": {
"bucket_name": "platform-bucket",
"region": "westus",
"environment": "dev",
"storage_class": "cool"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_storage_bucket",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "create_storage_bucket",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "routed",
"workflow": "create_storage_bucket",
"confidence": 0.93,
"accepted": true,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"actual": {
"status": "routed",
"workflow": "create_storage_bucket",
"confidence": 0.93,
"parameters": {
"bucket_name": "platform-bucket",
"region": "westus",
"environment": "dev",
"storage_class": "cool"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_storage_bucket",
"confidence": 0.93
}
],
"failure_reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"pass_fail_notes": [
"missing expected parameter keys: team"
]
},
{
"id": "eval-0007",
"case_type": "success",
"input": "Grant reporting-user reader access to staging-bucket in development.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "grant_iam_role"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "dev",
"principal": "reporting-user",
"role": "reader",
"scope": "staging-bucket"
},
"status": "routed",
"workflow": "grant_iam_role"
},
"actual_router_output": {
"status": "routed",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"principal": "reporting-user",
"role": "reader",
"scope": "staging-bucket",
"environment": "staging"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
},
{
"workflow": "create_storage_bucket",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "grant_iam_role",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.93,
"accepted": false,
"requires_confirmation": true,
"execution_allowed": false,
"reasons": [
"Workflow is high risk and requires human confirmation."
],
"clarifying_question": null
},
"actual": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"principal": "reporting-user",
"role": "reader",
"scope": "staging-bucket",
"environment": "staging"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
},
{
"workflow": "create_storage_bucket",
"confidence": 0.69
}
],
"failure_reasons": [
"Workflow is high risk and requires human confirmation."
],
"clarifying_question": null
},
"pass_fail_notes": [
"status mismatch: expected routed, got requires_confirmation"
]
},
{
"id": "eval-0008",
"case_type": "missing_fields",
"input": "bucket needed for reporting, no location picked yet",
"expected": {
"candidate_workflows": [
{
"confidence": 0.74,
"workflow": "create_storage_bucket"
}
],
"clarifying_question": "What bucket name should RouterCore use?",
"confidence": 0.74,
"failure_reasons": [
"Missing required fields: bucket_name, region, environment"
],
"missing_fields": [
"bucket_name",
"region",
"environment"
],
"parameters": {
"team": "reporting"
},
"status": "needs_clarification",
"workflow": "create_storage_bucket"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_storage_bucket",
"confidence": 0.69,
"parameters": {},
"missing_fields": [
"bucket_name",
"region",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_storage_bucket",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What bucket name should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_storage_bucket",
"missing_fields": [
"bucket_name",
"region",
"environment"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: bucket_name, region, environment"
],
"clarifying_question": "What bucket name should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_storage_bucket",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: bucket_name, region, environment"
],
"clarifying_question": "What bucket name should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_storage_bucket",
"confidence": 0.69,
"parameters": {},
"missing_fields": [
"bucket_name",
"region",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_storage_bucket",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: bucket_name, region, environment"
],
"clarifying_question": "What bucket name should RouterCore use?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0009",
"case_type": "success",
"input": "cron 0 9 * * * target model-refresh env dev timezone UTC",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_scheduler_job"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "dev",
"job_name": "finance-nightly-job",
"schedule": "0 9 * * *",
"target": "model-refresh",
"team": "finance",
"timezone": "UTC"
},
"status": "routed",
"workflow": "create_scheduler_job"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"parameters": {
"job_name": "model-refresh-scheduled-job",
"schedule": "0 9 * * *",
"target": "model-refresh",
"environment": "dev",
"timezone": "UTC"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "Please confirm the selected workflow and parameters."
},
"validation_result": {
"valid": true,
"workflow": "create_scheduler_job",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "requires_confirmation",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": true,
"execution_allowed": false,
"reasons": [
"Router confidence is between 0.55 and 0.80."
],
"clarifying_question": null
},
"actual": {
"status": "requires_confirmation",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"parameters": {
"job_name": "model-refresh-scheduled-job",
"schedule": "0 9 * * *",
"target": "model-refresh",
"environment": "dev",
"timezone": "UTC"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.69
}
],
"failure_reasons": [
"Router confidence is between 0.55 and 0.80."
],
"clarifying_question": null
},
"pass_fail_notes": [
"status mismatch: expected routed, got requires_confirmation",
"missing expected parameter keys: team"
]
},
{
"id": "eval-0010",
"case_type": "success",
"input": "ticket: mlops staging api, runtime Python, region Central US, diagnostics on",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_web_app"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"app_name": "mlops-web-app",
"diagnostics_enabled": true,
"environment": "staging",
"region": "centralus",
"runtime": "python311",
"team": "mlops"
},
"status": "routed",
"workflow": "create_web_app"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_web_app",
"confidence": 0.69,
"parameters": {
"region": "centralus",
"runtime": "python311",
"environment": "staging",
"diagnostics_enabled": true
},
"missing_fields": [
"app_name"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What app name should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_web_app",
"missing_fields": [
"app_name"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: app_name"
],
"clarifying_question": "What app name should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: app_name"
],
"clarifying_question": "What app name should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.69,
"parameters": {
"region": "centralus",
"runtime": "python311",
"environment": "staging",
"diagnostics_enabled": true
},
"missing_fields": [
"app_name"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: app_name"
],
"clarifying_question": "What app name should RouterCore use?"
},
"pass_fail_notes": [
"status mismatch: expected routed, got needs_clarification",
"missing expected parameter keys: app_name, team"
]
},
{
"id": "eval-0011",
"case_type": "success",
"input": "infra: bucket for claims, env prod, region centralus, class archive",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_storage_bucket"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"bucket_name": "claims-bucket",
"environment": "prod",
"region": "centralus",
"storage_class": "archive",
"team": "claims"
},
"status": "routed",
"workflow": "create_storage_bucket"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_storage_bucket",
"confidence": 0.69,
"parameters": {
"region": "centralus",
"environment": "prod",
"storage_class": "archive"
},
"missing_fields": [
"bucket_name"
],
"candidate_workflows": [
{
"workflow": "create_storage_bucket",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What bucket name should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_storage_bucket",
"missing_fields": [
"bucket_name"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: bucket_name"
],
"clarifying_question": "What bucket name should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_storage_bucket",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: bucket_name"
],
"clarifying_question": "What bucket name should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_storage_bucket",
"confidence": 0.69,
"parameters": {
"region": "centralus",
"environment": "prod",
"storage_class": "archive"
},
"missing_fields": [
"bucket_name"
],
"candidate_workflows": [
{
"workflow": "create_storage_bucket",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: bucket_name"
],
"clarifying_question": "What bucket name should RouterCore use?"
},
"pass_fail_notes": [
"status mismatch: expected routed, got needs_clarification",
"missing expected parameter keys: bucket_name, team"
]
},
{
"id": "eval-0012",
"case_type": "success",
"input": "Create a archive storage bucket named finance-bucket in East US for staging.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_storage_bucket"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"bucket_name": "finance-bucket",
"environment": "staging",
"region": "eastus",
"storage_class": "archive",
"team": "finance"
},
"status": "routed",
"workflow": "create_storage_bucket"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_storage_bucket",
"confidence": 0.93,
"parameters": {
"bucket_name": "finance-bucket",
"region": "eastus",
"environment": "staging",
"storage_class": "archive"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_storage_bucket",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "create_storage_bucket",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "routed",
"workflow": "create_storage_bucket",
"confidence": 0.93,
"accepted": true,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"actual": {
"status": "routed",
"workflow": "create_storage_bucket",
"confidence": 0.93,
"parameters": {
"bucket_name": "finance-bucket",
"region": "eastus",
"environment": "staging",
"storage_class": "archive"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_storage_bucket",
"confidence": 0.93
}
],
"failure_reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"pass_fail_notes": [
"missing expected parameter keys: team"
]
},
{
"id": "eval-0013",
"case_type": "success",
"input": "Give analyst the contributor role on reporting-project.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "grant_iam_role"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "dev",
"principal": "analyst",
"role": "contributor",
"scope": "reporting-project"
},
"status": "routed",
"workflow": "grant_iam_role"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.69,
"parameters": {
"role": "contributor",
"scope": "reporting-project."
},
"missing_fields": [
"principal"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What principal should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "grant_iam_role",
"missing_fields": [
"principal"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: principal"
],
"clarifying_question": "What principal should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: principal"
],
"clarifying_question": "What principal should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.69,
"parameters": {
"role": "contributor",
"scope": "reporting-project."
},
"missing_fields": [
"principal"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: principal"
],
"clarifying_question": "What principal should RouterCore use?"
},
"pass_fail_notes": [
"status mismatch: expected routed, got needs_clarification",
"missing expected parameter keys: environment, principal"
]
},
{
"id": "eval-0014",
"case_type": "missing_fields",
"input": "daily reporting job, details later",
"expected": {
"candidate_workflows": [
{
"confidence": 0.74,
"workflow": "create_scheduler_job"
}
],
"clarifying_question": "What job name should RouterCore use?",
"confidence": 0.74,
"failure_reasons": [
"Missing required fields: job_name, schedule, environment"
],
"missing_fields": [
"job_name",
"schedule",
"environment"
],
"parameters": {
"target": "reporting"
},
"status": "needs_clarification",
"workflow": "create_scheduler_job"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"parameters": {
"schedule": "0 9 * * *"
},
"missing_fields": [
"job_name",
"target",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What job name should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_scheduler_job",
"missing_fields": [
"job_name",
"target",
"environment"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: job_name, target, environment"
],
"clarifying_question": "What job name should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: job_name, target, environment"
],
"clarifying_question": "What job name should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"parameters": {
"schedule": "0 9 * * *"
},
"missing_fields": [
"job_name",
"target",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: job_name, target, environment"
],
"clarifying_question": "What job name should RouterCore use?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0015",
"case_type": "success",
"input": "Create a staging Python web app for the platform team in Central US.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_web_app"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"app_name": "platform-web-app",
"diagnostics_enabled": true,
"environment": "staging",
"region": "centralus",
"runtime": "python311",
"team": "platform"
},
"status": "routed",
"workflow": "create_web_app"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_web_app",
"confidence": 0.93,
"parameters": {
"app_name": "platform-web-app",
"region": "centralus",
"runtime": "python311",
"environment": "staging",
"team": "platform",
"diagnostics_enabled": false
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "create_web_app",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "routed",
"workflow": "create_web_app",
"confidence": 0.93,
"accepted": true,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"actual": {
"status": "routed",
"workflow": "create_web_app",
"confidence": 0.93,
"parameters": {
"app_name": "platform-web-app",
"region": "centralus",
"runtime": "python311",
"environment": "staging",
"team": "platform",
"diagnostics_enabled": false
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.93
}
],
"failure_reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0016",
"case_type": "success",
"input": "Set up blob storage for the mlops team in US Central for development.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_storage_bucket"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"bucket_name": "mlops-bucket",
"environment": "dev",
"region": "us-central1",
"storage_class": "cool",
"team": "mlops"
},
"status": "routed",
"workflow": "create_storage_bucket"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_storage_bucket",
"confidence": 0.93,
"parameters": {
"bucket_name": "mlops-bucket",
"region": "us-central1",
"environment": "dev",
"team": "mlops"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_storage_bucket",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "create_storage_bucket",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "routed",
"workflow": "create_storage_bucket",
"confidence": 0.93,
"accepted": true,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"actual": {
"status": "routed",
"workflow": "create_storage_bucket",
"confidence": 0.93,
"parameters": {
"bucket_name": "mlops-bucket",
"region": "us-central1",
"environment": "dev",
"team": "mlops"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_storage_bucket",
"confidence": 0.93
}
],
"failure_reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"pass_fail_notes": [
"missing expected parameter keys: storage_class"
]
},
{
"id": "eval-0017",
"case_type": "risky_rejected",
"input": "Give John the admin role across all production.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.88,
"workflow": "grant_iam_role"
}
],
"clarifying_question": null,
"confidence": 0.88,
"failure_reasons": [
"Blocked owner/admin IAM grant."
],
"missing_fields": [],
"parameters": {
"environment": "prod",
"principal": "john",
"role": "admin",
"scope": "production"
},
"status": "rejected",
"workflow": "grant_iam_role"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.69,
"parameters": {
"role": "admin",
"scope": "production",
"environment": "prod"
},
"missing_fields": [
"principal"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What principal should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "grant_iam_role",
"missing_fields": [
"principal"
],
"invalid_fields": {},
"blocked_fields": {
"role": "admin"
},
"failure_reasons": [
"Missing required fields: principal",
"One or more fields contains a blocked value."
],
"clarifying_question": "What principal should RouterCore use?"
},
"policy_decision": {
"status": "rejected",
"workflow": "grant_iam_role",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Blocked value for role: admin"
],
"clarifying_question": null
},
"actual": {
"status": "rejected",
"workflow": "grant_iam_role",
"confidence": 0.69,
"parameters": {
"role": "admin",
"scope": "production",
"environment": "prod"
},
"missing_fields": [
"principal"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [
"Blocked value for role: admin"
],
"clarifying_question": null
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0018",
"case_type": "success",
"input": "Set up a daily job for billing-export for the security team in development.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_scheduler_job"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "dev",
"job_name": "security-nightly-job",
"schedule": "0 9 * * *",
"target": "billing-export",
"team": "security",
"timezone": "UTC"
},
"status": "routed",
"workflow": "create_scheduler_job"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.93,
"parameters": {
"job_name": "billing-export-scheduled-job",
"schedule": "0 9 * * *",
"target": "billing-export",
"environment": "dev",
"team": "security"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "create_scheduler_job",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.93,
"accepted": true,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"actual": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.93,
"parameters": {
"job_name": "billing-export-scheduled-job",
"schedule": "0 9 * * *",
"target": "billing-export",
"environment": "dev",
"team": "security"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.93
}
],
"failure_reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"pass_fail_notes": [
"missing expected parameter keys: timezone"
]
},
{
"id": "eval-0019",
"case_type": "success",
"input": "Need an automation identity for team finance in production.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_service_account"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"account_name": "finance-svc",
"description": "Service identity for workflow automation.",
"environment": "prod",
"team": "finance"
},
"status": "routed",
"workflow": "create_service_account"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_service_account",
"confidence": 0.69,
"parameters": {
"account_name": "finance-svc",
"team": "finance",
"environment": "prod",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "Please confirm the selected workflow and parameters."
},
"validation_result": {
"valid": true,
"workflow": "create_service_account",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "requires_confirmation",
"workflow": "create_service_account",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": true,
"execution_allowed": false,
"reasons": [
"Router confidence is between 0.55 and 0.80."
],
"clarifying_question": null
},
"actual": {
"status": "requires_confirmation",
"workflow": "create_service_account",
"confidence": 0.69,
"parameters": {
"account_name": "finance-svc",
"team": "finance",
"environment": "prod",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.69
}
],
"failure_reasons": [
"Router confidence is between 0.55 and 0.80."
],
"clarifying_question": null
},
"pass_fail_notes": [
"status mismatch: expected routed, got requires_confirmation"
]
},
{
"id": "eval-0020",
"case_type": "ambiguous",
"input": "Set up reporting.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.38,
"workflow": "create_web_app"
},
{
"confidence": 0.31,
"workflow": "create_service_account"
}
],
"clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?",
"confidence": 0.34,
"failure_reasons": [
"Request is ambiguous across multiple workflows."
],
"missing_fields": [],
"parameters": {},
"status": "needs_clarification",
"workflow": null
},
"actual_router_output": {
"status": "needs_clarification",
"workflow": null,
"confidence": 0.25,
"parameters": {},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.25
},
{
"workflow": "create_scheduler_job",
"confidence": 0.23
}
],
"failure_reasons": [
"No workflow keywords matched with enough confidence."
],
"clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?"
},
"validation_result": {
"valid": false,
"workflow": null,
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Router did not select a workflow."
],
"clarifying_question": "Which workflow should this request use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": null,
"confidence": 0.25,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"No authoritative workflow could be selected."
],
"clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?"
},
"actual": {
"status": "needs_clarification",
"workflow": null,
"confidence": 0.25,
"parameters": {},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.25
},
{
"workflow": "create_scheduler_job",
"confidence": 0.23
}
],
"failure_reasons": [
"No authoritative workflow could be selected."
],
"clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0021",
"case_type": "success",
"input": "cron 0 9 * * * target reporting env staging timezone America/Los_Angeles",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_scheduler_job"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "staging",
"job_name": "security-nightly-job",
"schedule": "0 9 * * *",
"target": "reporting",
"team": "security",
"timezone": "America/Los_Angeles"
},
"status": "routed",
"workflow": "create_scheduler_job"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"parameters": {
"job_name": "reporting-scheduled-job",
"schedule": "0 9 * * *",
"target": "reporting",
"environment": "staging"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "Please confirm the selected workflow and parameters."
},
"validation_result": {
"valid": true,
"workflow": "create_scheduler_job",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "requires_confirmation",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": true,
"execution_allowed": false,
"reasons": [
"Router confidence is between 0.55 and 0.80."
],
"clarifying_question": null
},
"actual": {
"status": "requires_confirmation",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"parameters": {
"job_name": "reporting-scheduled-job",
"schedule": "0 9 * * *",
"target": "reporting",
"environment": "staging"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.69
}
],
"failure_reasons": [
"Router confidence is between 0.55 and 0.80."
],
"clarifying_question": null
},
"pass_fail_notes": [
"status mismatch: expected routed, got requires_confirmation",
"missing expected parameter keys: team, timezone"
]
},
{
"id": "eval-0022",
"case_type": "risky_rejected",
"input": "iam urgent: John needs admin on prod.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.88,
"workflow": "grant_iam_role"
}
],
"clarifying_question": null,
"confidence": 0.88,
"failure_reasons": [
"Blocked owner/admin IAM grant."
],
"missing_fields": [],
"parameters": {
"environment": "prod",
"principal": "john",
"role": "admin",
"scope": "production"
},
"status": "rejected",
"workflow": "grant_iam_role"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.69,
"parameters": {
"role": "admin",
"scope": "prod.",
"environment": "prod"
},
"missing_fields": [
"principal"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What principal should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "grant_iam_role",
"missing_fields": [
"principal"
],
"invalid_fields": {},
"blocked_fields": {
"role": "admin"
},
"failure_reasons": [
"Missing required fields: principal",
"One or more fields contains a blocked value."
],
"clarifying_question": "What principal should RouterCore use?"
},
"policy_decision": {
"status": "rejected",
"workflow": "grant_iam_role",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Blocked value for role: admin"
],
"clarifying_question": null
},
"actual": {
"status": "rejected",
"workflow": "grant_iam_role",
"confidence": 0.69,
"parameters": {
"role": "admin",
"scope": "prod.",
"environment": "prod"
},
"missing_fields": [
"principal"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [
"Blocked value for role: admin"
],
"clarifying_question": null
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0023",
"case_type": "success",
"input": "Need a small .NET app named reporting-web-app in westus for reporting.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_web_app"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"app_name": "reporting-web-app",
"diagnostics_enabled": true,
"environment": "prod",
"region": "westus",
"runtime": "dotnet8",
"team": "reporting"
},
"status": "routed",
"workflow": "create_web_app"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_web_app",
"confidence": 0.69,
"parameters": {
"app_name": "reporting-web-app",
"region": "westus",
"runtime": "dotnet8",
"diagnostics_enabled": false
},
"missing_fields": [
"environment"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What environment should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_web_app",
"missing_fields": [
"environment"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.69,
"parameters": {
"app_name": "reporting-web-app",
"region": "westus",
"runtime": "dotnet8",
"diagnostics_enabled": false
},
"missing_fields": [
"environment"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"pass_fail_notes": [
"status mismatch: expected routed, got needs_clarification",
"missing expected parameter keys: environment, team"
]
},
{
"id": "eval-0024",
"case_type": "risky_rejected",
"input": "Grant Jane admin access to production.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.88,
"workflow": "grant_iam_role"
}
],
"clarifying_question": null,
"confidence": 0.88,
"failure_reasons": [
"Blocked owner/admin IAM grant."
],
"missing_fields": [],
"parameters": {
"environment": "prod",
"principal": "jane",
"role": "admin",
"scope": "production"
},
"status": "rejected",
"workflow": "grant_iam_role"
},
"actual_router_output": {
"status": "routed",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"principal": "jane",
"role": "admin",
"scope": "production.",
"environment": "prod"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": false,
"workflow": "grant_iam_role",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {
"role": "admin"
},
"failure_reasons": [
"One or more fields contains a blocked value."
],
"clarifying_question": null
},
"policy_decision": {
"status": "rejected",
"workflow": "grant_iam_role",
"confidence": 0.93,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Blocked value for role: admin"
],
"clarifying_question": null
},
"actual": {
"status": "rejected",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"principal": "jane",
"role": "admin",
"scope": "production.",
"environment": "prod"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
}
],
"failure_reasons": [
"Blocked value for role: admin"
],
"clarifying_question": null
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0025",
"case_type": "success",
"input": "Need a small .NET app named reporting-web-app in centralus for reporting.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_web_app"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"app_name": "reporting-web-app",
"diagnostics_enabled": false,
"environment": "dev",
"region": "centralus",
"runtime": "dotnet8",
"team": "reporting"
},
"status": "routed",
"workflow": "create_web_app"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_web_app",
"confidence": 0.69,
"parameters": {
"app_name": "reporting-web-app",
"region": "centralus",
"runtime": "dotnet8",
"diagnostics_enabled": false
},
"missing_fields": [
"environment"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What environment should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_web_app",
"missing_fields": [
"environment"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.69,
"parameters": {
"app_name": "reporting-web-app",
"region": "centralus",
"runtime": "dotnet8",
"diagnostics_enabled": false
},
"missing_fields": [
"environment"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"pass_fail_notes": [
"status mismatch: expected routed, got needs_clarification",
"missing expected parameter keys: environment, team"
]
},
{
"id": "eval-0026",
"case_type": "success",
"input": "Grant deploy-bot reader access to staging-bucket in development.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "grant_iam_role"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "dev",
"principal": "deploy-bot",
"role": "reader",
"scope": "staging-bucket"
},
"status": "routed",
"workflow": "grant_iam_role"
},
"actual_router_output": {
"status": "routed",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"principal": "deploy-bot",
"role": "reader",
"scope": "staging-bucket",
"environment": "staging"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
},
{
"workflow": "create_storage_bucket",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "grant_iam_role",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.93,
"accepted": false,
"requires_confirmation": true,
"execution_allowed": false,
"reasons": [
"Workflow is high risk and requires human confirmation."
],
"clarifying_question": null
},
"actual": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"principal": "deploy-bot",
"role": "reader",
"scope": "staging-bucket",
"environment": "staging"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
},
{
"workflow": "create_storage_bucket",
"confidence": 0.69
}
],
"failure_reasons": [
"Workflow is high risk and requires human confirmation."
],
"clarifying_question": null
},
"pass_fail_notes": [
"status mismatch: expected routed, got requires_confirmation"
]
},
{
"id": "eval-0027",
"case_type": "success",
"input": "Need a small Python app named platform-web-app in westus for platform.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_web_app"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"app_name": "platform-web-app",
"diagnostics_enabled": false,
"environment": "staging",
"region": "westus",
"runtime": "python311",
"team": "platform"
},
"status": "routed",
"workflow": "create_web_app"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_web_app",
"confidence": 0.93,
"parameters": {
"app_name": "platform-web-app",
"region": "westus",
"runtime": "python311",
"diagnostics_enabled": false
},
"missing_fields": [
"environment"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": "What environment should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_web_app",
"missing_fields": [
"environment"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.93,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.93,
"parameters": {
"app_name": "platform-web-app",
"region": "westus",
"runtime": "python311",
"diagnostics_enabled": false
},
"missing_fields": [
"environment"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.93
}
],
"failure_reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"pass_fail_notes": [
"status mismatch: expected routed, got needs_clarification",
"missing expected parameter keys: environment, team"
]
},
{
"id": "eval-0028",
"case_type": "success",
"input": "Grant jane viewer access to staging-bucket in development.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "grant_iam_role"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "dev",
"principal": "jane",
"role": "viewer",
"scope": "staging-bucket"
},
"status": "routed",
"workflow": "grant_iam_role"
},
"actual_router_output": {
"status": "routed",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"principal": "jane",
"role": "viewer",
"scope": "staging-bucket",
"environment": "staging"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
},
{
"workflow": "create_storage_bucket",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "grant_iam_role",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.93,
"accepted": false,
"requires_confirmation": true,
"execution_allowed": false,
"reasons": [
"Workflow is high risk and requires human confirmation."
],
"clarifying_question": null
},
"actual": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"principal": "jane",
"role": "viewer",
"scope": "staging-bucket",
"environment": "staging"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
},
{
"workflow": "create_storage_bucket",
"confidence": 0.69
}
],
"failure_reasons": [
"Workflow is high risk and requires human confirmation."
],
"clarifying_question": null
},
"pass_fail_notes": [
"status mismatch: expected routed, got requires_confirmation"
]
},
{
"id": "eval-0029",
"case_type": "confirmation_required",
"input": "iam: ops-lead role contributor scope production",
"expected": {
"candidate_workflows": [
{
"confidence": 0.82,
"workflow": "grant_iam_role"
}
],
"clarifying_question": null,
"confidence": 0.82,
"failure_reasons": [
"High-risk IAM change requires confirmation."
],
"missing_fields": [],
"parameters": {
"environment": "prod",
"principal": "ops-lead",
"role": "contributor",
"scope": "production"
},
"status": "requires_confirmation",
"workflow": "grant_iam_role"
},
"actual_router_output": {
"status": "routed",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"role": "contributor",
"scope": "production",
"environment": "prod"
},
"missing_fields": [
"principal"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": "What principal should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "grant_iam_role",
"missing_fields": [
"principal"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: principal"
],
"clarifying_question": "What principal should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.93,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: principal"
],
"clarifying_question": "What principal should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"role": "contributor",
"scope": "production",
"environment": "prod"
},
"missing_fields": [
"principal"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
}
],
"failure_reasons": [
"Missing required fields: principal"
],
"clarifying_question": "What principal should RouterCore use?"
},
"pass_fail_notes": [
"status mismatch: expected requires_confirmation, got needs_clarification",
"missing expected parameter keys: principal"
]
},
{
"id": "eval-0030",
"case_type": "missing_fields",
"input": "iam access needed for deploy-bot, scope TBD",
"expected": {
"candidate_workflows": [
{
"confidence": 0.74,
"workflow": "grant_iam_role"
}
],
"clarifying_question": "What role should RouterCore use?",
"confidence": 0.74,
"failure_reasons": [
"Missing required fields: role, scope"
],
"missing_fields": [
"role",
"scope"
],
"parameters": {
"principal": "deploy-bot"
},
"status": "needs_clarification",
"workflow": "grant_iam_role"
},
"actual_router_output": {
"status": "routed",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"scope": "deploy-bot"
},
"missing_fields": [
"principal",
"role"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": "What principal should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "grant_iam_role",
"missing_fields": [
"principal",
"role"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: principal, role"
],
"clarifying_question": "What principal should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.93,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: principal, role"
],
"clarifying_question": "What principal should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"scope": "deploy-bot"
},
"missing_fields": [
"principal",
"role"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
}
],
"failure_reasons": [
"Missing required fields: principal, role"
],
"clarifying_question": "What principal should RouterCore use?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0031",
"case_type": "success",
"input": "Grant deploy-bot viewer access to staging-bucket in production.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "grant_iam_role"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "prod",
"principal": "deploy-bot",
"role": "viewer",
"scope": "staging-bucket"
},
"status": "routed",
"workflow": "grant_iam_role"
},
"actual_router_output": {
"status": "routed",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"principal": "deploy-bot",
"role": "viewer",
"scope": "staging-bucket",
"environment": "prod"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
},
{
"workflow": "create_storage_bucket",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "grant_iam_role",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.93,
"accepted": false,
"requires_confirmation": true,
"execution_allowed": false,
"reasons": [
"Workflow is high risk and requires human confirmation.",
"IAM request targets production or broad-scope permissions."
],
"clarifying_question": null
},
"actual": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"principal": "deploy-bot",
"role": "viewer",
"scope": "staging-bucket",
"environment": "prod"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
},
{
"workflow": "create_storage_bucket",
"confidence": 0.69
}
],
"failure_reasons": [
"Workflow is high risk and requires human confirmation.",
"IAM request targets production or broad-scope permissions."
],
"clarifying_question": null
},
"pass_fail_notes": [
"status mismatch: expected routed, got requires_confirmation"
]
},
{
"id": "eval-0032",
"case_type": "missing_fields",
"input": "permission request for jane",
"expected": {
"candidate_workflows": [
{
"confidence": 0.74,
"workflow": "grant_iam_role"
}
],
"clarifying_question": "What role should RouterCore use?",
"confidence": 0.74,
"failure_reasons": [
"Missing required fields: role, scope"
],
"missing_fields": [
"role",
"scope"
],
"parameters": {
"principal": "jane"
},
"status": "needs_clarification",
"workflow": "grant_iam_role"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.69,
"parameters": {
"scope": "request"
},
"missing_fields": [
"principal",
"role"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What principal should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "grant_iam_role",
"missing_fields": [
"principal",
"role"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: principal, role"
],
"clarifying_question": "What principal should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: principal, role"
],
"clarifying_question": "What principal should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.69,
"parameters": {
"scope": "request"
},
"missing_fields": [
"principal",
"role"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: principal, role"
],
"clarifying_question": "What principal should RouterCore use?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0033",
"case_type": "missing_fields",
"input": "bucket needed for security, no location picked yet",
"expected": {
"candidate_workflows": [
{
"confidence": 0.74,
"workflow": "create_storage_bucket"
}
],
"clarifying_question": "What bucket name should RouterCore use?",
"confidence": 0.74,
"failure_reasons": [
"Missing required fields: bucket_name, region, environment"
],
"missing_fields": [
"bucket_name",
"region",
"environment"
],
"parameters": {
"team": "security"
},
"status": "needs_clarification",
"workflow": "create_storage_bucket"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_storage_bucket",
"confidence": 0.69,
"parameters": {},
"missing_fields": [
"bucket_name",
"region",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_storage_bucket",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What bucket name should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_storage_bucket",
"missing_fields": [
"bucket_name",
"region",
"environment"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: bucket_name, region, environment"
],
"clarifying_question": "What bucket name should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_storage_bucket",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: bucket_name, region, environment"
],
"clarifying_question": "What bucket name should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_storage_bucket",
"confidence": 0.69,
"parameters": {},
"missing_fields": [
"bucket_name",
"region",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_storage_bucket",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: bucket_name, region, environment"
],
"clarifying_question": "What bucket name should RouterCore use?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0034",
"case_type": "confirmation_required",
"input": "iam: ops-lead role reader scope production",
"expected": {
"candidate_workflows": [
{
"confidence": 0.82,
"workflow": "grant_iam_role"
}
],
"clarifying_question": null,
"confidence": 0.82,
"failure_reasons": [
"High-risk IAM change requires confirmation."
],
"missing_fields": [],
"parameters": {
"environment": "prod",
"principal": "ops-lead",
"role": "reader",
"scope": "production"
},
"status": "requires_confirmation",
"workflow": "grant_iam_role"
},
"actual_router_output": {
"status": "routed",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"role": "reader",
"scope": "production",
"environment": "prod"
},
"missing_fields": [
"principal"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": "What principal should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "grant_iam_role",
"missing_fields": [
"principal"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: principal"
],
"clarifying_question": "What principal should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.93,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: principal"
],
"clarifying_question": "What principal should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"role": "reader",
"scope": "production",
"environment": "prod"
},
"missing_fields": [
"principal"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
}
],
"failure_reasons": [
"Missing required fields: principal"
],
"clarifying_question": "What principal should RouterCore use?"
},
"pass_fail_notes": [
"status mismatch: expected requires_confirmation, got needs_clarification",
"missing expected parameter keys: principal"
]
},
{
"id": "eval-0035",
"case_type": "missing_fields",
"input": "identity needed for team reporting",
"expected": {
"candidate_workflows": [
{
"confidence": 0.74,
"workflow": "create_service_account"
}
],
"clarifying_question": "What account name should RouterCore use?",
"confidence": 0.74,
"failure_reasons": [
"Missing required fields: account_name, environment"
],
"missing_fields": [
"account_name",
"environment"
],
"parameters": {
"team": "reporting"
},
"status": "needs_clarification",
"workflow": "create_service_account"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_service_account",
"confidence": 0.69,
"parameters": {
"account_name": "reporting-svc",
"team": "reporting",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [
"environment"
],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What environment should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_service_account",
"missing_fields": [
"environment"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_service_account",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_service_account",
"confidence": 0.69,
"parameters": {
"account_name": "reporting-svc",
"team": "reporting",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [
"environment"
],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0036",
"case_type": "success",
"input": "Create a nightly scheduler job named growth-nightly-job for model-refresh in production.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_scheduler_job"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "prod",
"job_name": "growth-nightly-job",
"schedule": "0 9 * * *",
"target": "model-refresh",
"team": "growth",
"timezone": "America/Los_Angeles"
},
"status": "routed",
"workflow": "create_scheduler_job"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.95,
"parameters": {
"job_name": "growth-nightly-job",
"schedule": "0 2 * * *",
"target": "model-refresh",
"environment": "prod"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.95
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "create_scheduler_job",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.95,
"accepted": true,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"actual": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.95,
"parameters": {
"job_name": "growth-nightly-job",
"schedule": "0 2 * * *",
"target": "model-refresh",
"environment": "prod"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.95
}
],
"failure_reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"pass_fail_notes": [
"missing expected parameter keys: team, timezone"
]
},
{
"id": "eval-0037",
"case_type": "success",
"input": "Create a service account named security-svc for the security team in production.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_service_account"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"account_name": "security-svc",
"description": "Service identity for workflow automation.",
"environment": "prod",
"team": "security"
},
"status": "routed",
"workflow": "create_service_account"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_service_account",
"confidence": 0.69,
"parameters": {
"account_name": "security-svc",
"team": "security",
"environment": "prod",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "Please confirm the selected workflow and parameters."
},
"validation_result": {
"valid": true,
"workflow": "create_service_account",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "requires_confirmation",
"workflow": "create_service_account",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": true,
"execution_allowed": false,
"reasons": [
"Router confidence is between 0.55 and 0.80."
],
"clarifying_question": null
},
"actual": {
"status": "requires_confirmation",
"workflow": "create_service_account",
"confidence": 0.69,
"parameters": {
"account_name": "security-svc",
"team": "security",
"environment": "prod",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.69
}
],
"failure_reasons": [
"Router confidence is between 0.55 and 0.80."
],
"clarifying_question": null
},
"pass_fail_notes": [
"status mismatch: expected routed, got requires_confirmation"
]
},
{
"id": "eval-0038",
"case_type": "success",
"input": "Create a production Node.js web app for the growth team in US Central.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_web_app"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"app_name": "growth-web-app",
"diagnostics_enabled": true,
"environment": "prod",
"region": "us-central1",
"runtime": "nodejs20",
"team": "growth"
},
"status": "routed",
"workflow": "create_web_app"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_web_app",
"confidence": 0.93,
"parameters": {
"app_name": "growth-web-app",
"region": "us-central1",
"runtime": "nodejs20",
"environment": "prod",
"team": "growth",
"diagnostics_enabled": false
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "create_web_app",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "routed",
"workflow": "create_web_app",
"confidence": 0.93,
"accepted": true,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"actual": {
"status": "routed",
"workflow": "create_web_app",
"confidence": 0.93,
"parameters": {
"app_name": "growth-web-app",
"region": "us-central1",
"runtime": "nodejs20",
"environment": "prod",
"team": "growth",
"diagnostics_enabled": false
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.93
}
],
"failure_reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0039",
"case_type": "success",
"input": "Create a standard storage bucket named platform-bucket in US Central for development.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_storage_bucket"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"bucket_name": "platform-bucket",
"environment": "dev",
"region": "us-central1",
"storage_class": "standard",
"team": "platform"
},
"status": "routed",
"workflow": "create_storage_bucket"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_storage_bucket",
"confidence": 0.93,
"parameters": {
"bucket_name": "platform-bucket",
"region": "us-central1",
"environment": "dev",
"storage_class": "standard"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_storage_bucket",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "create_storage_bucket",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "routed",
"workflow": "create_storage_bucket",
"confidence": 0.93,
"accepted": true,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"actual": {
"status": "routed",
"workflow": "create_storage_bucket",
"confidence": 0.93,
"parameters": {
"bucket_name": "platform-bucket",
"region": "us-central1",
"environment": "dev",
"storage_class": "standard"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_storage_bucket",
"confidence": 0.93
}
],
"failure_reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"pass_fail_notes": [
"missing expected parameter keys: team"
]
},
{
"id": "eval-0040",
"case_type": "ambiguous",
"input": "Prep access and automation for the new project.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.38,
"workflow": "create_service_account"
},
{
"confidence": 0.31,
"workflow": "create_scheduler_job"
}
],
"clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?",
"confidence": 0.34,
"failure_reasons": [
"Request is ambiguous across multiple workflows."
],
"missing_fields": [],
"parameters": {},
"status": "needs_clarification",
"workflow": null
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.69,
"parameters": {
"scope": "for"
},
"missing_fields": [
"principal",
"role"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What principal should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "grant_iam_role",
"missing_fields": [
"principal",
"role"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: principal, role"
],
"clarifying_question": "What principal should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: principal, role"
],
"clarifying_question": "What principal should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.69,
"parameters": {
"scope": "for"
},
"missing_fields": [
"principal",
"role"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: principal, role"
],
"clarifying_question": "What principal should RouterCore use?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0041",
"case_type": "success",
"input": "Create a nightly scheduler job named claims-nightly-job for billing-export in staging.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_scheduler_job"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "staging",
"job_name": "claims-nightly-job",
"schedule": "0 9 * * *",
"target": "billing-export",
"team": "claims",
"timezone": "America/Los_Angeles"
},
"status": "routed",
"workflow": "create_scheduler_job"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.95,
"parameters": {
"job_name": "claims-nightly-job",
"schedule": "0 2 * * *",
"target": "billing-export",
"environment": "staging"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.95
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "create_scheduler_job",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.95,
"accepted": true,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"actual": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.95,
"parameters": {
"job_name": "claims-nightly-job",
"schedule": "0 2 * * *",
"target": "billing-export",
"environment": "staging"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.95
}
],
"failure_reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"pass_fail_notes": [
"missing expected parameter keys: team, timezone"
]
},
{
"id": "eval-0042",
"case_type": "confirmation_required",
"input": "iam: jane role contributor scope all reporting resources",
"expected": {
"candidate_workflows": [
{
"confidence": 0.82,
"workflow": "grant_iam_role"
}
],
"clarifying_question": null,
"confidence": 0.82,
"failure_reasons": [
"High-risk IAM change requires confirmation."
],
"missing_fields": [],
"parameters": {
"environment": "staging",
"principal": "jane",
"role": "contributor",
"scope": "all reporting resources"
},
"status": "requires_confirmation",
"workflow": "grant_iam_role"
},
"actual_router_output": {
"status": "routed",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"role": "contributor"
},
"missing_fields": [
"principal",
"scope"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": "What principal should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "grant_iam_role",
"missing_fields": [
"principal",
"scope"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: principal, scope"
],
"clarifying_question": "What principal should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.93,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: principal, scope"
],
"clarifying_question": "What principal should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"role": "contributor"
},
"missing_fields": [
"principal",
"scope"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
}
],
"failure_reasons": [
"Missing required fields: principal, scope"
],
"clarifying_question": "What principal should RouterCore use?"
},
"pass_fail_notes": [
"status mismatch: expected requires_confirmation, got needs_clarification",
"missing expected parameter keys: environment, principal, scope"
]
},
{
"id": "eval-0043",
"case_type": "ambiguous",
"input": "Set up reporting.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.38,
"workflow": "create_service_account"
},
{
"confidence": 0.31,
"workflow": "create_storage_bucket"
}
],
"clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?",
"confidence": 0.34,
"failure_reasons": [
"Request is ambiguous across multiple workflows."
],
"missing_fields": [],
"parameters": {},
"status": "needs_clarification",
"workflow": null
},
"actual_router_output": {
"status": "needs_clarification",
"workflow": null,
"confidence": 0.25,
"parameters": {},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.25
},
{
"workflow": "create_scheduler_job",
"confidence": 0.23
}
],
"failure_reasons": [
"No workflow keywords matched with enough confidence."
],
"clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?"
},
"validation_result": {
"valid": false,
"workflow": null,
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Router did not select a workflow."
],
"clarifying_question": "Which workflow should this request use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": null,
"confidence": 0.25,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"No authoritative workflow could be selected."
],
"clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?"
},
"actual": {
"status": "needs_clarification",
"workflow": null,
"confidence": 0.25,
"parameters": {},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.25
},
{
"workflow": "create_scheduler_job",
"confidence": 0.23
}
],
"failure_reasons": [
"No authoritative workflow could be selected."
],
"clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0044",
"case_type": "success",
"input": "Need a small Node.js app named growth-web-app in westus for growth.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_web_app"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"app_name": "growth-web-app",
"diagnostics_enabled": false,
"environment": "prod",
"region": "westus",
"runtime": "nodejs20",
"team": "growth"
},
"status": "routed",
"workflow": "create_web_app"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_web_app",
"confidence": 0.69,
"parameters": {
"app_name": "growth-web-app",
"region": "westus",
"runtime": "nodejs20",
"diagnostics_enabled": false
},
"missing_fields": [
"environment"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What environment should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_web_app",
"missing_fields": [
"environment"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.69,
"parameters": {
"app_name": "growth-web-app",
"region": "westus",
"runtime": "nodejs20",
"diagnostics_enabled": false
},
"missing_fields": [
"environment"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"pass_fail_notes": [
"status mismatch: expected routed, got needs_clarification",
"missing expected parameter keys: environment, team"
]
},
{
"id": "eval-0045",
"case_type": "missing_fields",
"input": "need api for reporting, details TBD",
"expected": {
"candidate_workflows": [
{
"confidence": 0.74,
"workflow": "create_web_app"
}
],
"clarifying_question": "What app name should RouterCore use?",
"confidence": 0.74,
"failure_reasons": [
"Missing required fields: app_name, region, environment"
],
"missing_fields": [
"app_name",
"region",
"environment"
],
"parameters": {
"runtime": "python311",
"team": "reporting"
},
"status": "needs_clarification",
"workflow": "create_web_app"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_web_app",
"confidence": 0.69,
"parameters": {
"diagnostics_enabled": false
},
"missing_fields": [
"app_name",
"region",
"runtime",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What app name should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_web_app",
"missing_fields": [
"app_name",
"region",
"runtime",
"environment"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: app_name, region, runtime, environment"
],
"clarifying_question": "What app name should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: app_name, region, runtime, environment"
],
"clarifying_question": "What app name should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.69,
"parameters": {
"diagnostics_enabled": false
},
"missing_fields": [
"app_name",
"region",
"runtime",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: app_name, region, runtime, environment"
],
"clarifying_question": "What app name should RouterCore use?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0046",
"case_type": "success",
"input": "Grant jane reader access to reporting-project in staging.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "grant_iam_role"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "staging",
"principal": "jane",
"role": "reader",
"scope": "reporting-project"
},
"status": "routed",
"workflow": "grant_iam_role"
},
"actual_router_output": {
"status": "routed",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"principal": "jane",
"role": "reader",
"scope": "reporting-project",
"environment": "staging"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "grant_iam_role",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.93,
"accepted": false,
"requires_confirmation": true,
"execution_allowed": false,
"reasons": [
"Workflow is high risk and requires human confirmation."
],
"clarifying_question": null
},
"actual": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"principal": "jane",
"role": "reader",
"scope": "reporting-project",
"environment": "staging"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
}
],
"failure_reasons": [
"Workflow is high risk and requires human confirmation."
],
"clarifying_question": null
},
"pass_fail_notes": [
"status mismatch: expected routed, got requires_confirmation"
]
},
{
"id": "eval-0047",
"case_type": "success",
"input": "Give analyst the viewer role on claims-app.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "grant_iam_role"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "prod",
"principal": "analyst",
"role": "viewer",
"scope": "claims-app"
},
"status": "routed",
"workflow": "grant_iam_role"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_web_app",
"confidence": 0.69,
"parameters": {
"diagnostics_enabled": false
},
"missing_fields": [
"app_name",
"region",
"runtime",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.69
},
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What app name should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_web_app",
"missing_fields": [
"app_name",
"region",
"runtime",
"environment"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: app_name, region, runtime, environment"
],
"clarifying_question": "What app name should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: app_name, region, runtime, environment"
],
"clarifying_question": "What app name should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.69,
"parameters": {
"diagnostics_enabled": false
},
"missing_fields": [
"app_name",
"region",
"runtime",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.69
},
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: app_name, region, runtime, environment"
],
"clarifying_question": "What app name should RouterCore use?"
},
"pass_fail_notes": [
"status mismatch: expected routed, got needs_clarification",
"workflow mismatch: expected grant_iam_role, got create_web_app",
"missing expected parameter keys: environment, principal, role, scope"
]
},
{
"id": "eval-0048",
"case_type": "success",
"input": "Create a nightly scheduler job named reporting-nightly-job for claims-sync in production.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_scheduler_job"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "prod",
"job_name": "reporting-nightly-job",
"schedule": "0 9 * * *",
"target": "claims-sync",
"team": "reporting",
"timezone": "America/New_York"
},
"status": "routed",
"workflow": "create_scheduler_job"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.95,
"parameters": {
"job_name": "reporting-nightly-job",
"schedule": "0 2 * * *",
"target": "claims-sync",
"environment": "prod"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.95
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "create_scheduler_job",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.95,
"accepted": true,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"actual": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.95,
"parameters": {
"job_name": "reporting-nightly-job",
"schedule": "0 2 * * *",
"target": "claims-sync",
"environment": "prod"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.95
}
],
"failure_reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"pass_fail_notes": [
"missing expected parameter keys: team, timezone"
]
},
{
"id": "eval-0049",
"case_type": "success",
"input": "ticket: finance staging api, runtime Python, region West US, diagnostics on",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_web_app"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"app_name": "finance-web-app",
"diagnostics_enabled": true,
"environment": "staging",
"region": "westus",
"runtime": "python311",
"team": "finance"
},
"status": "routed",
"workflow": "create_web_app"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_web_app",
"confidence": 0.69,
"parameters": {
"region": "westus",
"runtime": "python311",
"environment": "staging",
"diagnostics_enabled": true
},
"missing_fields": [
"app_name"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What app name should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_web_app",
"missing_fields": [
"app_name"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: app_name"
],
"clarifying_question": "What app name should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: app_name"
],
"clarifying_question": "What app name should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.69,
"parameters": {
"region": "westus",
"runtime": "python311",
"environment": "staging",
"diagnostics_enabled": true
},
"missing_fields": [
"app_name"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: app_name"
],
"clarifying_question": "What app name should RouterCore use?"
},
"pass_fail_notes": [
"status mismatch: expected routed, got needs_clarification",
"missing expected parameter keys: app_name, team"
]
},
{
"id": "eval-0050",
"case_type": "ambiguous",
"input": "Prep access and automation for the new project.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.38,
"workflow": "create_scheduler_job"
},
{
"confidence": 0.31,
"workflow": "create_service_account"
}
],
"clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?",
"confidence": 0.34,
"failure_reasons": [
"Request is ambiguous across multiple workflows."
],
"missing_fields": [],
"parameters": {},
"status": "needs_clarification",
"workflow": null
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.69,
"parameters": {
"scope": "for"
},
"missing_fields": [
"principal",
"role"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What principal should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "grant_iam_role",
"missing_fields": [
"principal",
"role"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: principal, role"
],
"clarifying_question": "What principal should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: principal, role"
],
"clarifying_question": "What principal should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.69,
"parameters": {
"scope": "for"
},
"missing_fields": [
"principal",
"role"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: principal, role"
],
"clarifying_question": "What principal should RouterCore use?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0051",
"case_type": "success",
"input": "cron 0 9 * * * target model-refresh env staging timezone America/New_York",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_scheduler_job"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "staging",
"job_name": "finance-nightly-job",
"schedule": "0 9 * * *",
"target": "model-refresh",
"team": "finance",
"timezone": "America/New_York"
},
"status": "routed",
"workflow": "create_scheduler_job"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"parameters": {
"job_name": "model-refresh-scheduled-job",
"schedule": "0 9 * * *",
"target": "model-refresh",
"environment": "staging"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "Please confirm the selected workflow and parameters."
},
"validation_result": {
"valid": true,
"workflow": "create_scheduler_job",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "requires_confirmation",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": true,
"execution_allowed": false,
"reasons": [
"Router confidence is between 0.55 and 0.80."
],
"clarifying_question": null
},
"actual": {
"status": "requires_confirmation",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"parameters": {
"job_name": "model-refresh-scheduled-job",
"schedule": "0 9 * * *",
"target": "model-refresh",
"environment": "staging"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.69
}
],
"failure_reasons": [
"Router confidence is between 0.55 and 0.80."
],
"clarifying_question": null
},
"pass_fail_notes": [
"status mismatch: expected routed, got requires_confirmation",
"missing expected parameter keys: team, timezone"
]
},
{
"id": "eval-0052",
"case_type": "success",
"input": "Give john the editor role on dev-subsystem.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "grant_iam_role"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "staging",
"principal": "john",
"role": "editor",
"scope": "dev-subsystem"
},
"status": "routed",
"workflow": "grant_iam_role"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.69,
"parameters": {
"role": "editor",
"scope": "dev-subsystem.",
"environment": "dev"
},
"missing_fields": [
"principal"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What principal should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "grant_iam_role",
"missing_fields": [
"principal"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: principal"
],
"clarifying_question": "What principal should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: principal"
],
"clarifying_question": "What principal should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.69,
"parameters": {
"role": "editor",
"scope": "dev-subsystem.",
"environment": "dev"
},
"missing_fields": [
"principal"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: principal"
],
"clarifying_question": "What principal should RouterCore use?"
},
"pass_fail_notes": [
"status mismatch: expected routed, got needs_clarification",
"missing expected parameter keys: principal"
]
},
{
"id": "eval-0053",
"case_type": "missing_fields",
"input": "Set up a reporting schedule.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.74,
"workflow": "create_scheduler_job"
}
],
"clarifying_question": "What job name should RouterCore use?",
"confidence": 0.74,
"failure_reasons": [
"Missing required fields: job_name, schedule, environment"
],
"missing_fields": [
"job_name",
"schedule",
"environment"
],
"parameters": {
"target": "reporting"
},
"status": "needs_clarification",
"workflow": "create_scheduler_job"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"parameters": {},
"missing_fields": [
"job_name",
"schedule",
"target",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What job name should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_scheduler_job",
"missing_fields": [
"job_name",
"schedule",
"target",
"environment"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: job_name, schedule, target, environment"
],
"clarifying_question": "What job name should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: job_name, schedule, target, environment"
],
"clarifying_question": "What job name should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_scheduler_job",
"confidence": 0.69,
"parameters": {},
"missing_fields": [
"job_name",
"schedule",
"target",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: job_name, schedule, target, environment"
],
"clarifying_question": "What job name should RouterCore use?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0054",
"case_type": "success",
"input": "Create a nightly scheduler job named growth-nightly-job for reporting in staging.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_scheduler_job"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "staging",
"job_name": "growth-nightly-job",
"schedule": "0 2 * * *",
"target": "reporting",
"team": "growth",
"timezone": "America/New_York"
},
"status": "routed",
"workflow": "create_scheduler_job"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.95,
"parameters": {
"job_name": "growth-nightly-job",
"schedule": "0 2 * * *",
"target": "reporting",
"environment": "staging"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.95
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "create_scheduler_job",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.95,
"accepted": true,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"actual": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.95,
"parameters": {
"job_name": "growth-nightly-job",
"schedule": "0 2 * * *",
"target": "reporting",
"environment": "staging"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.95
}
],
"failure_reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"pass_fail_notes": [
"missing expected parameter keys: team, timezone"
]
},
{
"id": "eval-0055",
"case_type": "ambiguous",
"input": "Make the nightly thing happen.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.38,
"workflow": "create_service_account"
},
{
"confidence": 0.31,
"workflow": "create_storage_bucket"
}
],
"clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?",
"confidence": 0.34,
"failure_reasons": [
"Request is ambiguous across multiple workflows."
],
"missing_fields": [],
"parameters": {},
"status": "needs_clarification",
"workflow": null
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_web_app",
"confidence": 0.58,
"parameters": {
"diagnostics_enabled": false
},
"missing_fields": [
"app_name",
"region",
"runtime",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.58
},
{
"workflow": "create_scheduler_job",
"confidence": 0.58
}
],
"failure_reasons": [],
"clarifying_question": "What app name should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_web_app",
"missing_fields": [
"app_name",
"region",
"runtime",
"environment"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: app_name, region, runtime, environment"
],
"clarifying_question": "What app name should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.58,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: app_name, region, runtime, environment"
],
"clarifying_question": "What app name should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.58,
"parameters": {
"diagnostics_enabled": false
},
"missing_fields": [
"app_name",
"region",
"runtime",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.58
},
{
"workflow": "create_scheduler_job",
"confidence": 0.58
}
],
"failure_reasons": [
"Missing required fields: app_name, region, runtime, environment"
],
"clarifying_question": "What app name should RouterCore use?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0056",
"case_type": "success",
"input": "identity request: growth service account, env prod, name growth-svc",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_service_account"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"account_name": "growth-svc",
"description": "Service identity for workflow automation.",
"environment": "prod",
"team": "growth"
},
"status": "routed",
"workflow": "create_service_account"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_service_account",
"confidence": 0.93,
"parameters": {
"environment": "prod",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [
"account_name",
"team"
],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": "What account name should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_service_account",
"missing_fields": [
"account_name",
"team"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: account_name, team"
],
"clarifying_question": "What account name should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_service_account",
"confidence": 0.93,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: account_name, team"
],
"clarifying_question": "What account name should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_service_account",
"confidence": 0.93,
"parameters": {
"environment": "prod",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [
"account_name",
"team"
],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.93
}
],
"failure_reasons": [
"Missing required fields: account_name, team"
],
"clarifying_question": "What account name should RouterCore use?"
},
"pass_fail_notes": [
"status mismatch: expected routed, got needs_clarification",
"missing expected parameter keys: account_name, team"
]
},
{
"id": "eval-0057",
"case_type": "success",
"input": "Create a production .NET web app for the reporting team in West US.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_web_app"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"app_name": "reporting-web-app",
"diagnostics_enabled": false,
"environment": "prod",
"region": "westus",
"runtime": "dotnet8",
"team": "reporting"
},
"status": "routed",
"workflow": "create_web_app"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_web_app",
"confidence": 0.93,
"parameters": {
"app_name": "reporting-web-app",
"region": "westus",
"runtime": "dotnet8",
"environment": "prod",
"team": "reporting",
"diagnostics_enabled": false
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "create_web_app",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "routed",
"workflow": "create_web_app",
"confidence": 0.93,
"accepted": true,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"actual": {
"status": "routed",
"workflow": "create_web_app",
"confidence": 0.93,
"parameters": {
"app_name": "reporting-web-app",
"region": "westus",
"runtime": "dotnet8",
"environment": "prod",
"team": "reporting",
"diagnostics_enabled": false
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.93
}
],
"failure_reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0058",
"case_type": "missing_fields",
"input": "service account request, owner team security",
"expected": {
"candidate_workflows": [
{
"confidence": 0.74,
"workflow": "create_service_account"
}
],
"clarifying_question": "What account name should RouterCore use?",
"confidence": 0.74,
"failure_reasons": [
"Missing required fields: account_name, environment"
],
"missing_fields": [
"account_name",
"environment"
],
"parameters": {
"team": "security"
},
"status": "needs_clarification",
"workflow": "create_service_account"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_service_account",
"confidence": 0.69,
"parameters": {
"account_name": "security-svc",
"team": "security",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [
"environment"
],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What environment should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_service_account",
"missing_fields": [
"environment"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_service_account",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_service_account",
"confidence": 0.69,
"parameters": {
"account_name": "security-svc",
"team": "security",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [
"environment"
],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0059",
"case_type": "success",
"input": "Create a production .NET web app for the security team in West US.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_web_app"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"app_name": "security-web-app",
"diagnostics_enabled": true,
"environment": "prod",
"region": "westus",
"runtime": "dotnet8",
"team": "security"
},
"status": "routed",
"workflow": "create_web_app"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_web_app",
"confidence": 0.93,
"parameters": {
"app_name": "security-web-app",
"region": "westus",
"runtime": "dotnet8",
"environment": "prod",
"team": "security",
"diagnostics_enabled": false
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "create_web_app",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "routed",
"workflow": "create_web_app",
"confidence": 0.93,
"accepted": true,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"actual": {
"status": "routed",
"workflow": "create_web_app",
"confidence": 0.93,
"parameters": {
"app_name": "security-web-app",
"region": "westus",
"runtime": "dotnet8",
"environment": "prod",
"team": "security",
"diagnostics_enabled": false
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.93
}
],
"failure_reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0060",
"case_type": "success",
"input": "Set up a daily job for reporting for the growth team in development.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_scheduler_job"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "dev",
"job_name": "growth-nightly-job",
"schedule": "0 2 * * *",
"target": "reporting",
"team": "growth",
"timezone": "UTC"
},
"status": "routed",
"workflow": "create_scheduler_job"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.93,
"parameters": {
"job_name": "reporting-scheduled-job",
"schedule": "0 9 * * *",
"target": "reporting",
"environment": "dev",
"team": "growth"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "create_scheduler_job",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.93,
"accepted": true,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"actual": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.93,
"parameters": {
"job_name": "reporting-scheduled-job",
"schedule": "0 9 * * *",
"target": "reporting",
"environment": "dev",
"team": "growth"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.93
}
],
"failure_reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"pass_fail_notes": [
"missing expected parameter keys: timezone"
]
},
{
"id": "eval-0061",
"case_type": "success",
"input": "Set up a daily job for reporting for the reporting team in staging.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_scheduler_job"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "staging",
"job_name": "reporting-nightly-job",
"schedule": "0 9 * * *",
"target": "reporting",
"team": "reporting",
"timezone": "America/New_York"
},
"status": "routed",
"workflow": "create_scheduler_job"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.93,
"parameters": {
"job_name": "reporting-scheduled-job",
"schedule": "0 9 * * *",
"target": "reporting",
"environment": "staging",
"team": "reporting"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "create_scheduler_job",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.93,
"accepted": true,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"actual": {
"status": "routed",
"workflow": "create_scheduler_job",
"confidence": 0.93,
"parameters": {
"job_name": "reporting-scheduled-job",
"schedule": "0 9 * * *",
"target": "reporting",
"environment": "staging",
"team": "reporting"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_scheduler_job",
"confidence": 0.93
}
],
"failure_reasons": [
"Route accepted for execution preview only."
],
"clarifying_question": null
},
"pass_fail_notes": [
"missing expected parameter keys: timezone"
]
},
{
"id": "eval-0062",
"case_type": "success",
"input": "Need an automation identity for team growth in development.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_service_account"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"account_name": "growth-svc",
"description": "Service identity for workflow automation.",
"environment": "dev",
"team": "growth"
},
"status": "routed",
"workflow": "create_service_account"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_service_account",
"confidence": 0.69,
"parameters": {
"account_name": "growth-svc",
"team": "growth",
"environment": "dev",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "Please confirm the selected workflow and parameters."
},
"validation_result": {
"valid": true,
"workflow": "create_service_account",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "requires_confirmation",
"workflow": "create_service_account",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": true,
"execution_allowed": false,
"reasons": [
"Router confidence is between 0.55 and 0.80."
],
"clarifying_question": null
},
"actual": {
"status": "requires_confirmation",
"workflow": "create_service_account",
"confidence": 0.69,
"parameters": {
"account_name": "growth-svc",
"team": "growth",
"environment": "dev",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.69
}
],
"failure_reasons": [
"Router confidence is between 0.55 and 0.80."
],
"clarifying_question": null
},
"pass_fail_notes": [
"status mismatch: expected routed, got requires_confirmation"
]
},
{
"id": "eval-0063",
"case_type": "ambiguous",
"input": "Prep access and automation for the new project.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.38,
"workflow": "grant_iam_role"
},
{
"confidence": 0.31,
"workflow": "create_service_account"
}
],
"clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?",
"confidence": 0.34,
"failure_reasons": [
"Request is ambiguous across multiple workflows."
],
"missing_fields": [],
"parameters": {},
"status": "needs_clarification",
"workflow": null
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.69,
"parameters": {
"scope": "for"
},
"missing_fields": [
"principal",
"role"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What principal should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "grant_iam_role",
"missing_fields": [
"principal",
"role"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: principal, role"
],
"clarifying_question": "What principal should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: principal, role"
],
"clarifying_question": "What principal should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "grant_iam_role",
"confidence": 0.69,
"parameters": {
"scope": "for"
},
"missing_fields": [
"principal",
"role"
],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: principal, role"
],
"clarifying_question": "What principal should RouterCore use?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0064",
"case_type": "missing_fields",
"input": "Create a Python web app for the growth team.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.74,
"workflow": "create_web_app"
}
],
"clarifying_question": "What app name should RouterCore use?",
"confidence": 0.74,
"failure_reasons": [
"Missing required fields: app_name, region, environment"
],
"missing_fields": [
"app_name",
"region",
"environment"
],
"parameters": {
"runtime": "python311",
"team": "growth"
},
"status": "needs_clarification",
"workflow": "create_web_app"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_web_app",
"confidence": 0.93,
"parameters": {
"app_name": "growth-web-app",
"runtime": "python311",
"team": "growth",
"diagnostics_enabled": false
},
"missing_fields": [
"region",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": "What region should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_web_app",
"missing_fields": [
"region",
"environment"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: region, environment"
],
"clarifying_question": "What region should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.93,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: region, environment"
],
"clarifying_question": "What region should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.93,
"parameters": {
"app_name": "growth-web-app",
"runtime": "python311",
"team": "growth",
"diagnostics_enabled": false
},
"missing_fields": [
"region",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.93
}
],
"failure_reasons": [
"Missing required fields: region, environment"
],
"clarifying_question": "What region should RouterCore use?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0065",
"case_type": "missing_fields",
"input": "bucket needed for mlops, no location picked yet",
"expected": {
"candidate_workflows": [
{
"confidence": 0.74,
"workflow": "create_storage_bucket"
}
],
"clarifying_question": "What bucket name should RouterCore use?",
"confidence": 0.74,
"failure_reasons": [
"Missing required fields: bucket_name, region, environment"
],
"missing_fields": [
"bucket_name",
"region",
"environment"
],
"parameters": {
"team": "mlops"
},
"status": "needs_clarification",
"workflow": "create_storage_bucket"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_storage_bucket",
"confidence": 0.69,
"parameters": {},
"missing_fields": [
"bucket_name",
"region",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_storage_bucket",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What bucket name should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_storage_bucket",
"missing_fields": [
"bucket_name",
"region",
"environment"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: bucket_name, region, environment"
],
"clarifying_question": "What bucket name should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_storage_bucket",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: bucket_name, region, environment"
],
"clarifying_question": "What bucket name should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_storage_bucket",
"confidence": 0.69,
"parameters": {},
"missing_fields": [
"bucket_name",
"region",
"environment"
],
"candidate_workflows": [
{
"workflow": "create_storage_bucket",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: bucket_name, region, environment"
],
"clarifying_question": "What bucket name should RouterCore use?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0066",
"case_type": "success",
"input": "Give jane the viewer role on staging-bucket.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "grant_iam_role"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "prod",
"principal": "jane",
"role": "viewer",
"scope": "staging-bucket"
},
"status": "routed",
"workflow": "grant_iam_role"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_storage_bucket",
"confidence": 0.69,
"parameters": {
"environment": "staging"
},
"missing_fields": [
"bucket_name",
"region"
],
"candidate_workflows": [
{
"workflow": "create_storage_bucket",
"confidence": 0.69
},
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What bucket name should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_storage_bucket",
"missing_fields": [
"bucket_name",
"region"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: bucket_name, region"
],
"clarifying_question": "What bucket name should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_storage_bucket",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: bucket_name, region"
],
"clarifying_question": "What bucket name should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_storage_bucket",
"confidence": 0.69,
"parameters": {
"environment": "staging"
},
"missing_fields": [
"bucket_name",
"region"
],
"candidate_workflows": [
{
"workflow": "create_storage_bucket",
"confidence": 0.69
},
{
"workflow": "grant_iam_role",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: bucket_name, region"
],
"clarifying_question": "What bucket name should RouterCore use?"
},
"pass_fail_notes": [
"status mismatch: expected routed, got needs_clarification",
"workflow mismatch: expected grant_iam_role, got create_storage_bucket",
"missing expected parameter keys: principal, role, scope"
]
},
{
"id": "eval-0067",
"case_type": "missing_fields",
"input": "Create a service account for the mlops team.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.74,
"workflow": "create_service_account"
}
],
"clarifying_question": "What account name should RouterCore use?",
"confidence": 0.74,
"failure_reasons": [
"Missing required fields: account_name, environment"
],
"missing_fields": [
"account_name",
"environment"
],
"parameters": {
"team": "mlops"
},
"status": "needs_clarification",
"workflow": "create_service_account"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_service_account",
"confidence": 0.69,
"parameters": {
"account_name": "mlops-svc",
"team": "mlops",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [
"environment"
],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What environment should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_service_account",
"missing_fields": [
"environment"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_service_account",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_service_account",
"confidence": 0.69,
"parameters": {
"account_name": "mlops-svc",
"team": "mlops",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [
"environment"
],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0068",
"case_type": "missing_fields",
"input": "service account request, owner team security",
"expected": {
"candidate_workflows": [
{
"confidence": 0.74,
"workflow": "create_service_account"
}
],
"clarifying_question": "What account name should RouterCore use?",
"confidence": 0.74,
"failure_reasons": [
"Missing required fields: account_name, environment"
],
"missing_fields": [
"account_name",
"environment"
],
"parameters": {
"team": "security"
},
"status": "needs_clarification",
"workflow": "create_service_account"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_service_account",
"confidence": 0.69,
"parameters": {
"account_name": "security-svc",
"team": "security",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [
"environment"
],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What environment should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_service_account",
"missing_fields": [
"environment"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_service_account",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_service_account",
"confidence": 0.69,
"parameters": {
"account_name": "security-svc",
"team": "security",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [
"environment"
],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0069",
"case_type": "success",
"input": "ticket: reporting development api, runtime Node.js, region West US, diagnostics on",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_web_app"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"app_name": "reporting-web-app",
"diagnostics_enabled": false,
"environment": "dev",
"region": "westus",
"runtime": "nodejs20",
"team": "reporting"
},
"status": "routed",
"workflow": "create_web_app"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_web_app",
"confidence": 0.69,
"parameters": {
"region": "westus",
"runtime": "nodejs20",
"environment": "dev",
"diagnostics_enabled": true
},
"missing_fields": [
"app_name"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What app name should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_web_app",
"missing_fields": [
"app_name"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: app_name"
],
"clarifying_question": "What app name should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: app_name"
],
"clarifying_question": "What app name should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_web_app",
"confidence": 0.69,
"parameters": {
"region": "westus",
"runtime": "nodejs20",
"environment": "dev",
"diagnostics_enabled": true
},
"missing_fields": [
"app_name"
],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: app_name"
],
"clarifying_question": "What app name should RouterCore use?"
},
"pass_fail_notes": [
"status mismatch: expected routed, got needs_clarification",
"missing expected parameter keys: app_name, team"
]
},
{
"id": "eval-0070",
"case_type": "ambiguous",
"input": "Set up reporting.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.38,
"workflow": "create_storage_bucket"
},
{
"confidence": 0.31,
"workflow": "create_scheduler_job"
}
],
"clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?",
"confidence": 0.34,
"failure_reasons": [
"Request is ambiguous across multiple workflows."
],
"missing_fields": [],
"parameters": {},
"status": "needs_clarification",
"workflow": null
},
"actual_router_output": {
"status": "needs_clarification",
"workflow": null,
"confidence": 0.25,
"parameters": {},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.25
},
{
"workflow": "create_scheduler_job",
"confidence": 0.23
}
],
"failure_reasons": [
"No workflow keywords matched with enough confidence."
],
"clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?"
},
"validation_result": {
"valid": false,
"workflow": null,
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Router did not select a workflow."
],
"clarifying_question": "Which workflow should this request use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": null,
"confidence": 0.25,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"No authoritative workflow could be selected."
],
"clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?"
},
"actual": {
"status": "needs_clarification",
"workflow": null,
"confidence": 0.25,
"parameters": {},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.25
},
{
"workflow": "create_scheduler_job",
"confidence": 0.23
}
],
"failure_reasons": [
"No authoritative workflow could be selected."
],
"clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0071",
"case_type": "risky_rejected",
"input": "Remove all security from production.",
"expected": {
"candidate_workflows": [],
"clarifying_question": null,
"confidence": 0.88,
"failure_reasons": [
"Unsafe destructive production request."
],
"missing_fields": [],
"parameters": {},
"status": "rejected",
"workflow": null
},
"actual_router_output": {
"status": "needs_clarification",
"workflow": null,
"confidence": 0.25,
"parameters": {},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.25
},
{
"workflow": "create_scheduler_job",
"confidence": 0.23
}
],
"failure_reasons": [
"No workflow keywords matched with enough confidence."
],
"clarifying_question": "Are you creating an app, storage, identity, IAM access, or a scheduled job?"
},
"validation_result": {
"valid": false,
"workflow": null,
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Router did not select a workflow."
],
"clarifying_question": "Which workflow should this request use?"
},
"policy_decision": {
"status": "rejected",
"workflow": null,
"confidence": 0.25,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Unsafe phrase matched: remove all security"
],
"clarifying_question": null
},
"actual": {
"status": "rejected",
"workflow": null,
"confidence": 0.25,
"parameters": {},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "create_web_app",
"confidence": 0.25
},
{
"workflow": "create_scheduler_job",
"confidence": 0.23
}
],
"failure_reasons": [
"Unsafe phrase matched: remove all security"
],
"clarifying_question": null
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0072",
"case_type": "confirmation_required",
"input": "Grant deploy-bot contributor access to production.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.82,
"workflow": "grant_iam_role"
}
],
"clarifying_question": null,
"confidence": 0.82,
"failure_reasons": [
"High-risk IAM change requires confirmation."
],
"missing_fields": [],
"parameters": {
"environment": "prod",
"principal": "deploy-bot",
"role": "contributor",
"scope": "production"
},
"status": "requires_confirmation",
"workflow": "grant_iam_role"
},
"actual_router_output": {
"status": "routed",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"principal": "deploy-bot",
"role": "contributor",
"scope": "production.",
"environment": "prod"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "grant_iam_role",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.93,
"accepted": false,
"requires_confirmation": true,
"execution_allowed": false,
"reasons": [
"Workflow is high risk and requires human confirmation.",
"IAM request targets production or broad-scope permissions."
],
"clarifying_question": null
},
"actual": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"principal": "deploy-bot",
"role": "contributor",
"scope": "production.",
"environment": "prod"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
}
],
"failure_reasons": [
"Workflow is high risk and requires human confirmation.",
"IAM request targets production or broad-scope permissions."
],
"clarifying_question": null
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0073",
"case_type": "missing_fields",
"input": "service account request, owner team security",
"expected": {
"candidate_workflows": [
{
"confidence": 0.74,
"workflow": "create_service_account"
}
],
"clarifying_question": "What account name should RouterCore use?",
"confidence": 0.74,
"failure_reasons": [
"Missing required fields: account_name, environment"
],
"missing_fields": [
"account_name",
"environment"
],
"parameters": {
"team": "security"
},
"status": "needs_clarification",
"workflow": "create_service_account"
},
"actual_router_output": {
"status": "requires_confirmation",
"workflow": "create_service_account",
"confidence": 0.69,
"parameters": {
"account_name": "security-svc",
"team": "security",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [
"environment"
],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.69
}
],
"failure_reasons": [],
"clarifying_question": "What environment should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_service_account",
"missing_fields": [
"environment"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_service_account",
"confidence": 0.69,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_service_account",
"confidence": 0.69,
"parameters": {
"account_name": "security-svc",
"team": "security",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [
"environment"
],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.69
}
],
"failure_reasons": [
"Missing required fields: environment"
],
"clarifying_question": "What environment should RouterCore use?"
},
"pass_fail_notes": [
"pass"
]
},
{
"id": "eval-0074",
"case_type": "success",
"input": "identity request: finance service account, env prod, name finance-svc",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "create_service_account"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"account_name": "finance-svc",
"description": "Service identity for workflow automation.",
"environment": "prod",
"team": "finance"
},
"status": "routed",
"workflow": "create_service_account"
},
"actual_router_output": {
"status": "routed",
"workflow": "create_service_account",
"confidence": 0.93,
"parameters": {
"environment": "prod",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [
"account_name",
"team"
],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": "What account name should RouterCore use?"
},
"validation_result": {
"valid": false,
"workflow": "create_service_account",
"missing_fields": [
"account_name",
"team"
],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [
"Missing required fields: account_name, team"
],
"clarifying_question": "What account name should RouterCore use?"
},
"policy_decision": {
"status": "needs_clarification",
"workflow": "create_service_account",
"confidence": 0.93,
"accepted": false,
"requires_confirmation": false,
"execution_allowed": false,
"reasons": [
"Missing required fields: account_name, team"
],
"clarifying_question": "What account name should RouterCore use?"
},
"actual": {
"status": "needs_clarification",
"workflow": "create_service_account",
"confidence": 0.93,
"parameters": {
"environment": "prod",
"description": "Generated from RouterCore request preview."
},
"missing_fields": [
"account_name",
"team"
],
"candidate_workflows": [
{
"workflow": "create_service_account",
"confidence": 0.93
}
],
"failure_reasons": [
"Missing required fields: account_name, team"
],
"clarifying_question": "What account name should RouterCore use?"
},
"pass_fail_notes": [
"status mismatch: expected routed, got needs_clarification",
"missing expected parameter keys: account_name, team"
]
},
{
"id": "eval-0075",
"case_type": "success",
"input": "Grant deploy-bot editor access to reporting-project in development.",
"expected": {
"candidate_workflows": [
{
"confidence": 0.92,
"workflow": "grant_iam_role"
}
],
"clarifying_question": null,
"confidence": 0.92,
"failure_reasons": [],
"missing_fields": [],
"parameters": {
"environment": "dev",
"principal": "deploy-bot",
"role": "editor",
"scope": "reporting-project"
},
"status": "routed",
"workflow": "grant_iam_role"
},
"actual_router_output": {
"status": "routed",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"principal": "deploy-bot",
"role": "editor",
"scope": "reporting-project",
"environment": "dev"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
}
],
"failure_reasons": [],
"clarifying_question": null
},
"validation_result": {
"valid": true,
"workflow": "grant_iam_role",
"missing_fields": [],
"invalid_fields": {},
"blocked_fields": {},
"failure_reasons": [],
"clarifying_question": null
},
"policy_decision": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.93,
"accepted": false,
"requires_confirmation": true,
"execution_allowed": false,
"reasons": [
"Workflow is high risk and requires human confirmation."
],
"clarifying_question": null
},
"actual": {
"status": "requires_confirmation",
"workflow": "grant_iam_role",
"confidence": 0.93,
"parameters": {
"principal": "deploy-bot",
"role": "editor",
"scope": "reporting-project",
"environment": "dev"
},
"missing_fields": [],
"candidate_workflows": [
{
"workflow": "grant_iam_role",
"confidence": 0.93
}
],
"failure_reasons": [
"Workflow is high risk and requires human confirmation."
],
"clarifying_question": null
},
"pass_fail_notes": [
"status mismatch: expected routed, got requires_confirmation"
]
}
]
}