---
# Environment manifest for PolicyEvolverEnv: package metadata, the
# implementing class, configuration variables, and the observation/action
# schemas.
# NOTE(review): the nesting below was reconstructed from a flattened source
# in which every key sat on a single line; confirm the placement of
# `variables`, `observation_schema`, and `action_schema` against whatever
# consumes this manifest.
name: "PolicyEvolverEnv"
description: >-
  Policy Design and Evolution Sandbox — agents refine their strategy to
  evolve real-world governance frameworks through meta-reasoning
version: "1.0.0"
author: "PolicyEvolution Team"
tags:
  - "policy"
  - "governance"
  - "meta-reasoning"
  - "content-moderation"
  - "AI-safety"

# Import location of the environment implementation.
environment:
  module: "server.environment"
  class: "PolicyEvolverEnvironment"

# Configuration variables. HF_TOKEN is marked required; the other two
# declare default values.
variables:
  HF_TOKEN:
    description: "API key for LLM inference provider (Groq recommended)"
    required: true
  API_BASE_URL:
    description: "OpenAI-compatible endpoint. Default: Groq"
    default: "https://api.groq.com/openai/v1"
  MODEL_NAME:
    description: "Model identifier for the inference provider"
    default: "llama-3.1-8b-instant"

observation_schema:
  type: "object"
  description: "Policy context, data corpus, and system state"

# Actions are a discriminated union: the `action_type` field selects which
# variant schema applies.
action_schema:
  type: "object"
  description: "Discriminated union on action_type field"
  discriminator: "action_type"
  variants:
    - action_type: "propose_clarification"
      schema: "ProposeClarificationAction"
    - action_type: "propose_new_rule"
      schema: "ProposeNewRuleAction"
    - action_type: "evolve_policy"
      schema: "EvolveProcessAction"
      # Attached to this variant because `action_schema` already carries its
      # own `description`; a second one at that level would be a duplicate
      # key. NOTE(review): confirm this is the intended owner.
      description: >-
        Hard task metric keys: fraud_rate (aliases: fraud_detection, fraud),
        revenue_velocity (aliases: queue_overload, revenue), seller_trust
        (aliases: seller_confidence, trust).
# Bounds of the per-step reward; identical to `grading.return_range` below.
reward_range: [0.0, 1.0]

# Episode and resource limits.
# NOTE(review): `vcpu` and `memory_gb` are nested under `runtime` based on
# their position in the flattened source; confirm against the consumer.
runtime:
  max_steps: 5
  timeout_seconds: 1200
  vcpu: 2
  memory_gb: 8

# Task catalogue. `expected_min_score` decreases as difficulty increases.
tasks:
  - id: "task_easy"
    difficulty: "easy"
    description: "Identify and clarify ambiguous policy terms in a social media community guidelines"
    expected_min_score: 0.70
  - id: "task_medium"
    difficulty: "medium"
    description: "Detect policy gaps in corporate HR policies and propose new rules for emerging scenarios"
    expected_min_score: 0.55
  - id: "task_hard"
    difficulty: "hard"
    description: "Holistically evolve an e-commerce Trust & Safety framework with trade-off reasoning"
    expected_min_score: 0.40

# Entry point of the grader and the range of the value it returns.
grading:
  module: "server.grader"
  function: "grade"
  return_range: [0.0, 1.0]

# HTTP endpoints, split into those listed as required and those listed as
# optional.
endpoints:
  required:
    - path: "/reset"
      method: "POST"
    - path: "/step"
      method: "POST"
    - path: "/state"
      method: "GET"
    - path: "/tasks"
      method: "GET"
    - path: "/grader"
      method: "POST"
    - path: "/baseline"
      method: "GET"
  optional:
    - path: "/health"
      method: "GET"