File size: 2,521 Bytes
8cd3fa7
511f04a
8cd3fa7
 
 
 
 
 
 
 
 
 
a541c0b
8cd3fa7
511f04a
 
 
 
 
 
 
 
 
 
8cd3fa7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28e7c64
8cd3fa7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8520614
8cd3fa7
 
 
 
 
 
 
 
 
 
 
 
 
 
b978fbd
8cd3fa7
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Identity and descriptive metadata for this environment.
name: 'PolicyEvolverEnv'
# Folded scalar: the two lines below are joined with a single space when
# parsed, so the value is still one line of text.
description: >-
  Policy Design and Evolution Sandbox — agents refine their strategy to
  evolve real-world governance frameworks through meta-reasoning
# Quoted so the version is always read as a string.
version: '1.0.0'
author: 'PolicyEvolution Team'
# Keywords describing the environment's subject areas.
tags:
  - 'policy'
  - 'governance'
  - 'meta-reasoning'
  - 'content-moderation'
  - 'AI-safety'

# Where the environment implementation lives and what it exchanges with
# an agent (configuration variables, observations, actions, rewards).
environment:
  # Module path and class name of the environment implementation.
  module: 'server.environment'
  class: 'PolicyEvolverEnvironment'
  # Configuration variables. Each entry has a description plus either
  # `required` or a `default`.
  variables:
    # NOTE(review): the name suggests a Hugging Face token, while the
    # description talks about an inference-provider key (Groq) — confirm
    # which credential is actually expected here.
    HF_TOKEN:
      description: 'API key for LLM inference provider (Groq recommended)'
      required: true
    API_BASE_URL:
      description: 'OpenAI-compatible endpoint. Default: Groq'
      default: 'https://api.groq.com/openai/v1'
    MODEL_NAME:
      description: 'Model identifier for the inference provider'
      default: 'llama-3.1-8b-instant'

  observation_schema:
    type: 'object'
    description: 'Policy context, data corpus, and system state'

  # Actions form a tagged union: the `action_type` field selects which of
  # the listed variants (and therefore which schema) applies.
  action_schema:
    type: 'object'
    description: 'Discriminated union on action_type field'
    discriminator: 'action_type'
    variants:
      - action_type: 'propose_clarification'
        schema: 'ProposeClarificationAction'
      - action_type: 'propose_new_rule'
        schema: 'ProposeNewRuleAction'
      # NOTE(review): the action_type says "policy" but the schema name
      # says "Process" — verify the schema name against the server models.
      - action_type: 'evolve_policy'
        schema: 'EvolveProcessAction'
        # Folded scalar: parsed as a single line of text.
        description: >-
          Hard task metric keys: fraud_rate (aliases: fraud_detection, fraud),
          revenue_velocity (aliases: queue_overload, revenue),
          seller_trust (aliases: seller_confidence, trust).

  # Reward bounds given as [low, high].
  reward_range: [0.0, 1.0]

# Execution limits and resource sizing for the environment.
runtime:
  # presumably the maximum number of steps per episode — confirm with the server
  max_steps: 5
  # 1200 seconds = 20 minutes
  timeout_seconds: 1200
  # Going by the key names: virtual CPU count and memory in GB — confirm
  # against the hosting platform's expectations.
  vcpu: 2
  memory_gb: 8

# Task catalogue, listed from easiest to hardest. `expected_min_score`
# decreases as difficulty increases (0.70, 0.55, 0.40); presumably it is a
# threshold on the grader's 0.0-1.0 score scale — confirm with the grader.
# Descriptions use folded scalars, so each one parses as a single line.
tasks:
  - id: 'task_easy'
    difficulty: 'easy'
    description: >-
      Identify and clarify ambiguous policy terms in social media community
      guidelines
    expected_min_score: 0.70

  - id: 'task_medium'
    difficulty: 'medium'
    description: >-
      Detect policy gaps in corporate HR policies and propose new rules for
      emerging scenarios
    expected_min_score: 0.55

  - id: 'task_hard'
    difficulty: 'hard'
    description: >-
      Holistically evolve an e-commerce Trust & Safety framework with
      trade-off reasoning
    expected_min_score: 0.40

# Scoring entry point: presumably the `grade` function is looked up in the
# `server.grader` module — confirm against the loader.
grading:
  module: 'server.grader'
  function: 'grade'
  # Bounds of the returned score given as [low, high].
  return_range: [0.0, 1.0]

# HTTP routes of the server, grouped into required and optional ones.
# Each entry pairs a URL path with the HTTP method used to call it.
endpoints:
  required:
    - path: '/reset'
      method: 'POST'
    - path: '/step'
      method: 'POST'
    - path: '/state'
      method: 'GET'
    - path: '/tasks'
      method: 'GET'
    - path: '/grader'
      method: 'POST'
    - path: '/baseline'
      method: 'GET'
  optional:
    - path: '/health'
      method: 'GET'