---
# Manifest header: identity and one-paragraph summary of the environment.
name: smart-contract-audit-env
# Version identifiers are strings, so this stays quoted.
version: '1.2.0'
# Folded scalar: the wrapped lines below are joined with single spaces.
description: >
  Reinforcement learning environment for smart contract security
  analysis. Agents interact with real-world Solidity contract data from
  Certora-audited projects, practising three real audit tasks:
  vulnerability detection, property discovery, and rule checking.
author: 'Codex47'
license: MIT

# Task catalogue: three entries, each with its own step budget (40 / 30 / 20)
# and its own grader script path.
# NOTE(review): every reward_range below is [0, 1], yet several per-step
# rewards under action_space are negative — confirm whether reward_range is
# meant to bound only the terminal (grader) score.
tasks:
  - id: task1_vuln_detection
    name: Targeted Vulnerability Detection
    difficulty: medium
    status: active
    description: >
      Given a Solidity contract (4-6 functions), identify the single vulnerable
      function and describe its vulnerability type in 2-3 words.
    max_steps: 40
    reward_range: [0, 1]
    grader: tasks/task1/grader.py
    # Written as floats so all three tasks declare this range with the same
    # scalar type (it was [0, 1] here but [0.0, 1.0] for task2 and task3).
    grader_score_range: [0.0, 1.0]

  - id: task2_property_discovery
    name: Property Discovery
    difficulty: hard
    status: active
    description: >
      Given a single Solidity function with known properties, discover the
      correct natural-language postcondition describing its correct behaviour.
    max_steps: 30
    reward_range: [0, 1]
    grader: tasks/task2/grader.py
    grader_score_range: [0.0, 1.0]

  - id: task3_rule_checker
    name: Rule Checker
    difficulty: easy
    status: active
    description: >
      Given a natural-language property and a Solidity contract, identify the
      function that violates that property. Partial credit for internal
      subfunctions.
    max_steps: 20
    reward_range: [0, 1]
    grader: tasks/task3/grader.py
    grader_score_range: [0.0, 1.0]

# Observation schema: an object with ten named properties, each declaring a
# type. Written in block style so nested entries diff line by line.
observation_space:
  type: object
  properties:
    task_id:
      type: string
    contract_name:
      type: string
    contract_description:
      type: string
    # Array whose items are strings.
    available_actions:
      type: array
      items:
        type: string
    # last_action and last_action_result are the only properties marked
    # nullable.
    last_action:
      type: string
      nullable: true
    last_action_result:
      type: string
      nullable: true
    step_count:
      type: integer
    cumulative_reward:
      type: number
    done:
      type: boolean
    # Free-form object; no inner properties are declared here.
    extra:
      type: object

# Per-action declarations: the parameters each action takes and the reward
# attached to taking it. An empty params mapping is written as {}.
action_space:
  # General actions applicable across all tasks
  general:
    # UNKNOWN action cost
    unknown:
      reward: 0.0
    # REPEATED action cost
    # NOTE(review): reward.all_tasks_shared.repeated_query lists -0.40 for
    # what looks like the same event — confirm which value is authoritative.
    repeated:
      reward: -0.22
    # RESUBMIT action cost
    resubmit:
      reward: 0.0

  task1:
    list_functions:
      params: {}
      reward: -0.04
    get_function_code:
      params:
        function_name: string
      reward: -0.14
    get_function_summary:
      params:
        function_name: string
      reward: -0.07
    get_file_metadata:
      params: {}
      reward: -0.02
    get_state_variable:
      # "string opt" presumably marks the parameter as optional — TODO confirm.
      params:
        variable_name: 'string opt'
      reward: -0.06
    get_call_graph:
      params: {}
      reward: -0.08
    submit:
      params:
        function_name: string
        vulnerability_type: string
      # terminal reward handled by grader
      reward: 0.0

  # NOTE(review): four task2 query actions below carry positive rewards while
  # the other query actions in this file are negative — confirm the signs.
  task2:
    get_function_code:
      params: {}
      reward: -0.14
    get_function_natspec:
      params: {}
      reward: -0.08
    get_file_natspec:
      params: {}
      reward: 0.05
    get_related_functions:
      params: {}
      reward: 0.07
    get_signature:
      params: {}
      reward: 0.04
    get_similar_rule:
      params: {}
      reward: 0.15
    submit_property:
      params:
        property: string
      # terminal reward handled by grader
      reward: 0.0

  # NOTE(review): get_function_metadata and get_property_specification are
  # positive here, unlike the other task3 query actions — confirm the signs.
  task3:
    list_functions:
      params: {}
      reward: -0.04
    get_function_metadata:
      params:
        function_name: string
      reward: 0.04
    get_function_code:
      params:
        function_name: string
      reward: -0.14
    get_state_variable:
      params:
        variable_name: 'string opt'
      reward: -0.06
    get_call_graph:
      params: {}
      reward: -0.08
    # replaces get_formalized_property
    get_property_specification:
      params: {}
      reward: 0.02
    submit_function:
      params:
        function_name: string
      # terminal reward handled by grader
      reward: 0.0

# Reward summary: a shared penalty, task1 shaping values, and the terminal
# score range for each task.
reward:
  type: shaped
  all_tasks_shared:
    # NOTE(review): action_space.general.repeated lists -0.22 for what looks
    # like the same event — confirm which value is authoritative.
    repeated_query: -0.40
  # Shaping values keyed by action plus a _correct / _wrong suffix; the
  # _correct entries are positive and the _wrong entries negative.
  # Presumably "correct" means the queried function is the vulnerable one —
  # TODO confirm against the environment code.
  task1_shaping:
    get_function_code_correct: 0.05
    get_function_code_wrong: -0.10
    get_function_summary_correct: 0.03
    get_function_summary_wrong: -0.05
  # All three terminal ranges are identical.
  task1_terminal:
    range: [0.0, 1.0]
  task2_terminal:
    range: [0.0, 1.0]
  task3_terminal:
    range: [0.0, 1.0]

# Dataset summary. The counts are declared literally here rather than
# derived; presumably they must be kept in sync with the data files — verify.
data:
  source: 'Certora audited DeFi projects'
  format: JSON
  num_contracts: 4
  num_vulnerable_functions: 8
  num_property_functions: 11
  num_task3_episodes: 8

# Entry points, given as descriptive strings for two access paths.
interface:
  # HTTP: each value is "<METHOD> <path>".
  http:
    reset: 'POST /reset'
    step: 'POST /step'
    state: 'GET /state'
    tasks: 'GET /tasks'
    health: 'GET /health'
    # <id> is a placeholder for a task id.
    action_space: 'GET /action_space?task_id=<id>'
    observation_space: 'GET /observation_space'
  # Python: call signatures with their result type names; the padding inside
  # the strings is part of the value and is kept as-is.
  python:
    reset: 'env.reset(seed=None) -> ResetResult'
    step: 'env.step(action)     -> StepResult'
    state: 'env.state()          -> StateResult'