Gamucopia-Creatives committed on
Commit
aa6f797
·
1 Parent(s): 74b6d45

refactor: update environment configuration to OpenEnv v1 schema and standardize task definitions

Browse files
Files changed (1) hide show
  1. openenv.yaml +71 -53
openenv.yaml CHANGED
@@ -1,78 +1,96 @@
1
- name: SocialStreamModerationEnv
2
- version: 1.0.0
 
 
 
 
3
  description: >
4
  A content-moderation RL environment where an agent must classify social-media
5
  posts as safe or harmful under varying policy regimes, with tasks spanning
6
  basic safety, contextual nuance, and fairness.
7
 
8
- endpoints:
9
- reset: /reset
10
- step: /step
11
- state: /state
12
- tasks: /tasks
13
- grader: /grader
14
- graders: /graders
15
- health: /health
16
- metadata: /metadata
17
- schema: /schema
18
- baseline: /predict_and_step
19
-
20
- graders:
21
- - id: basic_safety_grader
22
- description: "Grader for basic safety checks"
23
- entry_point: envs.social_stream_moderation.graders:BasicSafetyGrader
24
- - id: context_nuance_grader
25
- description: "Grader for contextual and sarcastic content"
26
- entry_point: envs.social_stream_moderation.graders:ContextNuanceGrader
27
- - id: fairness_bias_grader
28
- description: "Grader for fairness and bias parity"
29
- entry_point: envs.social_stream_moderation.graders:FairnessBiasGrader
30
-
31
  tasks:
32
  - id: clear_cut_moderation
33
  name: "Task 1: Basic Safety"
34
  difficulty: easy
35
  description: "Moderate a stream of social posts with obvious violations and safe content."
 
36
  grader_id: basic_safety_grader
 
 
37
  - id: nuanced_sarcastic
38
  name: "Task 2: Context & Nuance"
39
  difficulty: medium
40
  description: "Handle sarcastic content and quotes of harmful material with condemnation."
 
41
  grader_id: context_nuance_grader
 
 
42
  - id: policy_fairness
43
  name: "Task 3: Fairness & Bias"
44
  difficulty: hard
45
  description: "Ensure fairness across user groups and adhere to stricter policy regimes."
 
46
  grader_id: fairness_bias_grader
 
47
 
48
- schemas:
49
- state:
50
- type: object
51
- properties:
52
- post_id: { type: string }
53
- text: { type: string }
54
- user_history_summary: { type: string }
55
- context_type: { type: string }
56
- platform_policy_mode: { type: string }
57
- user_group: { type: string }
58
- step_index: { type: integer }
59
- total_steps: { type: integer }
60
- action:
61
- type: string
62
- enum:
63
- - ALLOW
64
- - ALLOW_WITH_WARNING
65
- - SOFT_HIDE
66
- - ESCALATE_HUMAN
67
- - BAN_USER
68
- info:
69
- type: object
70
- properties:
71
- ground_truth_label: { type: string }
72
- action_taken: { type: string }
73
- reward: { type: number }
74
- score: { type: number }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
  paths:
77
  inference: ./inference.py
78
- app: ./server/app.py
 
1
+ spec_version: 1
2
+ name: social_stream_moderation
3
+ type: environment
4
+ runtime: docker
5
+ app: server.app:app
6
+ port: 7860
7
  description: >
8
  A content-moderation RL environment where an agent must classify social-media
9
  posts as safe or harmful under varying policy regimes, with tasks spanning
10
  basic safety, contextual nuance, and fairness.
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  tasks:
13
  - id: clear_cut_moderation
14
  name: "Task 1: Basic Safety"
15
  difficulty: easy
16
  description: "Moderate a stream of social posts with obvious violations and safe content."
17
+ grader: deterministic
18
  grader_id: basic_safety_grader
19
+ scoring: "0.0-1.0 reward based on action-label match"
20
+
21
  - id: nuanced_sarcastic
22
  name: "Task 2: Context & Nuance"
23
  difficulty: medium
24
  description: "Handle sarcastic content and quotes of harmful material with condemnation."
25
+ grader: deterministic
26
  grader_id: context_nuance_grader
27
+ scoring: "0.0-1.0 reward with context-aware adjustments"
28
+
29
  - id: policy_fairness
30
  name: "Task 3: Fairness & Bias"
31
  difficulty: hard
32
  description: "Ensure fairness across user groups and adhere to stricter policy regimes."
33
+ grader: deterministic
34
  grader_id: fairness_bias_grader
35
+ scoring: "0.0-1.0 reward with fairness penalty"
36
 
37
+ graders:
38
+ - id: basic_safety_grader
39
+ description: "Grader for basic safety checks"
40
+ type: deterministic
41
+ entry_point: envs.social_stream_moderation.graders:BasicSafetyGrader
42
+ - id: context_nuance_grader
43
+ description: "Grader for contextual and sarcastic content"
44
+ type: deterministic
45
+ entry_point: envs.social_stream_moderation.graders:ContextNuanceGrader
46
+ - id: fairness_bias_grader
47
+ description: "Grader for fairness and bias parity"
48
+ type: deterministic
49
+ entry_point: envs.social_stream_moderation.graders:FairnessBiasGrader
50
+
51
+ observation_space:
52
+ post_id: "string"
53
+ text: "string"
54
+ user_history_summary: "string"
55
+ context_type: "string"
56
+ platform_policy_mode: "string"
57
+ user_group: "string"
58
+ step_index: "integer"
59
+ total_steps: "integer"
60
+
61
+ action_space:
62
+ type: string
63
+ enum:
64
+ - ALLOW
65
+ - ALLOW_WITH_WARNING
66
+ - SOFT_HIDE
67
+ - ESCALATE_HUMAN
68
+ - BAN_USER
69
+
70
+ reward:
71
+ type: continuous
72
+ range: [0.0, 1.0]
73
+
74
+ endpoints:
75
+ - path: /reset
76
+ method: POST
77
+ description: Start a new episode
78
+ - path: /step
79
+ method: POST
80
+ description: Submit a moderation action
81
+ - path: /state
82
+ method: GET
83
+ description: Get current episode state
84
+ - path: /tasks
85
+ method: GET
86
+ description: List all tasks with grader info
87
+ - path: /grader
88
+ method: GET
89
+ description: Get grader score for current episode
90
+ - path: /health
91
+ method: GET
92
+ description: Health check
93
 
94
  paths:
95
  inference: ./inference.py
96
+ app: ./server/app.py