Gamucopia-Creatives committed on
Commit ·
a447d83
1
Parent(s): 46d41dd
feat: implement state interface, standardize schema IDs, and add task discovery endpoint
Browse files - envs/social_stream_moderation/environment.py +4 -1
- openenv.yaml +55 -55
- server/app.py +27 -3
envs/social_stream_moderation/environment.py
CHANGED
|
@@ -66,7 +66,10 @@ class SocialStreamModerationEnv:
|
|
| 66 |
step_index=self.step_index,
|
| 67 |
total_steps=len(self.episode_posts)
|
| 68 |
)
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
| 70 |
async def step(self, action: ModerationAction) -> Tuple[Optional[State], float, bool, Dict[str, Any]]:
|
| 71 |
"""Processes one moderation action."""
|
| 72 |
if self.done:
|
|
|
|
| 66 |
step_index=self.step_index,
|
| 67 |
total_steps=len(self.episode_posts)
|
| 68 |
)
|
| 69 |
+
def state(self) -> State:
|
| 70 |
+
"""Standard OpenEnv interface to return the current observation."""
|
| 71 |
+
return self._get_state()
|
| 72 |
+
|
| 73 |
async def step(self, action: ModerationAction) -> Tuple[Optional[State], float, bool, Dict[str, Any]]:
|
| 74 |
"""Processes one moderation action."""
|
| 75 |
if self.done:
|
openenv.yaml
CHANGED
|
@@ -1,55 +1,55 @@
|
|
| 1 |
-
name: SocialStreamModerationEnv
|
| 2 |
-
version: 1.0.0
|
| 3 |
-
graders:
|
| 4 |
-
-
|
| 5 |
-
description: "Grader for basic safety checks"
|
| 6 |
-
-
|
| 7 |
-
description: "Grader for contextual and sarcastic content"
|
| 8 |
-
-
|
| 9 |
-
description: "Grader for fairness and bias parity"
|
| 10 |
-
|
| 11 |
-
tasks:
|
| 12 |
-
-
|
| 13 |
-
difficulty: easy
|
| 14 |
-
description: "Moderate a stream of social posts with obvious violations and safe content."
|
| 15 |
-
|
| 16 |
-
-
|
| 17 |
-
difficulty: medium
|
| 18 |
-
description: "Handle sarcastic content and quotes of harmful material with condemnation."
|
| 19 |
-
|
| 20 |
-
-
|
| 21 |
-
difficulty: hard
|
| 22 |
-
description: "Ensure fairness across user groups and adhere to stricter policy regimes."
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
schemas:
|
| 26 |
-
state:
|
| 27 |
-
type: object
|
| 28 |
-
properties:
|
| 29 |
-
post_id: { type: string }
|
| 30 |
-
text: { type: string }
|
| 31 |
-
user_history_summary: { type: string }
|
| 32 |
-
context_type: { type: string }
|
| 33 |
-
platform_policy_mode: { type: string }
|
| 34 |
-
user_group: { type: string }
|
| 35 |
-
step_index: { type: integer }
|
| 36 |
-
total_steps: { type: integer }
|
| 37 |
-
action:
|
| 38 |
-
type: string
|
| 39 |
-
enum:
|
| 40 |
-
- ALLOW
|
| 41 |
-
- ALLOW_WITH_WARNING
|
| 42 |
-
- SOFT_HIDE
|
| 43 |
-
- ESCALATE_HUMAN
|
| 44 |
-
- BAN_USER
|
| 45 |
-
info:
|
| 46 |
-
type: object
|
| 47 |
-
properties:
|
| 48 |
-
ground_truth_label: { type: string }
|
| 49 |
-
action_taken: { type: string }
|
| 50 |
-
reward: { type: number }
|
| 51 |
-
score: { type: number }
|
| 52 |
-
|
| 53 |
-
paths:
|
| 54 |
-
inference: ./inference.py
|
| 55 |
-
app: ./server/app.py
|
|
|
|
| 1 |
+
name: SocialStreamModerationEnv
|
| 2 |
+
version: 1.0.0
|
| 3 |
+
graders:
|
| 4 |
+
- id: basic_safety_grader
|
| 5 |
+
description: "Grader for basic safety checks"
|
| 6 |
+
- id: context_nuance_grader
|
| 7 |
+
description: "Grader for contextual and sarcastic content"
|
| 8 |
+
- id: fairness_bias_grader
|
| 9 |
+
description: "Grader for fairness and bias parity"
|
| 10 |
+
|
| 11 |
+
tasks:
|
| 12 |
+
- id: "Task 1: Basic Safety"
|
| 13 |
+
difficulty: easy
|
| 14 |
+
description: "Moderate a stream of social posts with obvious violations and safe content."
|
| 15 |
+
grader_id: basic_safety_grader
|
| 16 |
+
- id: "Task 2: Context & Nuance"
|
| 17 |
+
difficulty: medium
|
| 18 |
+
description: "Handle sarcastic content and quotes of harmful material with condemnation."
|
| 19 |
+
grader_id: context_nuance_grader
|
| 20 |
+
- id: "Task 3: Fairness & Bias"
|
| 21 |
+
difficulty: hard
|
| 22 |
+
description: "Ensure fairness across user groups and adhere to stricter policy regimes."
|
| 23 |
+
grader_id: fairness_bias_grader
|
| 24 |
+
|
| 25 |
+
schemas:
|
| 26 |
+
state:
|
| 27 |
+
type: object
|
| 28 |
+
properties:
|
| 29 |
+
post_id: { type: string }
|
| 30 |
+
text: { type: string }
|
| 31 |
+
user_history_summary: { type: string }
|
| 32 |
+
context_type: { type: string }
|
| 33 |
+
platform_policy_mode: { type: string }
|
| 34 |
+
user_group: { type: string }
|
| 35 |
+
step_index: { type: integer }
|
| 36 |
+
total_steps: { type: integer }
|
| 37 |
+
action:
|
| 38 |
+
type: string
|
| 39 |
+
enum:
|
| 40 |
+
- ALLOW
|
| 41 |
+
- ALLOW_WITH_WARNING
|
| 42 |
+
- SOFT_HIDE
|
| 43 |
+
- ESCALATE_HUMAN
|
| 44 |
+
- BAN_USER
|
| 45 |
+
info:
|
| 46 |
+
type: object
|
| 47 |
+
properties:
|
| 48 |
+
ground_truth_label: { type: string }
|
| 49 |
+
action_taken: { type: string }
|
| 50 |
+
reward: { type: number }
|
| 51 |
+
score: { type: number }
|
| 52 |
+
|
| 53 |
+
paths:
|
| 54 |
+
inference: ./inference.py
|
| 55 |
+
app: ./server/app.py
|
server/app.py
CHANGED
|
@@ -767,13 +767,37 @@ async def reset_env(req: ResetRequest = Body(default=ResetRequest())):
|
|
| 767 |
except ValueError as e:
|
| 768 |
raise HTTPException(status_code=400, detail=str(e))
|
| 769 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 770 |
@app.get("/graders", tags=["🛡️ Automated Benchmarking"])
|
| 771 |
async def list_graders():
|
| 772 |
"""Returns the list of graders available in the environment for discovery."""
|
| 773 |
return [
|
| 774 |
-
{"
|
| 775 |
-
{"
|
| 776 |
-
{"
|
| 777 |
]
|
| 778 |
|
| 779 |
@app.post("/evaluate", tags=["🧪 Interactive Lab"], summary="Test Model Logic (XAI Insight)")
|
|
|
|
| 767 |
except ValueError as e:
|
| 768 |
raise HTTPException(status_code=400, detail=str(e))
|
| 769 |
|
| 770 |
+
@app.get("/tasks", tags=["🤖 Automated Benchmarking"])
|
| 771 |
+
async def list_tasks():
|
| 772 |
+
"""Returns the list of tasks available in the environment for discovery."""
|
| 773 |
+
return [
|
| 774 |
+
{
|
| 775 |
+
"id": "Task 1: Basic Safety",
|
| 776 |
+
"difficulty": "easy",
|
| 777 |
+
"description": "Moderate a stream of social posts with obvious violations and safe content.",
|
| 778 |
+
"grader_id": "basic_safety_grader"
|
| 779 |
+
},
|
| 780 |
+
{
|
| 781 |
+
"id": "Task 2: Context & Nuance",
|
| 782 |
+
"difficulty": "medium",
|
| 783 |
+
"description": "Handle sarcastic content and quotes of harmful material with condemnation.",
|
| 784 |
+
"grader_id": "context_nuance_grader"
|
| 785 |
+
},
|
| 786 |
+
{
|
| 787 |
+
"id": "Task 3: Fairness & Bias",
|
| 788 |
+
"difficulty": "hard",
|
| 789 |
+
"description": "Ensure fairness across user groups and adhere to stricter policy regimes.",
|
| 790 |
+
"grader_id": "fairness_bias_grader"
|
| 791 |
+
}
|
| 792 |
+
]
|
| 793 |
+
|
| 794 |
@app.get("/graders", tags=["🛡️ Automated Benchmarking"])
|
| 795 |
async def list_graders():
|
| 796 |
"""Returns the list of graders available in the environment for discovery."""
|
| 797 |
return [
|
| 798 |
+
{"id": "basic_safety_grader", "description": "Grader for basic safety checks"},
|
| 799 |
+
{"id": "context_nuance_grader", "description": "Grader for contextual and sarcastic content"},
|
| 800 |
+
{"id": "fairness_bias_grader", "description": "Grader for fairness and bias parity"}
|
| 801 |
]
|
| 802 |
|
| 803 |
@app.post("/evaluate", tags=["🧪 Interactive Lab"], summary="Test Model Logic (XAI Insight)")
|