# POLICYPULSE-AI / openenv.yaml
# Gamucopia-Creatives
# refactor: update environment configuration to OpenEnv v1 schema and standardize task definitions
# aa6f797
# Manifest header: schema version, environment identity, and serving settings.
spec_version: 1
name: social_stream_moderation
type: environment
runtime: docker
# NOTE(review): looks like a `module:attribute` reference to the server object
# (compare `paths.app: ./server/app.py`) - confirm against the consumer.
app: server.app:app
port: 7860
# Folded scalar: the indented lines below are joined with spaces into a single
# paragraph. The body must be indented deeper than the key to belong to it.
description: >
  A content-moderation RL environment where an agent must classify social-media
  posts as safe or harmful under varying policy regimes, with tasks spanning
  basic safety, contextual nuance, and fairness.
# Task catalogue: three tasks of increasing difficulty (easy, medium, hard).
# Each task's `grader_id` matches the `id` of an entry in the `graders` list.
# Every key of a task is nested under its own `- id:` item so it cannot
# collide with the manifest's top-level `name` / `description` keys.
tasks:
  - id: clear_cut_moderation
    name: "Task 1: Basic Safety"
    difficulty: easy
    description: "Moderate a stream of social posts with obvious violations and safe content."
    grader: deterministic
    grader_id: basic_safety_grader
    scoring: "0.0-1.0 reward based on action-label match"
  - id: nuanced_sarcastic
    name: "Task 2: Context & Nuance"
    difficulty: medium
    description: "Handle sarcastic content and quotes of harmful material with condemnation."
    grader: deterministic
    grader_id: context_nuance_grader
    scoring: "0.0-1.0 reward with context-aware adjustments"
  - id: policy_fairness
    name: "Task 3: Fairness & Bias"
    difficulty: hard
    description: "Ensure fairness across user groups and adhere to stricter policy regimes."
    grader: deterministic
    grader_id: fairness_bias_grader
    scoring: "0.0-1.0 reward with fairness penalty"
# Grader registry: one entry per `grader_id` referenced from the `tasks` list.
# NOTE(review): `entry_point` looks like a `module.path:ClassName` reference -
# confirm the resolution rules with the loader that reads this manifest.
graders:
  - id: basic_safety_grader
    description: "Grader for basic safety checks"
    type: deterministic
    entry_point: envs.social_stream_moderation.graders:BasicSafetyGrader
  - id: context_nuance_grader
    description: "Grader for contextual and sarcastic content"
    type: deterministic
    entry_point: envs.social_stream_moderation.graders:ContextNuanceGrader
  - id: fairness_bias_grader
    description: "Grader for fairness and bias parity"
    type: deterministic
    entry_point: envs.social_stream_moderation.graders:FairnessBiasGrader
# Observation schema: maps each observation field name to its declared type
# ("string" or "integer"). Fields are nested so they stay out of the
# manifest's top-level namespace.
observation_space:
  post_id: "string"
  text: "string"
  user_history_summary: "string"
  context_type: "string"
  platform_policy_mode: "string"
  user_group: "string"
  step_index: "integer"
  total_steps: "integer"
# Action schema: a single string drawn from the enumerated moderation actions.
# `type` is nested here so it does not clash with the top-level
# `type: environment` key.
action_space:
  type: string
  enum:
    - ALLOW
    - ALLOW_WITH_WARNING
    - SOFT_HIDE
    - ESCALATE_HUMAN
    - BAN_USER
# Reward signal: continuous, with bounds matching the "0.0-1.0" scoring range
# stated on every task.
reward:
  type: continuous
  range: [0.0, 1.0]
# HTTP endpoints exposed by the server: path, method, and a one-line purpose.
# `method` and `description` belong to the `- path:` item directly above them.
endpoints:
  - path: /reset
    method: POST
    description: Start a new episode
  - path: /step
    method: POST
    description: Submit a moderation action
  - path: /state
    method: GET
    description: Get current episode state
  - path: /tasks
    method: GET
    description: List all tasks with grader info
  - path: /grader
    method: GET
    description: Get grader score for current episode
  - path: /health
    method: GET
    description: Health check
# Repository-relative file locations. `app` here is a file path and must stay
# nested under `paths`; at top level it would duplicate (and, in last-wins
# parsers, override) the `app: server.app:app` key.
paths:
  inference: ./inference.py
  app: ./server/app.py