# Spaces: GamucopiaCreatives/POLICYPULSE-AI
# Running
# Gamucopia-Creatives
# refactor: update environment configuration to OpenEnv v1 schema and standardize task definitions
# aa6f797 3 days ago
# 2.71 kB

	# Environment manifest: identity and launch settings for the
	# social_stream_moderation environment.
	# NOTE(review): each line of this manifest begins with a tab character,
	# presumably left over from the page it was copied from. YAML does not allow
	# tabs as indentation, so strip that leading tab before parsing.
	spec_version: 1
	name: social_stream_moderation
	type: environment
	runtime: docker
	# Looks like a "module:attribute" import string for the server application
	# -- TODO confirm against whatever launches it.
	app: "server.app:app"
	port: 7860
	# Folded block scalar: the text must be indented deeper than the key,
	# otherwise it is not part of the scalar. Line breaks fold into spaces.
	description: >
	  A content-moderation RL environment where an agent must classify social-media
	  posts as safe or harmful under varying policy regimes, with tasks spanning
	  basic safety, contextual nuance, and fairness.

	# Task catalogue, one list entry per task. Each task's `grader_id` matches
	# the `id` of an entry in the `graders` list of this manifest.
	# Fields are nested under their list item so they no longer collide with
	# the top-level `name` / `description` keys.
	tasks:
	  - id: clear_cut_moderation
	    name: "Task 1: Basic Safety"
	    difficulty: easy
	    description: "Moderate a stream of social posts with obvious violations and safe content."
	    grader: deterministic
	    grader_id: basic_safety_grader
	    scoring: "0.0-1.0 reward based on action-label match"

	  - id: nuanced_sarcastic
	    name: "Task 2: Context & Nuance"
	    difficulty: medium
	    description: "Handle sarcastic content and quotes of harmful material with condemnation."
	    grader: deterministic
	    grader_id: context_nuance_grader
	    scoring: "0.0-1.0 reward with context-aware adjustments"

	  - id: policy_fairness
	    name: "Task 3: Fairness & Bias"
	    difficulty: hard
	    description: "Ensure fairness across user groups and adhere to stricter policy regimes."
	    grader: deterministic
	    grader_id: fairness_bias_grader
	    scoring: "0.0-1.0 reward with fairness penalty"

	# Grader registry, one list entry per grader. The `id` values are the ones
	# referenced by `grader_id` in the `tasks` list.
	# `entry_point` looks like a "module.path:ClassName" reference -- TODO
	# confirm how the consumer resolves it. Quoted so the embedded colon can
	# never be read as a mapping separator.
	graders:
	  - id: basic_safety_grader
	    description: "Grader for basic safety checks"
	    type: deterministic
	    entry_point: "envs.social_stream_moderation.graders:BasicSafetyGrader"
	  - id: context_nuance_grader
	    description: "Grader for contextual and sarcastic content"
	    type: deterministic
	    entry_point: "envs.social_stream_moderation.graders:ContextNuanceGrader"
	  - id: fairness_bias_grader
	    description: "Grader for fairness and bias parity"
	    type: deterministic
	    entry_point: "envs.social_stream_moderation.graders:FairnessBiasGrader"

	# Observation fields, mapping each field name to its declared type name.
	# Entries are nested under the key; left at the parent's column they would
	# be read as unrelated top-level keys and `observation_space` would be empty.
	observation_space:
	  post_id: "string"
	  text: "string"
	  user_history_summary: "string"
	  context_type: "string"
	  platform_policy_mode: "string"
	  user_group: "string"
	  step_index: "integer"
	  total_steps: "integer"

	# Action space: a single string drawn from the enumerated values below.
	# `type` is nested here so it does not clash with the top-level
	# `type: environment` key.
	action_space:
	  type: string
	  enum:
	    - ALLOW
	    - ALLOW_WITH_WARNING
	    - SOFT_HIDE
	    - ESCALATE_HUMAN
	    - BAN_USER

	# Reward signal: continuous, with bounds given as [low, high]. The same
	# 0.0-1.0 interval appears in each task's `scoring` text.
	reward:
	  type: continuous
	  range: [0.0, 1.0]

	# HTTP routes declared for the environment server, one list entry per route
	# (path, method, human-readable description).
	endpoints:
	  - path: /reset
	    method: POST
	    description: Start a new episode
	  - path: /step
	    method: POST
	    description: Submit a moderation action
	  - path: /state
	    method: GET
	    description: Get current episode state
	  - path: /tasks
	    method: GET
	    description: List all tasks with grader info
	  - path: /grader
	    method: GET
	    description: Get grader score for current episode
	  - path: /health
	    method: GET
	    description: Health check

	# Relative file locations for the inference script and the server module
	# (presumably resolved from the repository root -- TODO confirm).
	# `app` is nested here so it does not clash with the top-level `app` key.
	paths:
	  inference: ./inference.py
	  app: ./server/app.py