# Spaces: mj064/ContentGuardEnv (Sleeping)
# Mridul Jain
# Initial Release: High-fidelity policy moderation research framework
# 4535620 about 1 month ago
# 1.27 kB

	openenv: 0.2.0
	name: content-guard-env
	version: "1.0.0"
	description: >
	An OpenEnv-compliant training environment for AI content moderation agents.
	Simulates Meta-scale Trust & Safety decisions across three progressive tasks:
	violation detection, enforcement action, and appeal report generation.
	author: mj064
	tags:
	- openenv
	- content-moderation
	- trust-and-safety
	- meta
	- llama-3
	- agent-environment

	tasks:
	- id: easy
	name: Violation Classification
	description: Mapping social media content against Meta's primary community standard categories.
	difficulty: easy
	reward_range: [0.05, 0.95]
	- id: medium
	name: Enforcement Proximity
	description: Determining the proportionate enforcement action and severity for a detected policy violation.
	difficulty: medium
	reward_range: [0.05, 0.95]
	- id: hard
	name: Appellate Adjudication
	description: Generating high-fidelity moderation rulings with evidence-backed policy rationale.
	difficulty: hard
	reward_range: [0.0, 1.0]
	hardware:
	cpu: 2
	memory_gb: 8
	environment:
	port: 7860
	api:
	rest: true
	websocket: true
	endpoints:
	reset: POST /reset
	step: POST /step/{episode_id}
	state: GET /state/{episode_id}
	websocket: WS /ws