File size: 1,267 Bytes
7743c15
 
 
 
 
 
 
 
 
 
 
 
 
478fa20
 
 
 
7743c15
4535620
 
7743c15
17a146a
7743c15
4535620
 
7743c15
17a146a
7743c15
4535620
 
7743c15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
---
# Metadata for the content-guard-env environment.

# Version-like values are quoted so that every loader keeps them as strings.
# NOTE(review): `openenv` is presumably the OpenEnv spec version this file
# targets, and `version` the environment's own release — confirm.
openenv: "0.2.0"
name: content-guard-env
version: "1.0.0"

# Folded scalar: the three lines below load as one line of text followed by
# a single trailing newline.
description: >
  An OpenEnv-compliant training environment for AI content moderation agents.
  Simulates Meta-scale Trust & Safety decisions across three progressive tasks:
  violation detection, enforcement action, and appeal report generation.

author: mj064

# Tags associated with this environment.
tags:
  - openenv
  - content-moderation
  - trust-and-safety
  - meta
  - llama-3
  - agent-environment

# The three tasks, listed in order of increasing difficulty.
# Descriptions use `>-` folded scalars: the wrapped lines load as a single
# line of text with no trailing newline.
# NOTE(review): reward_range is presumably a [min, max] pair — confirm with
# the consumer of this file.
tasks:
  - id: easy
    name: Violation Classification
    description: >-
      Mapping social media content against Meta's primary community standard
      categories.
    difficulty: easy
    reward_range: [0.05, 0.95]

  - id: medium
    name: Enforcement Proximity
    description: >-
      Determining the proportionate enforcement action and severity for a
      detected policy violation.
    difficulty: medium
    reward_range: [0.05, 0.95]

  - id: hard
    name: Appellate Adjudication
    description: >-
      Generating high-fidelity moderation rulings with evidence-backed policy
      rationale.
    difficulty: hard
    # NOTE(review): the easy and medium tasks use [0.05, 0.95]; this one uses
    # [0.0, 1.0] — confirm the difference is intentional.
    reward_range: [0.0, 1.0]
# Hardware figures declared for this environment.
# NOTE(review): presumably the CPU count and the memory size (in GB, going by
# the key name) requested from the host — confirm with the consumer.
hardware:
  cpu: 2
  memory_gb: 8
# Network-facing settings: the port, which API flavours are switched on, and
# the route for each operation.
environment:
  # NOTE(review): confirm this matches the port the server actually binds to.
  port: 7860

  # Both API flavours are enabled.
  api:
    rest: true
    websocket: true

  # Each value is a method/scheme token followed by a path. `{episode_id}` is
  # a placeholder in the path. Values are quoted because they contain braces
  # and slashes; the loaded strings are the same as the unquoted form.
  endpoints:
    reset: 'POST /reset'
    step: 'POST /step/{episode_id}'
    state: 'GET /state/{episode_id}'
    websocket: 'WS /ws'