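# Provenance (captured from the hosting page, not part of the manifest):
#   uploader avatar: ANI00's picture
#   commit message:  first commit
#   commit:          eb0a4a1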
---
# Package metadata for the content-moderation environment.
name: content-moderation-env
# Quoted so the version stays a string rather than being typed by the parser.
version: "1.0.0"
# Folded scalar: the three lines below are joined with spaces into a single
# paragraph. The body must be indented deeper than the key to belong to it.
description: >
  AI-powered content moderation environment. Agents triage text, social
  posts, and multimodal content (including deepfake detection) across
  three difficulty levels using the standard OpenEnv step/reset/state API.
author: openenv-participant
license: MIT
# Task catalogue: one task per difficulty level (easy, medium, hard).
# Every task declares 5 items and a score range of [0.0, 1.0].
tasks:
  - id: text_spam
    difficulty: easy
    description: Classify email/message content as spam or legitimate
    content_type: text
    num_items: 5
    score_range: [0.0, 1.0]
  - id: content_moderation
    difficulty: medium
    description: Multi-label social media content moderation
    content_type: text
    num_items: 5
    score_range: [0.0, 1.0]
  - id: deepfake_detection
    difficulty: hard
    description: Detect AI-manipulated/deepfake media and make moderation decisions
    content_type: multimodal
    num_items: 5
    score_range: [0.0, 1.0]
# Schema of an action: an object with four fields
# (decision, reason, confidence, labels).
action_space:
  type: object
  fields:
    # One of the four listed decisions.
    decision:
      type: string
      enum: [approve, reject, escalate, flag]
    reason:
      type: string
    # Float bounded to [0.0, 1.0].
    confidence:
      type: float
      range: [0.0, 1.0]
    # Array of strings drawn from valid_values.
    # NOTE(review): valid_values is assumed to constrain `labels` (it follows
    # `items` directly) - confirm against the consumer's schema.
    labels:
      type: array
      items: string
      valid_values:
        - spam
        - scam
        - phishing
        - pharmaceutical_spam
        - hate_speech
        - violence
        - harassment
        - misinformation
        - adult_content
        - deepfake
        - political_manipulation
        - fraud
# Schema of an observation: an object whose fields map a field name to a
# type descriptor written as a plain string (e.g. "optional string").
observation_space:
  type: object
  fields:
    content_id: string
    content_type: string
    # The three fields below are marked optional by their descriptors.
    text: optional string
    image_description: optional string
    detector_score: optional float
    metadata: object
    step_num: integer
    total_steps: integer
# HTTP routes, each written as "<METHOD> <path>".
endpoints:
  reset: POST /reset
  step: POST /step
  state: GET /state
  close: POST /close
  tasks: GET /tasks
  health: GET /health
# Container build settings.
# NOTE(review): `dockerfile` repeats the `server/` prefix already given as
# `context`; confirm whether the consumer resolves it relative to the
# repository root or to the context directory.
docker:
  context: server
  dockerfile: server/Dockerfile
# Hugging Face settings: SDK selection plus discovery tags.
# NOTE(review): `tags` is assumed to belong under `huggingface` rather than
# being a top-level key - confirm against the consumer's schema.
huggingface:
  space_sdk: docker
  tags:
    - openenv
    - content-moderation
    - deepfake-detection