Spaces:
Sleeping
Sleeping
File size: 8,518 Bytes
"""
Synthetic & Hugging Face content case generator for ContentGuardEnv.
Generates realistic social media posts with associated metadata,
user account history, and ground-truth moderation decisions.
Integrates natively with Hugging Face datasets.
By default, external dataset loading is disabled to keep startup/test
paths deterministic. Enable with CONTENT_GUARD_USE_HF_DATA=1.
"""
import os
import random
import copy
from typing import Any, Dict, Tuple
import logging
logger = logging.getLogger(__name__)
# Handle to the loaded Hugging Face dataset; None means "use synthetic cases".
HF_DATASET = None
# Set to True by the first load attempt so loading is tried only once.
_HF_DATA_ATTEMPTED = False
# Opt-in switch, read once at import time from CONTENT_GUARD_USE_HF_DATA.
_HF_DATA_ENABLED = (
    os.environ.get("CONTENT_GUARD_USE_HF_DATA", "0").strip().lower()
    in {"1", "true", "yes", "on"}
)

# The `datasets` package is optional: any import failure leaves the loader unset.
try:
    from datasets import load_dataset as _hf_load_dataset
except Exception:
    _hf_load_dataset = None
def _maybe_load_hf_data() -> None:
    """Load the Hugging Face dataset at most once, on first use.

    The first call marks the attempt as done, so later calls return
    immediately whether or not loading succeeded. ``HF_DATASET`` stays
    ``None`` when loading is disabled, when the ``datasets`` loader could not
    be imported, or when the load itself raises; callers then fall back to
    synthetic cases.
    """
    global HF_DATASET, _HF_DATA_ATTEMPTED
    if _HF_DATA_ATTEMPTED:
        return
    # Mark the attempt before doing any work so a failed load is never retried.
    _HF_DATA_ATTEMPTED = True

    if not _HF_DATA_ENABLED:
        logger.info("Hugging Face dataset disabled (CONTENT_GUARD_USE_HF_DATA=0). Using synthetic cases.")
        return
    if _hf_load_dataset is None:
        logger.warning("datasets package unavailable; using synthetic cases.")
        return

    try:
        logger.info("Initializing Hugging Face Datasets connection...")
        # Keep dataset bounded for memory and faster cold starts.
        HF_DATASET = _hf_load_dataset("SetFit/toxic_conversations", split="train[:1000]")
        # Lazy %-style arguments: the message is only formatted if it is emitted.
        logger.info("Loaded %d Hugging Face evaluation records.", len(HF_DATASET))
    except Exception as e:
        # Best-effort by design: any load failure degrades to synthetic data.
        logger.warning("Could not load Hugging Face dataset (falling back to synthetic): %s", e)
        HF_DATASET = None
# ── Post templates (Fallback / Structure) ──────────────────────────────────────
# Fallback post templates. Each entry carries the ground-truth labels
# (violation, severity, action, ruling, policy_references) together with one
# or more content variants and descriptive metadata for the template.
POSTS = [
    {
        "violation": "hate_speech",
        "severity": 5,
        "action": "remove",
        "ruling": "upheld",
        "policy_references": ["Hate Speech Policy", "Community Standards Section 12"],
        "content_variants": [
            "People from [COUNTRY] are all criminals and shouldn't be allowed here. Send them back where they came from.",
        ],
        "platform": "social_feed",
        "has_image": False,
        "account_age_days_range": (1, 90),
        "prior_violations_range": (0, 3),
        "appeal_message": "I was just expressing my political opinion about immigration policy. This is free speech.",
    },
    {
        "violation": "spam",
        "severity": 2,
        "action": "restrict_reach",
        "ruling": "upheld",
        "policy_references": ["Spam and Fake Engagement Policy"],
        "content_variants": [
            # Emoji restored from mis-encoded text (money bag, U+1F4B0).
            "💰 MAKE $5000/WEEK FROM HOME!! No experience needed! DM me NOW for the secret link! Limited spots!! 💰💰💰",
        ],
        "platform": "comment_section",
        "has_image": False,
        "account_age_days_range": (1, 30),
        "prior_violations_range": (1, 8),
        "appeal_message": "This is a legitimate business promotion. I'm just trying to make a living.",
    },
    {
        "violation": "safe",
        "severity": 1,
        "action": "no_action",
        "ruling": "overturned",
        "policy_references": [],
        "content_variants": [
            # NOTE(review): the two trailing emoji were unrecoverable mojibake;
            # pineapple + pizza is a reconstruction from context — confirm upstream.
            "Unpopular opinion: pineapple on pizza is actually really good and anyone who disagrees is wrong 🍍🍕",
        ],
        "platform": "public_post",
        "has_image": False,
        "account_age_days_range": (100, 3000),
        "prior_violations_range": (0, 1),
        "appeal_message": "Thank you, I knew this was wrongly removed. This is normal discussion.",
    },
]
# Surfaces a generated post can be attributed to.
PLATFORMS = [
    "social_feed",
    "public_post",
    "comment_section",
    "private_group",
    "marketplace",
    "stories",
]
# Candidate values for the account "region" field.
COUNTRIES = [
    "Brazil",
    "India",
    "Germany",
    "Nigeria",
    "Indonesia",
    "USA",
    "France",
]
# Candidate values for the case "device" field.
DEVICE_TYPES = [
    "mobile_ios",
    "mobile_android",
    "desktop_web",
    "desktop_app",
]
def get_real_hf_case() -> Tuple[str, str, int, str]:
    """Sample one record from the loaded dataset and derive moderation labels.

    Returns:
        A ``(content, violation_type, severity, action)`` tuple. Records whose
        label is not 1 map to ``("safe", 1, "no_action")``; records labelled 1
        get a randomly chosen violation type and a severity in 3..5, with
        ``"remove"`` only at severity 5 and ``"add_warning_label"`` otherwise.

    Raises:
        ValueError: if ``HF_DATASET`` is ``None`` or empty.
    """
    if HF_DATASET is None or len(HF_DATASET) == 0:
        raise ValueError("HF dataset is not initialized.")

    row = HF_DATASET[random.randrange(len(HF_DATASET))]
    # SetFit/toxic_conversations uses 'text' and 'label' (0=safe, 1=toxic)
    text = row.get("text", "")

    if row.get("label", 0) != 1:
        return text, "safe", 1, "no_action"

    # Demo-level simplification: the dataset only says "toxic", so the concrete
    # violation category and severity are drawn at random.
    category = random.choice(["hate_speech", "harassment", "violence", "misinformation"])
    level = random.randint(3, 5)
    decision = "add_warning_label" if level != 5 else "remove"
    return text, category, level, decision
def generate_case(task_id: str) -> Tuple[Dict[str, Any], Dict[str, Any]]:
    """Generate one moderation case and its ground truth for ``task_id``.

    Content and labels come from the Hugging Face dataset when it is loaded
    and non-empty; otherwise they come from a randomly chosen ``POSTS``
    template. All account, engagement, flag and device metadata is randomized.

    Args:
        task_id: ``"easy"``, ``"medium"`` or ``"hard"``. ``"medium"`` and
            ``"hard"`` cases include ``detected_violation``; ``"hard"`` cases
            also include ``action_taken`` and ``user_appeal``.

    Returns:
        ``(case, ground_truth)``. The ground truth holds ``violation`` only
        for ``"easy"``; ``action``, ``severity`` and ``violation`` for
        ``"medium"``; and ``ruling``, ``policy_references``, ``violation`` and
        ``action`` for any other ``task_id``.
    """
    # 1. Base template; deep-copied because the HF path overwrites its labels.
    template = copy.deepcopy(random.choice(POSTS))
    _maybe_load_hf_data()

    # 2. Content & ground-truth assignment
    if HF_DATASET is not None and len(HF_DATASET) > 0:
        # Real dataset path: replace the template's labels with derived ones.
        content, violation, severity, action = get_real_hf_case()
        template["violation"] = violation
        template["severity"] = severity
        template["action"] = action
        if violation == "safe":
            template["ruling"] = "overturned"
            template["policy_references"] = []
        else:
            template["ruling"] = "upheld"
            # "hate_speech" -> "Hate Speech Policy", matching the naming used
            # by the POSTS templates (capitalize() would keep the underscore).
            policy_name = violation.replace("_", " ").title()
            template["policy_references"] = [f"{policy_name} Policy"]
    else:
        # Synthetic fallback.
        # NOTE(review): placeholders such as "[COUNTRY]" in the templates are
        # emitted verbatim — confirm whether substitution is intended.
        content = random.choice(template["content_variants"])

    account_age = random.randint(1, 3000)
    prior_violations = random.randint(0, 5)
    case: Dict[str, Any] = {
        "post_id": f"POST-{random.randint(100000, 999999)}",
        "content": content[:800],  # Cap length for UI aesthetics
        "platform": random.choice(PLATFORMS),
        "has_media_attachment": random.random() > 0.5,
        "language": "en",
        "user_account": {
            "account_age_days": account_age,
            "follower_count": random.randint(0, 50000),
            "is_verified": random.random() < 0.05,
            "prior_violations": prior_violations,
            "account_type": random.choice(["personal", "page", "business"]),
            "region": random.choice(COUNTRIES),
            "trust_score": round(random.uniform(0.1, 1.0), 2),
        },
        "engagement": {
            "likes": random.randint(0, 5000),
            "shares": random.randint(0, 2000),
            "comments": random.randint(0, 800),
            "reports_received": random.randint(1, 150),
            "community_impact_score": round(random.uniform(1.0, 10.0), 1),
            "viral_potential": random.choice(["Low", "Medium", "High", "Critical"]),
        },
        "flags": {
            "auto_detected": random.random() < 0.7,
            "user_reported": random.random() < 0.9,
            # Derived from the randomized history above, not drawn independently.
            "repeat_offender": prior_violations >= 3,
        },
        "device": random.choice(DEVICE_TYPES),
    }

    # Harder tasks expose earlier pipeline stages as part of the case input.
    if task_id in ("medium", "hard"):
        case["detected_violation"] = template["violation"]
    if task_id == "hard":
        case["action_taken"] = template["action"]
        case["user_appeal"] = random.choice([
            "I believe this was moderated in error. Please restore my post.",
            "My political views are being silenced! This doesn't violate any rules.",
            "I see others posting the same thing, why was mine removed?",
            "It was a joke for my friends, please help.",
        ])

    # Ground truth
    if task_id == "easy":
        ground_truth = {"violation": template["violation"]}
    elif task_id == "medium":
        ground_truth = {
            "action": template["action"],
            "severity": template["severity"],
            "violation": template["violation"],
        }
    else:
        # Any task_id other than "easy"/"medium" gets the appeal-style truth.
        ground_truth = {
            "ruling": template["ruling"],
            "policy_references": template["policy_references"],
            "violation": template["violation"],
            "action": template["action"],
        }
    return case, ground_truth
|