openenv-content-moderation / server /data_generator.py
ThejasRao's picture
Upload folder using huggingface_hub
0f8b6aa verified
"""
Deterministic, seed-based post generator.

The task difficulty selects a template bank; the seed then picks one template
from that bank and the slot values used to fill it. All random state is local
(random.Random(seed)), so the global random state is never mutated.
"""
from __future__ import annotations
import random
import uuid
from models import (
ActionType,
GroundTruth,
Observation,
Severity,
TaskConfig,
ViolationType,
)
# ---------------------------------------------------------------------------
# Template banks keyed by difficulty
# ---------------------------------------------------------------------------
# Easy bank: every entry's ground truth is a harassment violation whose
# expected action is removal.
#
# Each entry is a dict with these keys:
#   content         post text; the "{target}" slot is filled from "targets"
#   targets         candidate names for the "{target}" slot
#   geo, reports, engagement
#                   metadata placed on the initial Observation
#   user_history, thread_context, policy_clause
#                   context that DataGenerator.generate returns separately
#                   from the Observation (the "hidden" dict)
#   ground_truth    the GroundTruth returned alongside the Observation
#
# NOTE: the em dashes in the strings below were previously stored as
# mis-decoded bytes; they are restored here so that the text is readable.
_EASY_TEMPLATES = [
    # 0 — US explicit death threat (history is the key signal)
    {
        "content": "I will kill you, {target}. Watch your back.",
        "targets": ["John", "Sarah", "Mike", "Emma", "David"],
        "geo": "US",
        "reports": 12,
        "engagement": {"likes": 3, "shares": 1, "comments": 8},
        "user_history": [
            "Prior violation: threatening language (2023-11)",
            "Prior violation: targeted harassment (2024-02)",
        ],
        "thread_context": [
            "Original post: 'Stop spreading lies about our community.'",
            "Reply chain of 4 escalating messages before this post.",
        ],
        "policy_clause": (
            "Section 3.1 — Zero Tolerance Threats: Any post containing explicit "
            "threats of physical harm against a named individual is subject to "
            "immediate removal."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.harassment,
            base_severity=Severity.critical,
            final_severity=Severity.critical,
            expected_action=ActionType.remove,
            required_investigation=["fetch_user_history"],
            min_steps=2,
        ),
    },
    # 1 — EU location-based threat (history + thread confirm severity)
    {
        "content": "I know where you live, {target}. This is your final warning.",
        "targets": ["Laura", "Pierre", "Hans", "Amelia", "Luca"],
        "geo": "EU",
        "reports": 15,
        "engagement": {"likes": 2, "shares": 0, "comments": 6},
        "user_history": [
            "Prior violation: doxxing attempt (2024-01)",
            "Prior violation: targeted harassment (2024-03)",
        ],
        "thread_context": [
            "Sent directly in response to a public profile post.",
            "Recipient has filed a police report; case pending.",
        ],
        "policy_clause": (
            "Section 3.3 — Location-Based Threats: Any message implying knowledge "
            "of a target's home address in a threatening context constitutes a "
            "critical-severity violation requiring immediate removal."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.harassment,
            base_severity=Severity.critical,
            final_severity=Severity.critical,
            expected_action=ActionType.remove,
            required_investigation=["fetch_user_history", "fetch_thread_context"],
            min_steps=3,
        ),
    },
    # 2 — IN targeted intimidation (history confirms pattern)
    {
        "content": "You're dead to me, {target}. I will make your life a living hell — starting tonight.",
        "targets": ["Priya", "Rahul", "Ananya", "Vikram", "Sunita"],
        "geo": "IN",
        "reports": 8,
        "engagement": {"likes": 1, "shares": 0, "comments": 4},
        "user_history": [
            "Prior violation: abusive language (2023-09)",
            "Account flagged for multiple spam reports.",
        ],
        "thread_context": [
            "Dispute originated in a neighbourhood WhatsApp group.",
            "Target has blocked the sender; post reached mutual contacts.",
        ],
        "policy_clause": (
            "Section 3.2 — Targeted Harassment: Persistent threatening behaviour "
            "toward a named individual warrants removal and account suspension review."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.harassment,
            base_severity=Severity.high,
            final_severity=Severity.critical,
            expected_action=ActionType.remove,
            required_investigation=["fetch_user_history"],
            min_steps=2,
        ),
    },
    # 3 — UK implied physical threat (thread reveals coordinated campaign)
    {
        "content": "Watch your back, {target}. Accidents happen to people like you.",
        "targets": ["Oliver", "Charlotte", "George", "Isla", "Alfie"],
        "geo": "UK",
        "reports": 11,
        "engagement": {"likes": 0, "shares": 2, "comments": 9},
        "user_history": [
            "Prior violation: threatening language (2024-01)",
            "Prior flag: stalking behaviour (2024-04, unresolved).",
        ],
        "thread_context": [
            "Part of a targeted campaign against a community organiser.",
            "Three other accounts made similar posts within the same hour.",
        ],
        "policy_clause": (
            "Section 3.4 — Implied Threats: Language strongly implying physical harm, "
            "even without explicit wording, constitutes high-severity harassment under "
            "UK Online Safety Act obligations."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.harassment,
            base_severity=Severity.high,
            final_severity=Severity.critical,
            expected_action=ActionType.remove,
            required_investigation=["fetch_user_history", "fetch_thread_context"],
            min_steps=3,
        ),
    },
    # 4 — US stalking threat (history alone settles it — 2 prior violations)
    {
        "content": "I'm going to find you, {target}, and make you regret everything.",
        "targets": ["Alex", "Jordan", "Taylor", "Morgan", "Casey"],
        "geo": "US",
        "reports": 9,
        "engagement": {"likes": 1, "shares": 0, "comments": 5},
        "user_history": [
            "Prior violation: hate speech (2024-01)",
            "Prior violation: doxxing attempt (2024-03)",
        ],
        "thread_context": [
            "Post made in response to a public dispute about local politics.",
            "Other users flagged this as a personal threat.",
        ],
        "policy_clause": (
            "Section 3.2 — Targeted Harassment: Persistent threatening behaviour "
            "toward an individual warrants removal and account suspension review."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.harassment,
            base_severity=Severity.high,
            final_severity=Severity.critical,
            expected_action=ActionType.remove,
            required_investigation=["fetch_user_history"],
            min_steps=2,
        ),
    },
    # 5 — EU doxxing + threat (policy clause required — doxxing is a specific clause)
    {
        "content": "Everyone, here is {target}'s home address. Pay them a visit and let them know what you think.",
        "targets": ["Sophie", "Jan", "Marie", "Klaus", "Elena"],
        "geo": "EU",
        "reports": 22,
        "engagement": {"likes": 5, "shares": 8, "comments": 14},
        "user_history": [
            "Prior violation: doxxing (2023-12, account warned).",
        ],
        "thread_context": [
            "Posted in a hate group targeting local politicians.",
            "Address has been verified as accurate by moderator review.",
        ],
        "policy_clause": (
            "Section 4.1 — Doxxing: Publishing private location information with "
            "intent to harass constitutes a critical violation under GDPR-adjacent "
            "platform policy. Immediate removal and account ban required."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.harassment,
            base_severity=Severity.critical,
            final_severity=Severity.critical,
            expected_action=ActionType.remove,
            required_investigation=["fetch_user_history", "check_policy_clause"],
            min_steps=3,
        ),
    },
    # 6 — IN blackmail / coercion (policy clause confirms extortion = remove)
    {
        "content": "Say goodbye to your job, {target}. I have screenshots of everything. Pay up or I post them.",
        "targets": ["Deepak", "Kavya", "Rohit", "Nisha", "Arjun"],
        "geo": "IN",
        "reports": 6,
        "engagement": {"likes": 0, "shares": 1, "comments": 3},
        "user_history": [
            "No prior violations.",
            "Account age: 3 weeks, unverified.",
        ],
        "thread_context": [
            "Sent as a reply to the target's professional portfolio post.",
            "Target has reported extortion to local cybercrime cell.",
        ],
        "policy_clause": (
            "Section 3.5 — Coercion and Extortion: Threats linked to demands for "
            "money or silence are classified as critical harassment requiring "
            "immediate removal and law-enforcement escalation."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.harassment,
            base_severity=Severity.critical,
            final_severity=Severity.critical,
            expected_action=ActionType.remove,
            required_investigation=["fetch_user_history", "check_policy_clause"],
            min_steps=3,
        ),
    },
    # 7 — UK confrontation (thread + history both needed — prior warnings + event context)
    {
        "content": "{target}, if I see you in person I cannot be responsible for what happens next.",
        "targets": ["James", "Emily", "Harry", "Sophia", "Jack"],
        "geo": "UK",
        "reports": 7,
        "engagement": {"likes": 0, "shares": 0, "comments": 5},
        "user_history": [
            "Prior violation: threatening language (2023-08)",
            "Prior warning: harassment (2024-02).",
        ],
        "thread_context": [
            "Comment left under target's public event announcement.",
            "Target cancelled event citing safety concerns.",
        ],
        "policy_clause": (
            "Section 3.4 — Implied Physical Threats: Statements that create a "
            "reasonable fear of imminent violence, even without explicit wording, "
            "are subject to immediate removal."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.harassment,
            base_severity=Severity.high,
            final_severity=Severity.critical,
            expected_action=ActionType.remove,
            required_investigation=["fetch_user_history", "fetch_thread_context"],
            min_steps=3,
        ),
    },
]
# Medium bank: posts criticising or accusing a named brand. Ground truths
# here are either "safe" (allow) or "restricted" (flag / escalate).
#
# Each entry is a dict with these keys:
#   content         post text; the "{brand}" slot is filled from "brands"
#   brands          candidate names for the "{brand}" slot
#   geo, reports, engagement
#                   metadata placed on the initial Observation
#   user_history, thread_context, policy_clause
#                   context that DataGenerator.generate returns separately
#                   from the Observation (the "hidden" dict)
#   ground_truth    the GroundTruth returned alongside the Observation
#
# NOTE: the em dashes in the strings below were previously stored as
# mis-decoded bytes; they are restored here so that the text is readable.
_MEDIUM_TEMPLATES = [
    # 0 — US aggressive consumer review (borderline → allow)
    {
        "content": (
            "This product is an absolute scam! {brand} is stealing money from "
            "hard-working people. Do NOT buy from them."
        ),
        "brands": ["AcmeCorp", "TechBrand", "ShopEasy", "QuickBuy"],
        "geo": "US",
        "reports": 3,
        "engagement": {"likes": 47, "shares": 12, "comments": 21},
        "user_history": [
            "No prior violations.",
            "Account age: 3 years, verified email.",
        ],
        "thread_context": [
            "Part of a consumer-reviews thread with 40+ similar complaints.",
            "Brand has not responded to any user complaints in this thread.",
        ],
        "policy_clause": (
            "Section 5.4 — Commercial Speech: Negative reviews are permitted "
            "unless they contain false factual claims presented as verified truth. "
            "Opinion-based criticism is protected."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.safe,
            base_severity=Severity.low,
            final_severity=Severity.low,
            expected_action=ActionType.allow,
            required_investigation=["check_policy_clause"],
            min_steps=2,
        ),
    },
    # 1 — EU unverified financial fraud allegation (→ flag)
    {
        "content": (
            "{brand} employees are criminals running a Ponzi scheme. "
            "I have proof they faked their audit results."
        ),
        "brands": ["FinCo", "InvestRight", "TrustBank", "WealthPro"],
        "geo": "EU",
        "reports": 7,
        "engagement": {"likes": 89, "shares": 34, "comments": 56},
        "user_history": [
            "No prior violations.",
            "Account age: 6 months.",
        ],
        "thread_context": [
            "Post went viral in a financial discussion group.",
            "No corroborating sources linked in thread.",
        ],
        "policy_clause": (
            "Section 6.1 — Defamation & False Facts: Claims presenting unverified "
            "allegations as established facts about named entities may be flagged "
            "pending review under EU DSA guidelines."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.restricted,
            base_severity=Severity.medium,
            final_severity=Severity.high,
            expected_action=ActionType.flag,
            required_investigation=["fetch_user_history", "fetch_thread_context", "check_policy_clause"],
            min_steps=5,
        ),
    },
    # 2 — IN overnight disappearance allegation (→ flag)
    {
        "content": (
            "{brand} is a total fraud. Don't give them your money — "
            "they'll disappear with it overnight."
        ),
        "brands": ["RupeeGrow", "SafeNest", "IndiaFund", "SwiftPay"],
        "geo": "IN",
        "reports": 4,
        "engagement": {"likes": 62, "shares": 19, "comments": 31},
        "user_history": [
            "No prior violations.",
            "Account age: 1 year.",
        ],
        "thread_context": [
            "Several users in thread report similar losses.",
            "Brand's official account has been inactive for 2 weeks.",
        ],
        "policy_clause": (
            "Section 6.2 — Financial Misinformation: Allegations of fraud without "
            "verified evidence require human review before distribution, especially "
            "when engagement is high."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.restricted,
            base_severity=Severity.medium,
            final_severity=Severity.medium,
            expected_action=ActionType.flag,
            required_investigation=["fetch_user_history", "check_policy_clause"],
            min_steps=4,
        ),
    },
    # 3 — UK blacklisting complaint (borderline → allow)
    {
        "content": (
            "Never trust {brand} again. They blacklist customers who complain and "
            "delete negative reviews. This needs to stop."
        ),
        "brands": ["BritShop", "RoyalParts", "HighStreetCo", "UKDirect"],
        "geo": "UK",
        "reports": 2,
        "engagement": {"likes": 33, "shares": 8, "comments": 15},
        "user_history": [
            "No prior violations.",
            "Account age: 5 years, verified.",
        ],
        "thread_context": [
            "Posted in a consumer advocacy forum.",
            "12 other users confirmed similar experiences in replies.",
        ],
        "policy_clause": (
            "Section 5.4 — Commercial Speech: Allegations about business practices "
            "are treated as consumer opinion unless specific false factual claims "
            "are made. No removal warranted for opinion."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.safe,
            base_severity=Severity.low,
            final_severity=Severity.low,
            expected_action=ActionType.allow,
            required_investigation=["check_policy_clause"],
            min_steps=2,
        ),
    },
    # 4 — US fake review allegation (→ flag)
    {
        "content": (
            "{brand} paid for fake five-star reviews. I have screenshots of "
            "the Fiverr orders. This is illegal false advertising."
        ),
        "brands": ["GlowTech", "PureLeaf", "StarSelect", "NovaBrand"],
        "geo": "US",
        "reports": 5,
        "engagement": {"likes": 112, "shares": 44, "comments": 67},
        "user_history": [
            "No prior violations.",
            "Account age: 2 years.",
        ],
        "thread_context": [
            "Screenshots linked in post have not been verified by platform.",
            "Brand's legal team sent a takedown request.",
        ],
        "policy_clause": (
            "Section 6.3 — Unverified Factual Claims: Posts alleging illegal conduct "
            "with unverified evidence should be flagged for human review; removal "
            "is not appropriate without verification."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.restricted,
            base_severity=Severity.medium,
            final_severity=Severity.medium,
            expected_action=ActionType.flag,
            required_investigation=["fetch_thread_context", "check_policy_clause"],
            min_steps=4,
        ),
    },
    # 5 — EU life-savings loss allegation (→ flag)
    {
        "content": (
            "I lost my life savings because {brand} gave me illegal investment advice. "
            "Their advisers are not licensed. Class action lawsuit incoming."
        ),
        "brands": ["EuroVest", "AlphaTrade", "PrimeCapital", "ClearFund"],
        "geo": "EU",
        "reports": 9,
        "engagement": {"likes": 204, "shares": 87, "comments": 143},
        "user_history": [
            "No prior violations.",
            "Account age: 4 years, verified.",
        ],
        "thread_context": [
            "Post shared across three finance-related groups.",
            "Regulatory body (ESMA) has an open investigation into the brand.",
        ],
        "policy_clause": (
            "Section 6.1 — DSA Compliance: High-reach posts alleging regulatory "
            "violations require expedited human review before any action. Flagging "
            "is the appropriate interim measure."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.restricted,
            base_severity=Severity.high,
            final_severity=Severity.high,
            expected_action=ActionType.flag,
            required_investigation=["fetch_user_history", "fetch_thread_context", "check_policy_clause"],
            min_steps=5,
        ),
    },
    # 6 — IN CEO cover-up allegation (→ escalate)
    {
        "content": (
            "{brand}'s CEO personally ordered the cover-up of safety failures. "
            "I have internal documents. The public deserves to know."
        ),
        "brands": ["BuildSafe", "ChemIndia", "PowerGrid", "MegaCon"],
        "geo": "IN",
        "reports": 11,
        "engagement": {"likes": 430, "shares": 215, "comments": 189},
        "user_history": [
            "No prior violations.",
            "Account is linked to a registered journalist profile.",
        ],
        "thread_context": [
            "Documents referenced have not been independently verified.",
            "Company has issued a denial; no legal action taken yet.",
            "Story picked up by two regional news outlets.",
        ],
        "policy_clause": (
            "Section 7.1 — Whistleblower Content: Posts alleging corporate misconduct "
            "by named executives with claimed documentary evidence require senior "
            "moderator review before any action is taken."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.restricted,
            base_severity=Severity.high,
            final_severity=Severity.high,
            expected_action=ActionType.escalate,
            required_investigation=["fetch_user_history", "fetch_thread_context", "check_policy_clause"],
            min_steps=5,
        ),
    },
    # 7 — UK wellness cult claim (borderline → flag)
    {
        "content": (
            "{brand} is a cult disguised as a wellness company. "
            "They manipulate vulnerable people into spending thousands. Avoid at all costs."
        ),
        "brands": ["PureAura", "HealPath", "MindBloom", "TrueLight"],
        "geo": "UK",
        "reports": 6,
        "engagement": {"likes": 78, "shares": 29, "comments": 52},
        "user_history": [
            "No prior violations.",
            "Account age: 2 years.",
        ],
        "thread_context": [
            "Post appears in a support group for former members.",
            "Multiple similar accounts describe pressure-selling tactics.",
        ],
        "policy_clause": (
            "Section 5.5 — Organisational Criticism: Characterising an organisation "
            "as harmful or deceptive is protected opinion unless presented as "
            "verifiable fact. This case requires human review given potential reach."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.restricted,
            base_severity=Severity.medium,
            final_severity=Severity.medium,
            expected_action=ActionType.flag,
            required_investigation=["fetch_thread_context", "check_policy_clause"],
            min_steps=4,
        ),
    },
]
# Hard bank: health and conspiracy misinformation. Every ground truth here is
# ViolationType.misinformation with an expected action of remove or flag.
#
# Each entry is a dict with these keys (no fill-in slots in this bank):
#   content         post text, used verbatim
#   geo, reports, engagement
#                   metadata placed on the initial Observation
#   user_history, thread_context, policy_clause
#                   context that DataGenerator.generate returns separately
#                   from the Observation (the "hidden" dict)
#   ground_truth    the GroundTruth returned alongside the Observation
#
# NOTE: the em dashes in the strings below were previously stored as
# mis-decoded bytes; they are restored here so that the text is readable.
_HARD_TEMPLATES = [
    # 0 — US vaccine-autism misinfo (viral → remove)
    {
        "content": (
            "BREAKING: Doctors are hiding the truth — vaccines cause autism in 1 "
            "in 10 children. Share before they delete this! #VaccineHarm #TruthBomb"
        ),
        "geo": "US",
        "reports": 2,
        "engagement": {"likes": 4200, "shares": 8900, "comments": 3100},
        "user_history": [
            "No prior violations.",
            "Account age: 8 months, unverified.",
            "Previously shared 3 health-related posts (no action taken).",
        ],
        "thread_context": [
            "Post re-shared from an anti-vaccine group with 500k members.",
            "Multiple medical organisations have issued counter-statements.",
            "Government health ministry has flagged similar content this week.",
        ],
        "policy_clause": (
            "Section 8.2 — Health Misinformation: Content that contradicts "
            "established medical consensus on vaccine safety and is likely to "
            "cause public health harm must be removed. High-engagement posts "
            "receive elevated scrutiny."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.misinformation,
            base_severity=Severity.high,
            final_severity=Severity.critical,
            expected_action=ActionType.remove,
            required_investigation=["fetch_thread_context", "check_policy_clause"],
            min_steps=4,
        ),
    },
    # 1 — EU 5G-COVID conspiracy (viral → remove)
    {
        "content": (
            "5G towers are the real cause of COVID-19. The radiation weakens your "
            "immune system. Governments know this. Spread awareness!"
        ),
        "geo": "EU",
        "reports": 1,
        "engagement": {"likes": 6700, "shares": 12000, "comments": 4500},
        "user_history": [
            "Prior flag: unverified health claim (2023-09, no action taken).",
            "Account age: 2 years.",
        ],
        "thread_context": [
            "Shared in 12 different community groups.",
            "Fact-checking organisations have rated this claim as 'False'.",
            "Local authorities received panic calls linked to this content.",
        ],
        "policy_clause": (
            "Section 8.3 — Crisis Misinformation: False claims linking infrastructure "
            "to disease outbreaks during an active health emergency are subject to "
            "immediate removal to prevent public harm."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.misinformation,
            base_severity=Severity.high,
            final_severity=Severity.critical,
            expected_action=ActionType.remove,
            required_investigation=["fetch_user_history", "fetch_thread_context", "check_policy_clause"],
            min_steps=5,
        ),
    },
    # 2 — IN natural immunity false framing (→ remove)
    {
        "content": (
            "Natural immunity is 100x stronger than vaccines. Stop injecting your "
            "children with untested chemicals. Real doctors agree."
        ),
        "geo": "IN",
        "reports": 3,
        "engagement": {"likes": 3100, "shares": 5200, "comments": 2800},
        "user_history": [
            "No prior violations.",
            "Account age: 1 year.",
            "Shares content from known health-misinfo networks.",
        ],
        "thread_context": [
            "Posted during a regional measles outbreak.",
            "Health officials have issued an alert about similar claims.",
            "Multiple paediatricians have publicly debunked this framing.",
        ],
        "policy_clause": (
            "Section 8.2 — Health Misinformation: Subtle false framings that "
            "discourage vaccination are treated as high-severity violations when "
            "paired with high engagement, especially during active outbreaks."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.misinformation,
            base_severity=Severity.high,
            final_severity=Severity.critical,
            expected_action=ActionType.remove,
            required_investigation=["fetch_thread_context", "check_policy_clause"],
            min_steps=4,
        ),
    },
    # 3 — UK fluoride water conspiracy (→ flag)
    {
        "content": (
            "The government is adding fluoride to drinking water to make the "
            "population docile. This is mass medication without consent. Wake up! #TruthBomb"
        ),
        "geo": "UK",
        "reports": 5,
        "engagement": {"likes": 870, "shares": 1400, "comments": 620},
        "user_history": [
            "No prior violations.",
            "Account age: 3 years, verified.",
        ],
        "thread_context": [
            "Posted in response to a UK government water-quality announcement.",
            "Some scientific debate around fluoride dosage does exist (context matters).",
            "No active public health emergency related to this claim.",
        ],
        "policy_clause": (
            "Section 8.4 — Environmental Misinformation: Claims about water safety "
            "require verification of scientific consensus; partial truth + conspiracy "
            "framing warrants flagging rather than removal."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.misinformation,
            base_severity=Severity.medium,
            final_severity=Severity.medium,
            expected_action=ActionType.flag,
            required_investigation=["fetch_user_history", "fetch_thread_context", "check_policy_clause"],
            min_steps=5,
        ),
    },
    # 4 — US hydroxychloroquine cure claim (viral → remove)
    {
        "content": (
            "Hydroxychloroquine cures COVID-19 in 48 hours. Mainstream media is "
            "suppressing this. Share this before Big Pharma deletes it! #HCQWorks"
        ),
        "geo": "US",
        "reports": 4,
        "engagement": {"likes": 9800, "shares": 22000, "comments": 7600},
        "user_history": [
            "Prior violation: health misinfo (2021-04, post removed).",
            "Account age: 5 years.",
        ],
        "thread_context": [
            "FDA has issued a specific warning about this drug for COVID treatment.",
            "Post originated from a network known for coordinated health misinfo.",
            "Multiple hospitals have reported patients requesting the drug based on similar posts.",
        ],
        "policy_clause": (
            "Section 8.5 — Dangerous Medical Advice: Posts promoting unapproved "
            "treatments for serious diseases with documented public health risk "
            "must be removed immediately, especially from repeat offenders."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.misinformation,
            base_severity=Severity.high,
            final_severity=Severity.critical,
            expected_action=ActionType.remove,
            required_investigation=["fetch_user_history", "check_policy_clause"],
            min_steps=4,
        ),
    },
    # 5 — EU mask brain-damage claim (→ remove)
    {
        "content": (
            "New study proves masks cause oxygen deprivation and brain damage in "
            "children. Governments are knowingly harming kids. Share everywhere!"
        ),
        "geo": "EU",
        "reports": 8,
        "engagement": {"likes": 5400, "shares": 9100, "comments": 4200},
        "user_history": [
            "Prior violation: health misinfo (2022-11).",
            "Account age: 4 years.",
        ],
        "thread_context": [
            "The referenced 'study' does not exist in any academic database.",
            "Viral in EU parent groups ahead of a back-to-school policy update.",
            "EU Health Commission has flagged this specific claim for removal.",
        ],
        "policy_clause": (
            "Section 8.2 — Health Misinformation: Fabricated study citations used "
            "to spread health-damaging claims constitute a critical violation "
            "under EU DSA crisis protocols."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.misinformation,
            base_severity=Severity.high,
            final_severity=Severity.critical,
            expected_action=ActionType.remove,
            required_investigation=["fetch_thread_context", "check_policy_clause"],
            min_steps=4,
        ),
    },
    # 6 — IN ayurvedic cancer cure claim (→ remove)
    {
        "content": (
            "Ayurvedic herbs cure cancer completely. Big Pharma doesn't want you to know "
            "about this cure. Stop chemotherapy — this works! #NaturalCure #BigPharmaLies"
        ),
        "geo": "IN",
        "reports": 6,
        "engagement": {"likes": 7200, "shares": 14000, "comments": 5300},
        "user_history": [
            "No prior violations.",
            "Account is registered to an Ayurvedic products vendor.",
        ],
        "thread_context": [
            "Indian Medical Association issued a counter-statement.",
            "Post targeted at a cancer-patient support community.",
            "Linked website sells products referenced in the post (financial motive).",
        ],
        "policy_clause": (
            "Section 8.6 — Cancer Treatment Misinformation: Posts discouraging "
            "evidence-based cancer treatment in favour of unproven alternatives "
            "are classified as critical violations regardless of geo context."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.misinformation,
            base_severity=Severity.high,
            final_severity=Severity.critical,
            expected_action=ActionType.remove,
            required_investigation=["fetch_user_history", "check_policy_clause"],
            min_steps=4,
        ),
    },
    # 7 — UK chemtrail mind-control conspiracy (→ flag)
    {
        "content": (
            "Chemtrails contain mind-control chemicals approved by the WHO. "
            "Pilots are whistleblowing internally. Share before it gets deleted! #Chemtrails"
        ),
        "geo": "UK",
        "reports": 3,
        "engagement": {"likes": 1100, "shares": 2200, "comments": 890},
        "user_history": [
            "No prior violations.",
            "Account age: 6 years, verified.",
        ],
        "thread_context": [
            "No credible whistleblower reports found in any verified source.",
            "Claim is a longstanding conspiracy theory; no active health crisis linked.",
            "Moderate reach — not yet trending nationally.",
        ],
        "policy_clause": (
            "Section 8.7 — Conspiracy Content: Conspiracy theories that do not "
            "reference an active health emergency and have moderate (not viral) "
            "engagement should be flagged for review rather than removed outright."
        ),
        "ground_truth": GroundTruth(
            violation_type=ViolationType.misinformation,
            base_severity=Severity.medium,
            final_severity=Severity.medium,
            expected_action=ActionType.flag,
            required_investigation=["fetch_user_history", "fetch_thread_context", "check_policy_clause"],
            min_steps=5,
        ),
    },
]
# Lookup from a task's difficulty label to the template bank it draws from.
_TEMPLATES_BY_DIFFICULTY = dict(
    easy=_EASY_TEMPLATES,
    medium=_MEDIUM_TEMPLATES,
    hard=_HARD_TEMPLATES,
)
class DataGenerator:
    """Build a seeded moderation episode from the module's template banks."""

    def generate(
        self, task_config: TaskConfig
    ) -> tuple[Observation, GroundTruth, dict]:
        """Create the initial observation, ground truth and hidden context.

        A private ``random.Random(task_config.seed)`` drives every random
        choice, in this order: template selection, slot value (if the
        template has a ``{target}`` or ``{brand}`` slot), post id, user id.
        The module-level ``random`` state is not touched.

        Args:
            task_config: Supplies ``seed``, ``difficulty`` (the key used to
                look up the template bank) and ``max_steps``.

        Returns:
            A ``(observation, ground_truth, hidden)`` tuple where

            * ``observation`` is the initial state, with ``user_history``,
              ``thread_context``, ``policy_clause`` and ``violation_type``
              all set to ``None``;
            * ``ground_truth`` is a copy of the chosen template's
              ``GroundTruth``;
            * ``hidden`` is a dict with the keys ``user_history``,
              ``thread_context`` and ``policy_clause``.

        Raises:
            KeyError: If ``task_config.difficulty`` has no template bank.
        """
        # Imported locally so this block does not depend on a file-level import.
        import copy

        rng = random.Random(task_config.seed)
        templates = _TEMPLATES_BY_DIFFICULTY[task_config.difficulty]
        tmpl = templates[rng.randint(0, len(templates) - 1)]

        # Fill the single content slot, if the template has one.
        content = tmpl["content"]
        if "{target}" in content:
            content = content.format(target=rng.choice(tmpl["targets"]))
        elif "{brand}" in content:
            content = content.format(brand=rng.choice(tmpl["brands"]))

        # Ids are derived from the seeded RNG so they repeat for a given seed.
        post_id = f"post_{uuid.UUID(int=rng.getrandbits(128))}"
        user_id = f"user_{uuid.UUID(int=rng.getrandbits(128))}"

        # The templates are module-level objects shared by every call. Hand out
        # copies of their mutable parts so that a caller mutating an episode's
        # data in place cannot alter the template for later episodes.
        observation = Observation(
            post_id=post_id,
            content=content,
            user_id=user_id,
            reports=tmpl["reports"],
            engagement=dict(tmpl["engagement"]),
            geo=tmpl["geo"],
            step=0,
            max_steps=task_config.max_steps,
            done=False,
            # Hidden until revealed
            user_history=None,
            thread_context=None,
            policy_clause=None,
            violation_type=None,
        )
        hidden = {
            "user_history": list(tmpl["user_history"]),
            "thread_context": list(tmpl["thread_context"]),
            "policy_clause": tmpl["policy_clause"],
        }
        return observation, copy.deepcopy(tmpl["ground_truth"]), hidden