# Soham Banerjee
# Fix campaign IDs: load dynamically from env instead of hardcoded values
# e8094c5
"""
app.py — Gradio UI for ContentModerationEnv (Hugging Face Spaces)
=================================================================
Live interactive demo + API endpoint for the OpenEnv benchmark.
Tabs
----
1. Try It — step through individual scenarios
2. Campaign Mode — deterministic campaign episodes (reset(campaign_id=...))
3. Baseline — run the lexical agent over all 128 scenarios
4. API Docs — Python / shell examples
"""
import json
import sys
from pathlib import Path
import gradio as gr
# Despite the name, this is the directory two levels up from this file
# (the parent of the directory that contains it).
SCRIPT_DIR = Path(__file__).parent.parent
# Put that directory first on sys.path so the project imports that follow are
# searched for there before anywhere else.
sys.path.insert(0, str(SCRIPT_DIR))
from content_moderation_env import ContentModerationEnv, CampaignModerationEnv
from baseline_inference import run_baseline
# ── env singleton ──────────────────────────────────────────────────────────────
# Benchmark data files, resolved relative to SCRIPT_DIR.
SCENARIOS_PATH = SCRIPT_DIR / "moderation_benchmark.json"
CAMPAIGNS_PATH = SCRIPT_DIR / "campaign_benchmark.json"
# Module-level environment instances shared by the UI callbacks and the HTTP
# routes in this file (both built with seed=42).
env = ContentModerationEnv(str(SCENARIOS_PATH), seed=42)
campaign_env = CampaignModerationEnv(str(CAMPAIGNS_PATH), seed=42)
# Identifiers used to populate the scenario / campaign dropdowns.
ALL_IDS = env.scenario_ids
# NOTE(review): this reads a private attribute of CampaignModerationEnv; a
# public accessor (analogous to env.scenario_ids) would be safer if one exists.
CAMPAIGN_IDS = campaign_env._campaign_ids
# ── helpers ───────────────────────────────────────────────────────────────────
def _fmt_state(s: dict) -> str:
lines = [f"**Text:** {s['text']}"]
if s.get("audio_transcript"):
lines.append(f"**Audio:** {s['audio_transcript']}")
if s.get("visual_tags"):
lines.append(f"**Visual tags:** {', '.join(s['visual_tags'])}")
lines.append(f"**Previous flags:** {s['previous_flags']} | **Policy:** {s['platform_policy']}")
return "\n\n".join(lines)
def _reward_bar(reward: float) -> str:
filled = int(reward * 20)
bar = "█" * filled + "░" * (20 - filled)
emoji = "✅" if reward >= 0.8 else ("🟡" if reward >= 0.4 else "❌")
return f"{emoji} [{bar}] {reward:.2f}"
# ── Tab 1: Try It ─────────────────────────────────────────────────────────────
def load_scenario(scenario_id: str):
    """Reset the shared environment to a scenario and render it for the UI.

    Args:
        scenario_id: Identifier passed to ``env.reset``.

    Returns:
        A 3-tuple of (observation Markdown, tier Markdown, Gradio update that
        shows the severity slider only for the ``hard`` tier). If the reset
        raises, the tuple carries the error text, an empty tier string and an
        update that hides the slider.
    """
    try:
        observation = env.reset(scenario_id)
    except Exception as e:
        return f"Error: {e}", "", gr.update(visible=False)

    # The tier is read from the environment's currently loaded scenario.
    tier = env._current_scenario["tier"]
    severity_visible = tier == "hard"
    return (
        _fmt_state(observation),
        f"**Tier:** `{tier}`",
        gr.update(visible=severity_visible),
    )
def submit_action(scenario_id: str, label: str, action: str, severity: int, rationale: str):
    """Score one moderation decision against the selected scenario.

    The shared environment is reset to ``scenario_id`` before stepping, so the
    decision is always scored against the scenario chosen in the dropdown.

    Args:
        scenario_id: Scenario to reset the environment to.
        label: Chosen content label.
        action: Chosen moderation action.
        severity: Chosen severity value.
        rationale: Free-text explanation.

    Returns:
        A 2-tuple of (result Markdown, raw result as a fenced JSON block). On
        failure the first element is an error message and the second is empty.
    """
    try:
        env.reset(scenario_id)
    except Exception as e:
        return f"Error resetting: {e}", ""

    decision = {"label": label, "action": action, "severity": severity, "rationale": rationale}
    try:
        outcome = env.step(decision)
    except Exception as e:
        return f"Error in step(): {e}", ""

    details = outcome["info"]
    truth = details["ground_truth"]
    breakdown = details["score_breakdown"]
    score = outcome["reward"]

    out_md = f"""
### Result
{_reward_bar(score)}
| Component | Score |
|-----------|-------|
| Label correct | `{breakdown.get('label_correct', 'n/a')}` |
| Action correct | `{breakdown.get('action_correct', 'n/a')}` |
| Severity ±1 | `{breakdown.get('severity_within_1', 'n/a')}` |
**Ground truth:** label=`{truth['label']}` action=`{truth['action']}` severity=`{truth.get('severity', 'n/a')}`
> {truth.get('rationale', '')}
"""
    # default=str stringifies any value json cannot serialise natively.
    dumped = json.dumps(outcome, indent=2, default=str)
    return out_md, f"```json\n{dumped}\n```"
# ── Tab 2: Baseline ───────────────────────────────────────────────────────────
def _baseline_summary_row(name: str, rewards: list) -> list:
    """Build one results-table row: name, N, mean (3 dp), #perfect, #zero."""
    mean = sum(rewards) / len(rewards) if rewards else 0.0
    return [name, len(rewards), f"{mean:.3f}", rewards.count(1.0), rewards.count(0.0)]


def run_baseline_tab(tier_filter: str):
    """Run the baseline agent and summarise its rewards per tier.

    Args:
        tier_filter: ``"all"`` to evaluate every tier, otherwise the tier name
            forwarded to ``run_baseline``.

    Returns:
        A 2-tuple of (table rows, status Markdown). Rows have the columns
        Tier, N, Mean Reward, Perfect (1.0) and Zero (0.0): one row per tier
        that has results (in easy/medium/hard order) followed by an overall
        row.
    """
    selected = None if tier_filter == "all" else tier_filter
    results = run_baseline(tier_filter=selected, seed=42, verbose=False)

    rows = []
    for tier in ("easy", "medium", "hard"):
        tier_rewards = [r["reward"] for r in results if r["tier"] == tier]
        if tier_rewards:  # tiers with no results are left out of the table
            rows.append(_baseline_summary_row(tier, tier_rewards))

    all_rewards = [r["reward"] for r in results]
    overall = sum(all_rewards) / len(all_rewards) if all_rewards else 0.0
    rows.append(_baseline_summary_row("**OVERALL**", all_rewards))
    return rows, f"Baseline complete. Overall mean reward: **{overall:.3f}**"
# ── Tab 4: Campaign Detection ────────────────────────────────────────────────
def load_campaign(campaign_id=None):
    """Reset the campaign environment and render its posts as Markdown.

    Args:
        campaign_id: Campaign identifier forwarded to ``campaign_env.reset``.

    Returns:
        A 2-tuple of (campaign header Markdown, posts Markdown). If the reset
        raises, the tuple carries the error text and a failure notice.
    """
    try:
        state = campaign_env.reset(campaign_id=campaign_id)
    except Exception as e:
        return f"Error: {e}", "Failed to load campaign."

    chunks = []
    for number, post in enumerate(state.get("posts", []), 1):
        chunks.append(f"**Post {number}** — account: `{post.get('account_id', 'N/A')}`")
        chunks.append(f"  |  +{post.get('posted_at_offset_minutes', 0)} min")
        chunks.append(f"  |  platform: `{post.get('platform', 'unknown')}`\n\n")
        chunks.append(f"> {post.get('text', '')}\n\n")
        if post.get("visual_tags"):
            chunks.append(f"*Visual signals: {', '.join(post['visual_tags'])}*\n\n")
        # Horizontal rule after every post.
        chunks.append("---\n\n")

    header = (
        f"**Campaign:** `{state.get('campaign_id', 'N/A')}`  |"
        f"  {state.get('num_posts', 0)} posts\n"
    )
    return header, "".join(chunks)
def submit_campaign(campaign_id, is_coord_str, action, reasoning):
    """Score one campaign-detection decision and render the result.

    The campaign environment is reset to ``campaign_id`` before stepping so
    the decision is scored against the campaign chosen in the dropdown.

    Args:
        campaign_id: Campaign to reset the environment to.
        is_coord_str: ``"true"`` or ``"false"`` from the dropdown; only the
            exact string ``"true"`` is treated as coordinated.
        action: Chosen moderation action.
        reasoning: Free-text explanation.

    Returns:
        Result Markdown (reward bar, ground truth and score breakdown), or an
        error message if resetting or stepping the environment raises.
    """
    try:
        campaign_env.reset(campaign_id=campaign_id)
    except Exception as e:
        return f"Error resetting campaign: {e}"

    action_dict = {
        "is_coordinated": is_coord_str == "true",
        "action": action,
        "reasoning": reasoning,
    }
    # Guard step() the same way submit_action does, so an environment error
    # is shown in the result panel instead of propagating out of the callback.
    try:
        result = campaign_env.step(action_dict)
    except Exception as e:
        return f"Error in step(): {e}"

    r = result.get("reward", 0.0)
    info = result.get("info", {})
    gt = info.get("ground_truth", {})
    bd = info.get("score_breakdown", {})

    # Clamp only the bar fill; the label keeps the raw (possibly negative) reward.
    filled = int(min(max(r, 0), 1) * 20)
    bar = "█" * filled + "░" * (20 - filled)
    emoji = "✅" if r >= 0.8 else ("🟡" if r >= 0.4 else "❌")

    parts = [
        f"{emoji} [{bar}] {r:.2f}\n\n",
        # Per-key defaults so a partial ground-truth dict cannot raise KeyError.
        f"**Ground truth:** coordinated=`{gt.get('is_coordinated', False)}`",
        f" action=`{gt.get('correct_action', 'None')}`\n\n",
        "**Score breakdown:**\n\n",
    ]
    parts.extend(f" - `{k}`: `{v}`\n" for k, v in bd.items())
    return "".join(parts)
# ── Tab 3: API examples ───────────────────────────────────────────────────────
# Shell snippet displayed in the API Docs tab. Despite the constant's name it
# contains no curl calls: both steps drive the environment in-process through
# `python -c` one-liners. The trailing backslash after the opening quotes
# keeps the string from starting with a newline.
API_CURL = """\
# 1. Reset (load a random scenario)
STATE=$(python -c "
import json, sys
sys.path.insert(0, '.')
from content_moderation_env import ContentModerationEnv
env = ContentModerationEnv('moderation_benchmark.json', seed=42)
state = env.reset()
print(json.dumps(state, indent=2))
")
# 2. Step (submit your action)
python -c "
import json, sys
sys.path.insert(0, '.')
from content_moderation_env import ContentModerationEnv
env = ContentModerationEnv('moderation_benchmark.json', seed=42)
env.reset('scen_hard_1')
result = env.step({
'label': 'toxic',
'action': 'escalate',
'severity': 5,
'rationale': 'Coordinated physical threat.'
})
print(json.dumps(result, indent=2))
"
"""
# Python usage example displayed in the API Docs tab (rendered with gr.Code).
# The trailing backslash after the opening quotes keeps the string from
# starting with a newline.
API_PYTHON = """\
from content_moderation_env import ContentModerationEnv
# Instantiate
env = ContentModerationEnv("moderation_benchmark.json", seed=42)
print(f"Loaded {env.num_scenarios} scenarios")
# Episode
state = env.reset() # random
# state = env.reset("scen_hard_1") # specific
print(state["text"])
result = env.step({
"label": "toxic",
"action": "escalate",
"severity": 4,
"rationale": "Threat indicators detected."
})
print(f"Reward: {result['reward']}")
print(f"Breakdown: {result['info']['score_breakdown']}")
"""
# ── Build UI ──────────────────────────────────────────────────────────────────
# Gradio theme for the whole app: the built-in Soft theme with emerald/zinc
# hues and the Inter font (with generic sans-serif fallbacks), followed by
# overrides for primary-button fill and block radius/border/background.
# Values starting with "*" are references to other theme variables.
THEME = gr.themes.Soft(
    primary_hue="emerald",
    neutral_hue="zinc",
    font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
).set(
    button_primary_background_fill="*primary_500",
    button_primary_background_fill_hover="*primary_600",
    block_radius="12px",
    block_border_width="1px",
    block_border_color="*neutral_200",
    block_border_color_dark="*neutral_700",
    block_background_fill="*background_fill_secondary",
)
# Custom stylesheet passed to gr.Blocks. Besides capping the container width,
# it styles three classes that the UI attaches through `elem_classes`:
# `header` (page banner), `action-btn` (submit buttons) and
# `observation-card` (scenario / campaign display), each with a `.dark` variant
# where one is defined.
CSS = """
.gradio-container {
max-width: 1100px !important;
margin: 0 auto;
}
.header {
text-align: center;
padding: 3rem 0 2rem;
margin-bottom: 2rem;
background: linear-gradient(135deg, rgba(16,185,129,0.1) 0%, rgba(59,130,246,0.1) 100%);
border-radius: 12px;
border: 1px solid rgba(0,0,0,0.05);
}
.dark .header {
background: linear-gradient(135deg, rgba(16,185,129,0.05) 0%, rgba(59,130,246,0.05) 100%);
border-color: rgba(255,255,255,0.05);
}
.action-btn {
font-weight: 600 !important;
font-size: 1.1rem !important;
padding: 0.75rem !important;
box-shadow: 0 4px 6px -1px rgba(0,0,0,0.1) !important;
transition: transform 0.1s ease !important;
}
.action-btn:hover {
transform: translateY(-1px) !important;
}
.observation-card {
background: white;
padding: 1.5rem;
border-radius: 12px;
border: 1px solid #e5e7eb;
box-shadow: 0 1px 3px 0 rgba(0,0,0,0.05);
}
.dark .observation-card {
background: #1f2937;
border-color: #374151;
}
"""
# Build the Gradio interface: a header banner, four tabs (Try It, Baseline,
# API Docs, Campaign Detection) and a footer. Components are laid out in the
# order they are created inside each `with` context.
# NOTE(review): the row/column nesting below was re-indented from a flattened
# source — confirm component placement against the rendered layout.
with gr.Blocks(theme=THEME, css=CSS, title="ContentModerationEnv — OpenEnv Benchmark") as demo:
    # Page banner, styled by the `.header` CSS rule.
    with gr.Column(elem_classes=["header"]):
        gr.Markdown("""
# 🛡️ ContentModerationEnv
### An OpenEnv benchmark for evaluating AI content moderation agents
> **128 scenarios** across 3 difficulty tiers (easy / medium / hard) ·
> **Partial-credit scoring** (0.0 – 1.0) · **Full OpenEnv API**
> `reset()` · `step()` · `state()` · `appeal()` · campaign mode · adversarial scenarios
""")
    with gr.Tabs():
        # ── Tab 1: Try It ─────────────────────────────────────────────────────
        with gr.Tab("🎮 Try It"):
            with gr.Row():
                with gr.Column(scale=1):
                    # NOTE(review): ALL_IDS[0] raises IndexError if no
                    # scenarios were loaded.
                    sid_dd = gr.Dropdown(
                        choices=ALL_IDS,
                        value=ALL_IDS[0],
                        label="Select Scenario ID to Load",
                        interactive=True,
                    )
                    tier_md = gr.Markdown()
                with gr.Column(scale=2):
                    state_md = gr.Markdown("**Select a scenario** from the dropdown to begin →", label="Observation", elem_classes=["observation-card"])
            gr.Markdown("### Your moderation decision")
            with gr.Row():
                label_dd = gr.Dropdown(
                    choices=["safe", "toxic", "spam", "misleading"],
                    value="safe", label="Label"
                )
                action_dd = gr.Dropdown(
                    choices=["allow", "warn", "remove", "shadowban", "escalate"],
                    value="allow", label="Action"
                )
                # Hidden initially; load_scenario makes it visible for the
                # "hard" tier.
                sev_slider = gr.Slider(1, 5, value=3, step=1,
                                       label="Severity (hard tier)", visible=False)
            rationale_tb = gr.Textbox(label="Rationale (optional)", lines=2,
                                      placeholder="Brief explanation …")
            step_btn = gr.Button("Submit → env.step()", variant="primary", elem_classes=["action-btn"])
            result_md = gr.Markdown()
            result_raw = gr.Markdown()
            # The observation is only (re)loaded when the dropdown value
            # changes; no load is wired for the preselected initial value.
            sid_dd.change(
                load_scenario,
                inputs=[sid_dd],
                outputs=[state_md, tier_md, sev_slider],
            )
            step_btn.click(
                submit_action,
                inputs=[sid_dd, label_dd, action_dd, sev_slider, rationale_tb],
                outputs=[result_md, result_raw],
            )
        # ── Tab 2: Baseline ───────────────────────────────────────────────────
        with gr.Tab("📊 Baseline"):
            gr.Markdown("""
### Lexical Rule-Based Baseline
A deterministic, no-LLM agent that uses regex patterns to classify content
and policy-based rules to choose an action. Run it to verify the environment
and as a comparison floor for LLM agents.
""")
            tier_radio = gr.Radio(
                choices=["all", "easy", "medium", "hard"],
                value="all", label="Tier to evaluate"
            )
            run_btn = gr.Button("Run Baseline", variant="primary")
            status_md = gr.Markdown()
            # Column headers match the row layout returned by run_baseline_tab.
            result_tbl = gr.Dataframe(
                headers=["Tier", "N", "Mean Reward", "Perfect (1.0)", "Zero (0.0)"],
                interactive=False,
            )
            run_btn.click(
                run_baseline_tab,
                inputs=[tier_radio],
                outputs=[result_tbl, status_md],
            )
        # ── Tab 3: API Docs ───────────────────────────────────────────────────
        with gr.Tab("📖 API Docs"):
            gr.Markdown("""
## Quick Start
```bash
git clone https://huggingface.co/spaces/sohambanerjee/content-moderation-env
cd content-moderation-env
pip install -r requirements.txt
```
### Python API
""")
            gr.Code(API_PYTHON, language="python", label="Python usage")
            gr.Markdown("### Shell / curl equivalent")
            gr.Textbox(API_CURL, label="Shell usage", lines=20, interactive=False)
            # Static reference tables; the baseline figures here are
            # hard-coded text, not computed at runtime.
            gr.Markdown("""
## Action Space
| Field | Type | Required | Values |
|-------|------|----------|--------|
| `label` | str | ✅ | `safe` · `toxic` · `spam` · `misleading` |
| `action` | str | ✅ | `allow` · `warn` · `remove` · `shadowban` · `escalate` |
| `severity` | int 1-5 | ❌ (scored in hard) | `1` (mild) → `5` (critical) |
| `rationale` | str | ❌ | Free text explanation |
## Reward Function
| Tier | Label | Action | Severity ±1 |
|------|-------|--------|-------------|
| easy / medium | 0.5 | 0.5 | — |
| hard | 0.4 | 0.4 | 0.2 |
## Baseline Scores (lexical agent, seed=42)
| Tier | N | Mean Reward |
|------|---|-------------|
| easy | 52 | 0.375 |
| medium | 25 | 0.460 |
| hard | 51 | 0.144 |
| **overall** | **128** | **0.300** |
""")
        # ── Tab 4: Campaign Detection ────────────────────────────────────
        with gr.Tab("🎯 Campaign Detection"):
            gr.Markdown("""
### Coordinated Campaign Detection
Review **3 posts from different accounts** and determine whether they form
a coordinated inauthentic behavior campaign.
| Field | Description |
|-------|-------------|
| `is_coordinated` | True if posts are from a coordinated operation |
| `action` | `allow` / `remove` / `shadowban` / `escalate` |
| Reward | +0.5 coordination detected · +0.5 action correct · -0.2 false positive |
""")
            with gr.Row():
                with gr.Column(scale=1):
                    # NOTE(review): CAMPAIGN_IDS[0] raises IndexError if no
                    # campaigns were loaded.
                    camp_sid_dd = gr.Dropdown(
                        choices=CAMPAIGN_IDS,
                        value=CAMPAIGN_IDS[0],
                        label="Select Campaign to Load",
                        interactive=True,
                    )
                    camp_type_md = gr.Markdown()
                with gr.Column(scale=2):
                    camp_posts_md = gr.Markdown("**Select a campaign** from the dropdown to begin →", elem_classes=["observation-card"])
            with gr.Row():
                # String choices; submit_campaign compares against "true".
                is_coord_dd = gr.Dropdown(
                    choices=["true", "false"],
                    value="false",
                    label="Is Coordinated?"
                )
                camp_action_dd = gr.Dropdown(
                    choices=["allow", "remove", "shadowban", "escalate"],
                    value="allow",
                    label="Action"
                )
            reasoning_tb = gr.Textbox(
                label="Reasoning (optional)", lines=2,
                placeholder="Explain your coordination assessment..."
            )
            camp_submit_btn = gr.Button(
                "Submit → campaign_env.step()", variant="primary", elem_classes=["action-btn"]
            )
            camp_result_md = gr.Markdown()
            camp_sid_dd.change(
                load_campaign,
                inputs=[camp_sid_dd],
                outputs=[camp_type_md, camp_posts_md]
            )
            camp_submit_btn.click(
                submit_campaign,
                inputs=[camp_sid_dd, is_coord_dd, camp_action_dd, reasoning_tb],
                outputs=[camp_result_md]
            )
    # Footer shown below the tabs.
    gr.Markdown("""
---
<p style="text-align:center; color: #888; font-size: 0.85rem;">
ContentModerationEnv v2.0 · OpenEnv · MIT License
</p>
""")
# ── OpenEnv HTTP API routes ───────────────────────────────────────────────────
# Registered on a plain FastAPI app (the Gradio UI is mounted onto it further
# down) so that POST /reset returns HTTP 200, satisfying the HF Space validator check.
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
import uvicorn
# FastAPI application that carries the /reset, /step and /state routes.
app = FastAPI()
@app.post("/reset")
@app.post("/reset/")
async def api_reset(request: Request):
    """POST /reset → reset the shared environment and return the observation.

    An optional JSON body may carry ``scenario_id``; a missing, non-JSON or
    unparsable body is treated as an empty one. Responds with
    ``{"state": ..., "status": "ok"}`` on success, or ``{"error": ...}`` with
    HTTP 400 if the reset (or building the response) raises.
    """
    payload = {}
    try:
        content_type = request.headers.get("content-type", "")
        if content_type.startswith("application/json"):
            payload = await request.json()
    except Exception:
        # Best effort: fall back to an empty body rather than failing the reset.
        payload = {}

    if isinstance(payload, dict):
        scenario_id = payload.get("scenario_id")
    else:
        scenario_id = None

    try:
        state = env.reset(scenario_id=scenario_id)
        return JSONResponse({"state": state, "status": "ok"})
    except Exception as exc:
        return JSONResponse({"error": str(exc)}, status_code=400)
@app.post("/step")
@app.post("/step/")
async def api_step(request: Request):
    """POST /step → apply an action to the shared environment.

    Accepted request bodies:
      * envelope form: ``{"action": {"label": ..., "action": ..., ...}}``
      * flat form: ``{"label": ..., "action": "remove", ...}``

    The flat form is recognised because the action dict itself has an
    ``"action"`` field: without the fallback, a flat body would hand the bare
    action string to ``env.step``. An unparsable body is treated as empty.

    Responds with the step result, or ``{"error": ...}`` with HTTP 400 if
    stepping (or building the response) raises.
    """
    try:
        body: dict = await request.json()
    except Exception:
        body = {}

    action = body.get("action", {}) if isinstance(body, dict) else {}
    # "action" holding a non-dict alongside a "label" key means the caller
    # posted the action dict itself rather than wrapping it in an envelope.
    if not isinstance(action, dict) and isinstance(body, dict) and "label" in body:
        action = body

    try:
        result = env.step(action)
        return JSONResponse(result)
    except Exception as exc:
        return JSONResponse({"error": str(exc)}, status_code=400)
@app.get("/state")
@app.get("/state/")
async def api_state():
    """GET /state → report the shared environment's current state.

    Responds with ``{"state": ..., "status": "ok"}``, or ``{"error": ...}``
    with HTTP 400 if reading the state (or building the response) raises.
    """
    try:
        snapshot = env.state()
        response = JSONResponse({"state": snapshot, "status": "ok"})
    except Exception as exc:
        response = JSONResponse({"error": str(exc)}, status_code=400)
    return response
# Mount the Gradio UI at the root path of the FastAPI app; `app` is rebound to
# the value returned by mount_gradio_app.
app = gr.mount_gradio_app(app, demo, path="/")
def main():
    """Serve the combined FastAPI + Gradio app on 0.0.0.0:7860 (blocking)."""
    # Pass the app object instead of the "server.app:app" import string. With
    # the string, uvicorn imports this module again when the file is run as a
    # script, rebuilding the environments and UI and requiring the file to be
    # importable as `server.app`.
    uvicorn.run(app, host="0.0.0.0", port=7860)


if __name__ == "__main__":
    main()