Spaces:
Sleeping
Sleeping
| """ | |
app.py — Gradio UI for ContentModerationEnv (Hugging Face Spaces)
=================================================================
Live interactive demo + HTTP API endpoints for the OpenEnv benchmark.

Tabs
----
1. Try It             — step through individual scenarios
2. Baseline           — run the lexical agent over all 128 scenarios
3. API Docs           — Python / shell examples
4. Campaign Detection — deterministic campaign episodes (reset(campaign_id=...))
| """ | |
| import json | |
| import sys | |
| from pathlib import Path | |
| import gradio as gr | |
# Despite its name, SCRIPT_DIR is the *parent* of the directory containing this
# file (two levels up from __file__). It is placed first on sys.path so that
# modules located there can be imported — presumably content_moderation_env and
# baseline_inference live in that directory (TODO confirm).
SCRIPT_DIR = Path(__file__).parent.parent
sys.path.insert(0, str(SCRIPT_DIR))
| from content_moderation_env import ContentModerationEnv, CampaignModerationEnv | |
| from baseline_inference import run_baseline | |
# ── env singleton ──────────────────────────────────────────────────────────────
# Benchmark data files are resolved relative to SCRIPT_DIR.
SCENARIOS_PATH = SCRIPT_DIR / "moderation_benchmark.json"
CAMPAIGNS_PATH = SCRIPT_DIR / "campaign_benchmark.json"
# Module-level environment instances; the UI callbacks and the api_* handlers
# in this file all operate on these same two objects.
env = ContentModerationEnv(str(SCENARIOS_PATH), seed=42)
campaign_env = CampaignModerationEnv(str(CAMPAIGNS_PATH), seed=42)
# ID lists used to populate the scenario / campaign dropdowns.
ALL_IDS = env.scenario_ids
# NOTE(review): this reads a private attribute of CampaignModerationEnv; prefer a
# public accessor if the class offers one.
CAMPAIGN_IDS = campaign_env._campaign_ids
| # ── helpers ─────────────────────────────────────────────────────────────────── | |
| def _fmt_state(s: dict) -> str: | |
| lines = [f"**Text:** {s['text']}"] | |
| if s.get("audio_transcript"): | |
| lines.append(f"**Audio:** {s['audio_transcript']}") | |
| if s.get("visual_tags"): | |
| lines.append(f"**Visual tags:** {', '.join(s['visual_tags'])}") | |
| lines.append(f"**Previous flags:** {s['previous_flags']} | **Policy:** {s['platform_policy']}") | |
| return "\n\n".join(lines) | |
| def _reward_bar(reward: float) -> str: | |
| filled = int(reward * 20) | |
| bar = "█" * filled + "░" * (20 - filled) | |
| emoji = "✅" if reward >= 0.8 else ("🟡" if reward >= 0.4 else "❌") | |
| return f"{emoji} [{bar}] {reward:.2f}" | |
| # ── Tab 1: Try It ───────────────────────────────────────────────────────────── | |
def load_scenario(scenario_id: str):
    """Reset the shared env to ``scenario_id`` and build the Try It tab outputs.

    Returns a 3-tuple: observation Markdown, tier Markdown, and a Gradio update
    that shows the severity slider only for ``hard``-tier scenarios. If the
    reset fails, the error text is returned and the slider is hidden.
    """
    try:
        observation = env.reset(scenario_id)
    except Exception as e:
        return f"Error: {e}", "", gr.update(visible=False)

    tier = env._current_scenario["tier"]
    return (
        _fmt_state(observation),
        f"**Tier:** `{tier}`",
        gr.update(visible=(tier == "hard")),
    )
def submit_action(scenario_id: str, label: str, action: str, severity: int, rationale: str):
    """Score one moderation decision for ``scenario_id`` and render the result.

    The shared env is reset to the selected scenario before stepping, so the
    decision is always evaluated against the scenario chosen in the dropdown.

    Args:
        scenario_id: Scenario to evaluate against.
        label: Chosen content label.
        action: Chosen moderation action.
        severity: Chosen severity value from the slider.
        rationale: Free-text explanation.

    Returns:
        A 2-tuple ``(result_markdown, raw_json_markdown)``. On a reset or step
        failure the first element carries the error text and the second is "".
    """
    try:
        env.reset(scenario_id)
    except Exception as e:
        return f"Error resetting: {e}", ""

    act_dict = {"label": label, "action": action, "severity": severity, "rationale": rationale}
    try:
        result = env.step(act_dict)
    except Exception as e:
        return f"Error in step(): {e}", ""

    info = result["info"]
    gt = info["ground_truth"]
    bd = info["score_breakdown"]
    reward = result["reward"]

    # Blank lines separate the Markdown blocks: without them the table is not
    # cleanly delimited and the ground-truth line is absorbed as a table row.
    out_md = f"""
### Result

{_reward_bar(reward)}

| Component | Score |
|-----------|-------|
| Label correct | `{bd.get('label_correct', 'n/a')}` |
| Action correct | `{bd.get('action_correct', 'n/a')}` |
| Severity ±1 | `{bd.get('severity_within_1', 'n/a')}` |

**Ground truth:** label=`{gt['label']}` action=`{gt['action']}` severity=`{gt.get('severity', 'n/a')}`

> {gt.get('rationale', '')}
"""
    # default=str keeps the dump from failing on values json cannot encode natively.
    raw = json.dumps(result, indent=2, default=str)
    return out_md, f"```json\n{raw}\n```"
| # ── Tab 2: Baseline ─────────────────────────────────────────────────────────── | |
def _summary_row(name, rewards):
    """Build one results-table row: [name, N, mean (3 dp), #perfect, #zero]."""
    eps = 1e-9  # tolerance so float rounding in summed partial credit still counts as 1.0 / 0.0
    mean = sum(rewards) / len(rewards) if rewards else 0.0
    perfect = sum(1 for r in rewards if abs(r - 1.0) <= eps)
    zero = sum(1 for r in rewards if abs(r) <= eps)
    return [name, len(rewards), f"{mean:.3f}", perfect, zero]


def run_baseline_tab(tier_filter: str):
    """Run the baseline agent and summarise its rewards per tier.

    Args:
        tier_filter: ``"all"`` to evaluate every tier, otherwise a tier name
            that is forwarded to ``run_baseline``.

    Returns:
        A 2-tuple ``(rows, status_markdown)``. ``rows`` holds one row per tier
        that produced results plus a final ``**OVERALL**`` row, in the column
        order Tier, N, Mean Reward, Perfect (1.0), Zero (0.0).
    """
    tf = None if tier_filter == "all" else tier_filter
    results = run_baseline(tier_filter=tf, seed=42, verbose=False)

    rows = []
    for tier in ("easy", "medium", "hard"):
        tier_rewards = [r["reward"] for r in results if r["tier"] == tier]
        if tier_rewards:  # tiers with no results are left out of the table
            rows.append(_summary_row(tier, tier_rewards))

    all_rw = [r["reward"] for r in results]
    overall = sum(all_rw) / len(all_rw) if all_rw else 0.0
    rows.append(_summary_row("**OVERALL**", all_rw))
    return rows, f"Baseline complete. Overall mean reward: **{overall:.3f}**"
# ── Tab 4: Campaign Detection ────────────────────────────────────────────────
def load_campaign(campaign_id=None):
    """Reset the campaign env and render its posts for the Campaign Detection tab.

    Returns a 2-tuple ``(header_markdown, posts_markdown)``. If the reset
    fails, the error text and a fixed failure message are returned instead.
    """
    try:
        state = campaign_env.reset(campaign_id=campaign_id)
    except Exception as e:
        return f"Error: {e}", "Failed to load campaign."

    chunks = []
    for idx, post in enumerate(state.get("posts", []), start=1):
        # One metadata line per post: account, time offset, platform.
        chunks.append(
            f"**Post {idx}** — account: `{post.get('account_id', 'N/A')}`"
            f" | +{post.get('posted_at_offset_minutes', 0)} min"
            f" | platform: `{post.get('platform', 'unknown')}`\n\n"
        )
        chunks.append(f"> {post.get('text', '')}\n\n")
        tags = post.get("visual_tags")
        if tags:
            chunks.append(f"*Visual signals: {', '.join(tags)}*\n\n")
        chunks.append("---\n\n")

    header = (
        f"**Campaign:** `{state.get('campaign_id', 'N/A')}` |"
        f" {state.get('num_posts', 0)} posts\n"
    )
    return header, "".join(chunks)
def submit_campaign(campaign_id, is_coord_str, action, reasoning):
    """Score a campaign-level decision and render the outcome as Markdown.

    Args:
        campaign_id: Campaign to evaluate against; the env is reset to it first.
        is_coord_str: ``"true"`` or ``"false"`` from the dropdown; only the
            exact string ``"true"`` is treated as a positive.
        action: Chosen moderation action.
        reasoning: Free-text explanation.

    Returns:
        A Markdown string with the reward gauge, ground truth and score
        breakdown, or an error message if reset/step fails.
    """
    try:
        campaign_env.reset(campaign_id=campaign_id)
    except Exception as e:
        return f"Error resetting campaign: {e}"

    action_dict = {
        "is_coordinated": is_coord_str == "true",
        "action": action,
        "reasoning": reasoning,
    }
    # Guard step() the same way submit_action does, so an env error is shown
    # in the result panel instead of surfacing as an unhandled callback error.
    try:
        result = campaign_env.step(action_dict)
    except Exception as e:
        return f"Error in step(): {e}"

    r = result.get("reward", 0.0)
    info = result.get("info", {})
    gt = info.get("ground_truth", {})
    bd = info.get("score_breakdown", {})

    # Clamp to [0, 1] for the bar only: rewards can be negative (false-positive
    # penalty) and the bar must stay 20 cells wide. The number shown is unclamped.
    filled = int(min(max(r, 0.0), 1.0) * 20)
    bar = "█" * filled + "░" * (20 - filled)
    emoji = "✅" if r >= 0.8 else ("🟡" if r >= 0.4 else "❌")

    parts = [
        f"{emoji} [{bar}] {r:.2f}\n\n",
        # .get() with the original fallbacks, so a partial ground-truth dict
        # cannot raise KeyError.
        f"**Ground truth:** coordinated=`{gt.get('is_coordinated', False)}`",
        f" action=`{gt.get('correct_action', 'None')}`\n\n",
        "**Score breakdown:**\n\n",
    ]
    parts.extend(f" - `{k}`: `{v}`\n" for k, v in bd.items())
    return "".join(parts)
# ── Tab 3: API examples ───────────────────────────────────────────────────────
# Shell snippet displayed (read-only) in the API Docs tab. Although the UI
# labels it "Shell / curl equivalent", it drives the environment through
# `python -c` one-liners rather than HTTP calls.
API_CURL = """\
# 1. Reset (load a random scenario)
STATE=$(python -c "
import json, sys
sys.path.insert(0, '.')
from content_moderation_env import ContentModerationEnv
env = ContentModerationEnv('moderation_benchmark.json', seed=42)
state = env.reset()
print(json.dumps(state, indent=2))
")
# 2. Step (submit your action)
python -c "
import json, sys
sys.path.insert(0, '.')
from content_moderation_env import ContentModerationEnv
env = ContentModerationEnv('moderation_benchmark.json', seed=42)
env.reset('scen_hard_1')
result = env.step({
'label': 'toxic',
'action': 'escalate',
'severity': 5,
'rationale': 'Coordinated physical threat.'
})
print(json.dumps(result, indent=2))
"
"""
# Python usage example displayed in the API Docs tab via gr.Code.
API_PYTHON = """\
from content_moderation_env import ContentModerationEnv
# Instantiate
env = ContentModerationEnv("moderation_benchmark.json", seed=42)
print(f"Loaded {env.num_scenarios} scenarios")
# Episode
state = env.reset() # random
# state = env.reset("scen_hard_1") # specific
print(state["text"])
result = env.step({
"label": "toxic",
"action": "escalate",
"severity": 4,
"rationale": "Threat indicators detected."
})
print(f"Reward: {result['reward']}")
print(f"Breakdown: {result['info']['score_breakdown']}")
"""
| # ── Build UI ────────────────────────────────────────────────────────────────── | |
# Soft base theme (emerald primary, zinc neutral, Inter with system fallbacks),
# followed by token overrides for primary buttons and block chrome.
THEME = gr.themes.Soft(
    font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
    neutral_hue="zinc",
    primary_hue="emerald",
).set(
    **{
        # Primary buttons
        "button_primary_background_fill": "*primary_500",
        "button_primary_background_fill_hover": "*primary_600",
        # Block containers
        "block_radius": "12px",
        "block_border_width": "1px",
        "block_border_color": "*neutral_200",
        "block_border_color_dark": "*neutral_700",
        "block_background_fill": "*background_fill_secondary",
    }
)
# Custom stylesheet passed to gr.Blocks. The class names correspond to the
# elem_classes used in the UI definition: "header", "action-btn" and
# "observation-card"; the `.dark` rules are the dark-mode variants.
CSS = """
.gradio-container {
max-width: 1100px !important;
margin: 0 auto;
}
.header {
text-align: center;
padding: 3rem 0 2rem;
margin-bottom: 2rem;
background: linear-gradient(135deg, rgba(16,185,129,0.1) 0%, rgba(59,130,246,0.1) 100%);
border-radius: 12px;
border: 1px solid rgba(0,0,0,0.05);
}
.dark .header {
background: linear-gradient(135deg, rgba(16,185,129,0.05) 0%, rgba(59,130,246,0.05) 100%);
border-color: rgba(255,255,255,0.05);
}
.action-btn {
font-weight: 600 !important;
font-size: 1.1rem !important;
padding: 0.75rem !important;
box-shadow: 0 4px 6px -1px rgba(0,0,0,0.1) !important;
transition: transform 0.1s ease !important;
}
.action-btn:hover {
transform: translateY(-1px) !important;
}
.observation-card {
background: white;
padding: 1.5rem;
border-radius: 12px;
border: 1px solid #e5e7eb;
box-shadow: 0 1px 3px 0 rgba(0,0,0,0.05);
}
.dark .observation-card {
background: #1f2937;
border-color: #374151;
}
"""
# UI definition: a header, four tabs (Try It, Baseline, API Docs, Campaign
# Detection) and a footer. Markdown bodies are kept flush-left and use blank
# lines between blocks so headings, tables and quotes render as separate blocks.
with gr.Blocks(theme=THEME, css=CSS, title="ContentModerationEnv — OpenEnv Benchmark") as demo:
    with gr.Column(elem_classes=["header"]):
        gr.Markdown("""
# 🛡️ ContentModerationEnv

### An OpenEnv benchmark for evaluating AI content moderation agents

> **128 scenarios** across 3 difficulty tiers (easy / medium / hard) ·
> **Partial-credit scoring** (0.0 – 1.0) · **Full OpenEnv API**
> `reset()` · `step()` · `state()` · `appeal()` · campaign mode · adversarial scenarios
""")
    with gr.Tabs():
        # ── Tab 1: Try It ─────────────────────────────────────────────────────
        with gr.Tab("🎮 Try It"):
            with gr.Row():
                with gr.Column(scale=1):
                    sid_dd = gr.Dropdown(
                        choices=ALL_IDS,
                        # Guard against an empty benchmark: ALL_IDS[0] would
                        # raise IndexError at import time.
                        value=ALL_IDS[0] if ALL_IDS else None,
                        label="Select Scenario ID to Load",
                        interactive=True,
                    )
                    tier_md = gr.Markdown()
                with gr.Column(scale=2):
                    state_md = gr.Markdown(
                        "**Select a scenario** from the dropdown to begin →",
                        label="Observation",
                        elem_classes=["observation-card"],
                    )
            gr.Markdown("### Your moderation decision")
            with gr.Row():
                label_dd = gr.Dropdown(
                    choices=["safe", "toxic", "spam", "misleading"],
                    value="safe", label="Label",
                )
                action_dd = gr.Dropdown(
                    choices=["allow", "warn", "remove", "shadowban", "escalate"],
                    value="allow", label="Action",
                )
                # Hidden by default; load_scenario reveals it for hard-tier scenarios.
                sev_slider = gr.Slider(1, 5, value=3, step=1,
                                       label="Severity (hard tier)", visible=False)
            rationale_tb = gr.Textbox(label="Rationale (optional)", lines=2,
                                      placeholder="Brief explanation …")
            step_btn = gr.Button("Submit → env.step()", variant="primary", elem_classes=["action-btn"])
            result_md = gr.Markdown()
            result_raw = gr.Markdown()
            sid_dd.change(
                load_scenario,
                inputs=[sid_dd],
                outputs=[state_md, tier_md, sev_slider],
            )
            step_btn.click(
                submit_action,
                inputs=[sid_dd, label_dd, action_dd, sev_slider, rationale_tb],
                outputs=[result_md, result_raw],
            )
        # ── Tab 2: Baseline ───────────────────────────────────────────────────
        with gr.Tab("📊 Baseline"):
            gr.Markdown("""
### Lexical Rule-Based Baseline

A deterministic, no-LLM agent that uses regex patterns to classify content
and policy-based rules to choose an action. Run it to verify the environment
and as a comparison floor for LLM agents.
""")
            tier_radio = gr.Radio(
                choices=["all", "easy", "medium", "hard"],
                value="all", label="Tier to evaluate",
            )
            run_btn = gr.Button("Run Baseline", variant="primary")
            status_md = gr.Markdown()
            result_tbl = gr.Dataframe(
                headers=["Tier", "N", "Mean Reward", "Perfect (1.0)", "Zero (0.0)"],
                interactive=False,
            )
            run_btn.click(
                run_baseline_tab,
                inputs=[tier_radio],
                outputs=[result_tbl, status_md],
            )
        # ── Tab 3: API Docs ───────────────────────────────────────────────────
        with gr.Tab("📖 API Docs"):
            gr.Markdown("""
## Quick Start

```bash
git clone https://huggingface.co/spaces/sohambanerjee/content-moderation-env
cd content-moderation-env
pip install -r requirements.txt
```

### Python API
""")
            gr.Code(API_PYTHON, language="python", label="Python usage")
            gr.Markdown("### Shell / curl equivalent")
            gr.Textbox(API_CURL, label="Shell usage", lines=20, interactive=False)
            gr.Markdown("""
## Action Space

| Field | Type | Required | Values |
|-------|------|----------|--------|
| `label` | str | ✅ | `safe` · `toxic` · `spam` · `misleading` |
| `action` | str | ✅ | `allow` · `warn` · `remove` · `shadowban` · `escalate` |
| `severity` | int 1-5 | ❌ (scored in hard) | `1` (mild) → `5` (critical) |
| `rationale` | str | ❌ | Free text explanation |

## Reward Function

| Tier | Label | Action | Severity ±1 |
|------|-------|--------|-------------|
| easy / medium | 0.5 | 0.5 | — |
| hard | 0.4 | 0.4 | 0.2 |

## Baseline Scores (lexical agent, seed=42)

| Tier | N | Mean Reward |
|------|---|-------------|
| easy | 52 | 0.375 |
| medium | 25 | 0.460 |
| hard | 51 | 0.144 |
| **overall** | **128** | **0.300** |
""")
        # ── Tab 4: Campaign Detection ────────────────────────────────────
        with gr.Tab("🎯 Campaign Detection"):
            gr.Markdown("""
### Coordinated Campaign Detection

Review **3 posts from different accounts** and determine whether they form
a coordinated inauthentic behavior campaign.

| Field | Description |
|-------|-------------|
| `is_coordinated` | True if posts are from a coordinated operation |
| `action` | `allow` / `remove` / `shadowban` / `escalate` |
| Reward | +0.5 coordination detected · +0.5 action correct · -0.2 false positive |
""")
            with gr.Row():
                with gr.Column(scale=1):
                    camp_sid_dd = gr.Dropdown(
                        choices=CAMPAIGN_IDS,
                        # Same empty-list guard as the scenario dropdown.
                        value=CAMPAIGN_IDS[0] if CAMPAIGN_IDS else None,
                        label="Select Campaign to Load",
                        interactive=True,
                    )
                    camp_type_md = gr.Markdown()
                with gr.Column(scale=2):
                    camp_posts_md = gr.Markdown(
                        "**Select a campaign** from the dropdown to begin →",
                        elem_classes=["observation-card"],
                    )
            with gr.Row():
                is_coord_dd = gr.Dropdown(
                    choices=["true", "false"],
                    value="false",
                    label="Is Coordinated?",
                )
                camp_action_dd = gr.Dropdown(
                    choices=["allow", "remove", "shadowban", "escalate"],
                    value="allow",
                    label="Action",
                )
            reasoning_tb = gr.Textbox(
                label="Reasoning (optional)", lines=2,
                placeholder="Explain your coordination assessment...",
            )
            camp_submit_btn = gr.Button(
                "Submit → campaign_env.step()", variant="primary", elem_classes=["action-btn"]
            )
            camp_result_md = gr.Markdown()
            camp_sid_dd.change(
                load_campaign,
                inputs=[camp_sid_dd],
                outputs=[camp_type_md, camp_posts_md],
            )
            camp_submit_btn.click(
                submit_campaign,
                inputs=[camp_sid_dd, is_coord_dd, camp_action_dd, reasoning_tb],
                outputs=[camp_result_md],
            )
    gr.Markdown("""
---

<p style="text-align:center; color: #888; font-size: 0.85rem;">
ContentModerationEnv v2.0 · OpenEnv · MIT License
</p>
""")
    # The dropdowns start with an ID preselected, but `change` only fires on
    # user interaction. Populate both observation panels on page load so the
    # user sees the item that Submit would actually score.
    demo.load(
        load_scenario,
        inputs=[sid_dd],
        outputs=[state_md, tier_md, sev_slider],
    )
    demo.load(
        load_campaign,
        inputs=[camp_sid_dd],
        outputs=[camp_type_md, camp_posts_md],
    )
| # ── OpenEnv HTTP API routes ─────────────────────────────────────────────────── | |
| # Added to the Gradio FastAPI instance so POST /reset returns HTTP 200, | |
| # satisfying the HF Space validator check. | |
| from fastapi import FastAPI, Request | |
| from fastapi.responses import JSONResponse | |
| import uvicorn | |
# Base FastAPI application; the Gradio UI is mounted onto it further down.
app = FastAPI()
@app.post("/reset")
async def api_reset(request: Request):
    """POST /reset → initial observation, HTTP 200.

    Accepts an optional JSON body ``{"scenario_id": "<id>"}``. A missing,
    non-JSON or malformed body is treated as empty, in which case
    ``env.reset`` is called with ``scenario_id=None``.

    Returns:
        ``{"state": ..., "status": "ok"}`` on success, or
        ``{"error": "<message>"}`` with HTTP 400 if the reset (or encoding
        its result) fails.
    """
    # The route decorator is required: without it this handler is never
    # attached to `app` and POST /reset is not served by it.
    body: dict = {}
    if request.headers.get("content-type", "").startswith("application/json"):
        try:
            body = await request.json()
        except Exception:
            # Deliberately lenient: a bad body must not prevent a reset.
            body = {}
    scenario_id = body.get("scenario_id", None) if isinstance(body, dict) else None
    try:
        state = env.reset(scenario_id=scenario_id)
        return JSONResponse({"state": state, "status": "ok"})
    except Exception as exc:
        return JSONResponse({"error": str(exc)}, status_code=400)
@app.post("/step")
async def api_step(request: Request):
    """POST /step → apply an action to the shared env and return the result.

    Expects a JSON body of the form ``{"action": {...}}``. If the body cannot
    be parsed or is not an object, an empty action dict is passed to
    ``env.step``.

    Returns:
        The ``env.step`` result as JSON, or ``{"error": "<message>"}`` with
        HTTP 400 if stepping (or encoding the result) fails.
    """
    # The route decorator is required: without it this handler is never
    # attached to `app` and POST /step is not served by it.
    try:
        body: dict = await request.json()
    except Exception:
        body = {}
    action = body.get("action", {}) if isinstance(body, dict) else {}
    try:
        result = env.step(action)
        return JSONResponse(result)
    except Exception as exc:
        return JSONResponse({"error": str(exc)}, status_code=400)
@app.get("/state")
async def api_state():
    """GET /state → current environment state.

    Returns:
        ``{"state": ..., "status": "ok"}`` on success, or
        ``{"error": "<message>"}`` with HTTP 400 if ``env.state()`` (or
        encoding its result) fails.
    """
    # The route decorator is required: without it this handler is never
    # attached to `app` and GET /state is not served by it.
    try:
        state = env.state()
        return JSONResponse({"state": state, "status": "ok"})
    except Exception as exc:
        return JSONResponse({"error": str(exc)}, status_code=400)
# Mount the Gradio UI at the root path of the FastAPI app and rebind `app` to
# the returned application, which is the object served by uvicorn in main().
app = gr.mount_gradio_app(app, demo, path="/")
def main():
    """Serve the combined FastAPI + Gradio application on 0.0.0.0:7860.

    The app object is passed to uvicorn directly rather than the
    ``"server.app:app"`` import string. The string form makes uvicorn import
    this module a second time when the file is executed as a script (building
    the environments and the UI twice) and only works when the file is
    importable as ``server.app``.
    """
    uvicorn.run(app, host="0.0.0.0", port=7860)


if __name__ == "__main__":
    main()