Commit ·
350500c
0
Parent(s):
Initial commit: Meta Ad-Policy Sandbox
Browse files- .dockerignore +3 -0
- README.md +29 -0
- dockerfile +20 -0
- inference.py +105 -0
- main.py +16 -0
- openenv.yaml +20 -0
- pyproject.toml +15 -0
- requirements.txt +5 -0
- src/.gitignore +0 -0
- src/__init__.py +0 -0
- src/__pycache__/__init__.cpython-313.pyc +0 -0
- src/__pycache__/environment.cpython-313.pyc +0 -0
- src/__pycache__/generator.cpython-313.pyc +0 -0
- src/__pycache__/models.cpython-313.pyc +0 -0
- src/environment.py +99 -0
- src/generator.py +97 -0
- src/models.py +28 -0
- test_env.py +57 -0
- validate.sh +48 -0
.dockerignore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
venv/
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
README.md
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🛡️ Meta Ad-Policy RL Sandbox
|
| 2 |
+
|
| 3 |
+
A custom, bleeding-edge Reinforcement Learning environment built for the Meta Ad-Policy Hackathon. This sandbox evaluates the ability of Vision-Language Models (VLMs) and LLMs to act as autonomous ad moderators, navigating complex policy violations, multimodal traps, and illegal targeting.
|
| 4 |
+
|
| 5 |
+
## 🚀 Core Features
|
| 6 |
+
* **OpenEnv 0.2.3 Compliant:** Fully implements the latest Meta OpenEnv specifications, including Pydantic `StepResult` state serialization and `/step` & `/reset` API endpoints.
|
| 7 |
+
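* **Reward Shaping:** Charges a `-0.05` penalty for every tool call (`analyze_image`, `request_landing_page`, `request_id_verification`) to force the AI agent to optimize tool usage and prevent infinite analysis loops. Terminal decisions score `+1.0` when correct and `-1.0` when incorrect.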
|
| 8 |
+
* **Multimodal Traps:** Tests the limits of VLMs by presenting ads where the text is benign, but the visual elements contain severe policy violations.
|
| 9 |
+
* **Containerized Infrastructure:** Fully Dockerized and highly lightweight, easily running under the 2 vCPU / 8GB RAM hackathon constraints.
|
| 10 |
+
|
| 11 |
+
## 📋 Evaluation Tasks
|
| 12 |
+
The environment natively supports 4 distinct adversarial tasks, loadable via the `task_id` parameter:
|
| 13 |
+
1. `task_1_healthcare`: Evaluates ads for unapproved medical claims, pharmaceuticals, and subtle dog whistles.
|
| 14 |
+
2. `task_2_financial`: Evaluates ads for predatory financial services, scams, and high-pressure tactics.
|
| 15 |
+
3. `task_3_multimodal`: Detects policy violations hidden entirely within visual elements that bypass standard NLP text filters.
|
| 16 |
+
4. `task_4_targeting`: Identifies illegal demographic targeting (e.g., adult financial services targeting minors).
|
| 17 |
+
|
| 18 |
+
## 🛠️ Available Agent Tools
|
| 19 |
+
The environment exposes the following action space to the evaluating LLM:
|
| 20 |
+
* `analyze_image`: Request VLM context for visual elements.
|
| 21 |
+
* `request_landing_page`: Extract simulated URL endpoints.
|
| 22 |
+
* `request_id_verification`: Verify the advertiser's identity against its profile.
|
| 23 |
+
* `approve` / `reject`: Terminal actions.
|
| 24 |
+
|
| 25 |
+
## 🚦 Quick Start (Local)
|
| 26 |
+
|
| 27 |
+
**1. Build the Docker Image**
|
| 28 |
+
```bash
|
| 29 |
+
docker build -t meta-ad-sandbox .
```
|
dockerfile
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Use a lightweight Python image
|
| 2 |
+
FROM python:3.11-slim
|
| 3 |
+
|
| 4 |
+
# Set the working directory
|
| 5 |
+
WORKDIR /app
|
| 6 |
+
|
| 7 |
+
# Copy requirements first (makes builds faster)
|
| 8 |
+
COPY requirements.txt .
|
| 9 |
+
|
| 10 |
+
# Install dependencies
|
| 11 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 12 |
+
|
| 13 |
+
# Copy the rest of your code
|
| 14 |
+
COPY . .
|
| 15 |
+
|
| 16 |
+
# Expose the port Uvicorn uses
|
| 17 |
+
EXPOSE 8000
|
| 18 |
+
|
| 19 |
+
# Start the server
|
| 20 |
+
CMD ["python", "main.py"]
|
inference.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import requests
|
| 4 |
+
from openai import OpenAI
|
| 5 |
+
|
| 6 |
+
# 1. 🚨 MANDATORY VARIABLES EXACTLY AS REQUESTED BY SCALAR
# Each value is read from the environment, falling back to the default shown.
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
# HF_TOKEN takes precedence; API_KEY (or a dummy placeholder) is only used
# when HF_TOKEN is unset or empty.
API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY", "dummy_local_token")
MODEL_NAME = os.getenv("MODEL_NAME", "meta-llama/Llama-3-70b-chat-hf")

# Base URL of the environment server that receives /reset and /step requests.
ENV_URL = "http://localhost:8000"
# Upper bound on agent steps per episode, so a run always terminates.
MAX_STEPS = 10

# 2. MANDATORY: Use OpenAI Client pointed at the HF Router
# NOTE: the client is built at import time from the values above.
client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)

# The exact tasks defined in your openenv.yaml; one episode is run per entry.
TASKS = [
    "task_1_healthcare",
    "task_2_financial",
    "task_3_multimodal",
    "task_4_targeting"
]
|
| 24 |
+
|
| 25 |
+
# Action names offered to the model in the prompt below. Anything else the
# model returns is replaced with the safe default before it is sent onward.
_ALLOWED_ACTIONS = (
    "analyze_image",
    "request_landing_page",
    "request_id_verification",
    "approve",
    "reject",
)
_DEFAULT_ACTION = "analyze_image"


def get_llm_action(observation_data):
    """Ask the LLM which action to take for the given ad observation.

    Args:
        observation_data: JSON-serializable observation (normally the dict
            returned by the environment) that is embedded in the user prompt.

    Returns:
        A dict with two string entries: ``"action_type"`` (always one of
        ``_ALLOWED_ACTIONS``) and ``"reasoning"``.

    Any failure while calling the model or parsing its reply is reported on
    stdout and answered with the ``analyze_image`` fallback action, so the
    caller's episode loop never crashes because of the LLM.
    """
    system_prompt = (
        "You are an expert Meta Ad-Policy Moderator AI.\n"
        "Evaluate the ad and output a decision. Using tools costs -0.05 points, so be efficient.\n"
        "\n"
        "AVAILABLE ACTIONS:\n"
        "- analyze_image\n"
        "- request_landing_page\n"
        "- request_id_verification\n"
        "- approve\n"
        "- reject\n"
        "\n"
        'You MUST respond in valid JSON format containing "action_type" and "reasoning".\n'
    )

    user_prompt = f"Current Ad Observation:\n{json.dumps(observation_data, indent=2)}\n\nWhat is your next action?"

    try:
        response = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            response_format={"type": "json_object"},
            temperature=0.1
        )

        result = json.loads(response.choices[0].message.content)
        if not isinstance(result, dict):
            raise ValueError(f"expected a JSON object, got {type(result).__name__}")

        # Models often vary case/whitespace ("REJECT", " approve "); normalize
        # first, then refuse anything outside the known action set.
        action_type = str(result.get("action_type", _DEFAULT_ACTION)).strip().lower()
        if action_type not in _ALLOWED_ACTIONS:
            print(f"⚠️ Unknown action '{action_type}' from LLM. Using '{_DEFAULT_ACTION}'.")
            action_type = _DEFAULT_ACTION

        # `or` also covers an explicit JSON null / empty string for reasoning.
        reasoning = str(result.get("reasoning") or "Fallback reasoning")

        return {"action_type": action_type, "reasoning": reasoning}
    except Exception as e:
        # Deliberate best-effort boundary: network, API and parsing errors all
        # degrade to the same fallback action.
        print(f"⚠️ LLM Call Failed: {e}. Defaulting to safe fallback.")
        return {"action_type": _DEFAULT_ACTION, "reasoning": "Error recovery."}
|
| 62 |
+
|
| 63 |
+
def main() -> None:
    """Run one LLM-driven episode per entry in ``TASKS`` and print the score.

    For each task the environment is reset with that ``task_id``; the agent
    then acts until the environment reports ``done`` or ``MAX_STEPS`` is
    reached. Only the reward of the terminating step is added to the total.
    The function returns early if the environment cannot be reached on reset.
    """
    request_timeout_s = 30  # never block forever on a hung server

    print("🚀 Starting Meta Ad-Policy Automated Inference...")
    total_score = 0.0

    for task_id in TASKS:
        print(f"\n--- 🎬 Starting Episode: {task_id} ---")

        try:
            res = requests.post(
                f"{ENV_URL}/reset",
                json={"task_id": task_id},
                timeout=request_timeout_s,
            )
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            print("❌ Env connection refused. Is your OpenEnv Docker container running?")
            return
        if res.status_code != 200:
            print("❌ Env connection failed. Check if Docker is running on port 8000.")
            return

        # The server may wrap the observation in an "observation" envelope
        # (test_env.py handles both shapes); accept either.
        reset_data = res.json()
        observation = reset_data.get("observation", reset_data)
        done = False
        step_count = 0

        while not done and step_count < MAX_STEPS:
            step_count += 1
            print(f" Step {step_count} | Status: {observation.get('status_message', 'No status')}")

            action_payload = get_llm_action(observation)
            print(f" 🤖 Agent Action: {action_payload['action_type'].upper()}")

            # /step expects the action under an "action" key, matching the
            # payloads built in test_env.py.
            try:
                step_res = requests.post(
                    f"{ENV_URL}/step",
                    json={"action": action_payload},
                    timeout=request_timeout_s,
                )
            except requests.exceptions.RequestException as exc:
                print(f"❌ Step request failed: {exc}")
                break
            if step_res.status_code != 200:
                # An error body is not an observation; abandon this episode.
                print(f"❌ Server Error {step_res.status_code}: {step_res.text}")
                break

            step_data = step_res.json()

            # Extract from the OpenEnv schema: reward/done are looked up at the
            # top level first (as test_env.py does) and inside the observation
            # as a fallback.
            observation = step_data.get("observation", step_data)
            done = bool(step_data.get("done", observation.get("done", False)))
            reward = step_data.get("reward", observation.get("reward", 0.0)) or 0.0

            if done:
                print(f" ✅ Episode Finished! Final Step Reward: {reward}")
                total_score += reward

    print(f"\n🎉 Evaluation Complete! Total Agent Score: {total_score} / {len(TASKS)}")
|
| 103 |
+
|
| 104 |
+
if __name__ == "__main__":
|
| 105 |
+
main()
|
main.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import uvicorn
|
| 2 |
+
from openenv.core.env_server import create_fastapi_app
|
| 3 |
+
from src.environment import AdPolicyEnvironment
|
| 4 |
+
from src.models import AdAction, AdObservation
|
| 5 |
+
|
| 6 |
+
# 1. Create the App
# NOTICE: We pass the CLASS NAME (AdPolicyEnvironment), not 'env' or 'AdPolicyEnvironment()'
# The action and observation model classes are passed alongside it.
app = create_fastapi_app(
    AdPolicyEnvironment,
    AdAction,
    AdObservation
)

# Script entry point: serve the app with Uvicorn on all interfaces, port 8000.
if __name__ == "__main__":
    print("🚀 Starting Meta Ad-Policy Sandbox on http://localhost:8000")
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
openenv.yaml
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: meta-ad-policy-env
|
| 2 |
+
version: "0.2.3"
|
| 3 |
+
description: "A Reinforcement Learning sandbox for multimodal ad-policy moderation."
|
| 4 |
+
|
| 5 |
+
server:
|
| 6 |
+
host: "0.0.0.0"
|
| 7 |
+
port: 8000
|
| 8 |
+
|
| 9 |
+
tasks:
|
| 10 |
+
- task_id: "task_1_healthcare"
|
| 11 |
+
description: "Evaluate ads for unapproved medical claims, pharmaceuticals, and subtle dog whistles."
|
| 12 |
+
|
| 13 |
+
- task_id: "task_2_financial"
|
| 14 |
+
description: "Evaluate ads for predatory financial services, scams, and high-pressure tactics."
|
| 15 |
+
|
| 16 |
+
- task_id: "task_3_multimodal"
|
| 17 |
+
description: "Detect policy violations hidden entirely within visual elements that bypass text filters."
|
| 18 |
+
|
| 19 |
+
- task_id: "task_4_targeting"
|
| 20 |
+
description: "Identify illegal demographic targeting (e.g., adult financial services targeting minors)."
|
pyproject.toml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
|
| 2 |
+
requires = ["setuptools>=61.0"]
|
| 3 |
+
build-backend = "setuptools.build_meta"
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "meta-ad-policy-sandbox"
|
| 7 |
+
version = "0.2.3"
|
| 8 |
+
description = "Meta Ad-Policy RL Sandbox"
|
| 9 |
+
dependencies = [
|
| 10 |
+
"fastapi",
|
| 11 |
+
"uvicorn",
|
| 12 |
+
"pydantic",
|
| 13 |
+
"requests",
|
| 14 |
+
"openai"
|
| 15 |
+
]
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv-core>=0.2.1
|
| 2 |
+
fastapi
|
| 3 |
+
uvicorn
|
| 4 |
+
pydantic
|
| 5 |
+
requests
|
src/.gitignore
ADDED
|
File without changes
|
src/__init__.py
ADDED
|
File without changes
|
src/__pycache__/__init__.cpython-313.pyc
ADDED
|
Binary file (147 Bytes). View file
|
|
|
src/__pycache__/environment.cpython-313.pyc
ADDED
|
Binary file (5.19 kB). View file
|
|
|
src/__pycache__/generator.cpython-313.pyc
ADDED
|
Binary file (4.29 kB). View file
|
|
|
src/__pycache__/models.cpython-313.pyc
ADDED
|
Binary file (1.74 kB). View file
|
|
|
src/environment.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import uuid
|
| 2 |
+
from openenv.core.env_server import Environment
|
| 3 |
+
from src.models import AdAction, AdObservation, AdState
|
| 4 |
+
from src.generator import AdGenerator
|
| 5 |
+
|
| 6 |
+
class AdPolicyEnvironment(Environment):
    """Single-ad moderation episode: the agent may use tools, then approves or rejects.

    Rewards: -0.05 per tool call, +1.0 / -1.0 for a correct / incorrect final
    decision, -0.1 for a missing or malformed action.
    """

    def __init__(self):
        super().__init__()
        self.generator = AdGenerator()
        self.current_ad = None       # dict produced by the generator, or None before first use
        self.image_analyzed = False  # set once `analyze_image` has been called
        self.step_count = 0
        self.total_reward = 0.0

    def _ensure_ad(self):
        """Lazily create a random ad so every public method has one to work on."""
        if self.current_ad is not None:
            return
        self.current_ad = self.generator.generate_random_ad()

    # NOTE(review): this is a plain method; confirm whether the OpenEnv base
    # class expects `state` to be exposed as a property instead.
    def state(self) -> AdState:
        """Return step count, accumulated reward and the current ad id."""
        self._ensure_ad()
        ad_id = self.current_ad.get("ad_id")
        return AdState(
            step_count=self.step_count,
            total_reward=self.total_reward,
            current_ad_id=ad_id,
        )

    def reset(self, task_id: str = None) -> AdObservation:
        """Start a new episode, optionally forcing the scenario via `task_id`."""
        # The generator decides what the task id means.
        self.current_ad = self.generator.generate_random_ad(task_id)
        self.image_analyzed = False
        self.step_count = 0
        self.total_reward = 0.0

        # Echo the task id back so the caller can see it was honoured.
        if task_id:
            welcome = f"Ad loaded for {task_id}. Awaiting review."
        else:
            welcome = "Random ad loaded. Awaiting review."
        return self._get_obs(welcome)

    def step(self, action: AdAction) -> AdObservation:
        """Apply one agent action and return the resulting observation."""
        self._ensure_ad()
        self.step_count += 1

        # Guard clause: a missing/malformed action is penalised harder than a
        # tool call and does not end the episode.
        if not action or not hasattr(action, 'action_type'):
            penalty = -0.1
            self.total_reward += penalty
            return self._get_obs("Invalid action.", penalty, False)

        act_type = str(action.action_type).lower()
        reward, done, message = 0.0, False, "Action processed."

        if act_type == "analyze_image":
            # Tool: reveal the image description for the current ad.
            reward = -0.05
            self.image_analyzed = True
            message = self.current_ad.get('vlm_desc', 'No visual description.')
        elif act_type == "request_landing_page":
            # Tool: the landing-page verdict is derived from the ground truth.
            reward = -0.05
            if self.current_ad.get("ground_truth", False):
                message = "Landing page is high-risk."
            else:
                message = "Landing page is clean."
        elif act_type == "request_id_verification":
            # Tool: always reports a matching ID.
            reward = -0.05
            message = "ID matches advertiser profile."
        elif act_type in ("approve", "reject"):
            # Terminal decision: correct iff "reject" lines up with a violation.
            done = True
            violates = self.current_ad.get("ground_truth", False)
            is_correct = (act_type == "reject") == bool(violates)
            if is_correct:
                reward, verdict = 1.0, "Correct!"
            else:
                reward, verdict = -1.0, "Incorrect."
            message = f"Decision: {act_type.upper()}. " + verdict

        # Keep the running episode total in sync with what is reported.
        self.total_reward += reward
        return self._get_obs(message, reward, done)

    def _get_obs(self, message: str, reward: float = 0.0, done: bool = False) -> AdObservation:
        """Build the observation for the current ad; the ground truth is never included."""
        self._ensure_ad()
        ad = self.current_ad
        return AdObservation(
            ad_id=str(ad.get("ad_id", "N/A")),
            headline=str(ad.get("headline", "N/A")),
            body_text=str(ad.get("body_text", "N/A")),
            advertiser_trust_score=float(ad.get("advertiser_trust_score", 0.0)),
            targeting_data=dict(ad.get("targeting_data", {})),
            image_url=str(ad.get("image_url", "N/A")),
            status_message=str(message),
            reward=reward,
            done=done,
        )
|
src/generator.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
import uuid
|
| 3 |
+
|
| 4 |
+
class AdGenerator:
    """Build mock ad records for the ad-policy environment.

    Every template below is a 5-tuple:
    ``(category, headline, body_text, vlm_desc, is_violation)``.
    """

    # 1. Obvious Violations (Easy mode for the agent)
    OBVIOUS_VIOLATIONS = [
        ("HEALTHCARE", "Secret D1abet3s Cure", "Reverse it today with this root extract!", "A bottle of pills with no label.", True),
        ("FINANCIAL", "Garanteed 500% Returns", "Invest in LlamaCoin now. Zero risk.", "A stack of cash and a rocket emoji.", True)
    ]

    # 2. Multimodal Traps (Text is safe, but the image violates policy)
    # The agent MUST use `analyze_image` to catch these!
    MULTIMODAL_TRAPS = [
        ("HEALTHCARE", "Achieve Your Dream Body", "Hard work and our new program will get you there.", "Before and after weight loss comparison photos. (Meta Policy Violation)", True),
        ("FINANCIAL", "Get Approved in Minutes", "We help you navigate the complex world of finance.", "Fake government ID and a 'Guaranteed Loan Approval' stamp.", True),
        ("HEALTHCARE", "Stay Focused All Day", "Our study supplement helps you crush your exams.", "Close up of an Adderall prescription pill.", True)
    ]

    # 3. Subtle "Dog Whistle" Violations (Hard mode)
    SUBTLE_VIOLATIONS = [
        ("HEALTHCARE", "Optimize Metabolic Markers", "Our proprietary GLP-1 analog ships directly to your door without a visit.", "A sleek, branded injection pen.", True),  # Selling prescription drugs
        ("FINANCIAL", "Beat the System", "Leverage our algorithmic trading bot to exploit market inefficiencies.", "A complex trading chart with green arrows pointing up.", True)  # High-pressure/deceptive claims
    ]

    # 4. Perfectly Safe Ads (To prevent the agent from just rejecting everything)
    SAFE_ADS = [
        ("NONE", "Spring Collection 2026", "Check out our latest seasonal apparel.", "A model wearing a floral dress.", False),
        ("NONE", "Learn Python Online", "Master coding with our 10-week interactive bootcamp.", "A laptop screen showing clean Python code.", False),
        ("NONE", "Local Bakery Opening", "Come get fresh sourdough bread this Saturday!", "A warm, freshly baked loaf of bread.", False),
        ("FINANCIAL", "Plan For Retirement", "Speak with our certified fiduciaries to secure your future.", "A happy older couple walking on a beach.", False),  # Safe financial ad
        ("HEALTHCARE", "Daily Multivitamin", "Support your immune system with Vitamin C and Zinc.", "A bottle of standard, FDA-approved daily vitamins.", False)  # Safe health ad
    ]

    # 5. Targeting trap: harmless creative, but the audience includes minors.
    _TARGETING_TRAP_AD = (
        "FINANCIAL",
        "Start Your First Investment Portfolio",
        "Learn the basics of stock trading with our beginner-friendly app.",
        "A smartphone showing a simple stock graph.",
        True,
    )
    _TARGETING_TRAP_MIN_AGE = 15  # The trap!

    # Tasks that force a violating ad of one category (obvious or subtle pools).
    _FORCED_CATEGORY_BY_TASK = {
        "task_1_healthcare": "HEALTHCARE",
        "task_2_financial": "FINANCIAL",
    }
    # Tasks that force a whole scenario type.
    _FORCED_SCENARIO_BY_TASK = {
        "task_3_multimodal": "multimodal",
        "task_4_targeting": "targeting_trap",
    }
    # Template pool for every scenario that is drawn from a list.
    _POOL_BY_SCENARIO = {
        "obvious": OBVIOUS_VIOLATIONS,
        "multimodal": MULTIMODAL_TRAPS,
        "subtle": SUBTLE_VIOLATIONS,
        "safe": SAFE_ADS,
    }

    @staticmethod
    def generate_random_ad(task_id: str = None):
        """Return one ad as a dict, optionally constrained by ``task_id``.

        Args:
            task_id: ``None`` (or any unrecognised value) picks a weighted
                random scenario. ``task_1_healthcare`` / ``task_2_financial``
                pick a violating ad of that category from the obvious and
                subtle pools; ``task_3_multimodal`` picks a multimodal trap;
                ``task_4_targeting`` returns the targeting trap (min age 15).

        Returns:
            Dict with keys ``ad_id``, ``headline``, ``body_text``,
            ``advertiser_trust_score``, ``targeting_data``, ``image_url``,
            ``ground_truth``, ``category`` and ``vlm_desc``.
        """
        # Default to completely random if no task is specified. The draw is
        # made unconditionally so the sequence of random calls is the same
        # whether or not a task override applies.
        scenario_type = random.choices(
            ["obvious", "multimodal", "subtle", "safe", "targeting_trap"],
            weights=[0.15, 0.25, 0.20, 0.30, 0.10],
            k=1
        )[0]

        # 🧠 TASK OVERRIDES: Force specific scenarios based on the grader's request.
        # `template` stays None until a concrete ad has been selected; this
        # replaces the former `'name' in locals()` introspection.
        template = None
        forced_category = AdGenerator._FORCED_CATEGORY_BY_TASK.get(task_id)
        if forced_category is not None:
            pool = [
                ad
                for ad in AdGenerator.OBVIOUS_VIOLATIONS + AdGenerator.SUBTLE_VIOLATIONS
                if ad[0] == forced_category
            ]
            template = random.choice(pool)
        else:
            scenario_type = AdGenerator._FORCED_SCENARIO_BY_TASK.get(task_id, scenario_type)

        # --- Base Logic Execution ---
        age_limit = random.randint(18, 55)

        if template is None:
            if scenario_type == "targeting_trap":
                template = AdGenerator._TARGETING_TRAP_AD
                age_limit = AdGenerator._TARGETING_TRAP_MIN_AGE
            else:
                template = random.choice(AdGenerator._POOL_BY_SCENARIO[scenario_type])

        category, headline, body, vlm_desc, is_violation = template

        # Violating ads get a low trust score, clean ads a high one ...
        if is_violation:
            trust_score = round(random.uniform(0.1, 0.5), 2)
        else:
            trust_score = round(random.uniform(0.7, 0.99), 2)
        # ... except that roughly 15% of all ads are overridden to 0.95, so the
        # score alone cannot be used to decide.
        if random.random() > 0.85:
            trust_score = 0.95

        return {
            "ad_id": str(uuid.uuid4())[:8],
            "headline": headline,
            "body_text": body,
            "advertiser_trust_score": trust_score,
            "targeting_data": {"min_age": age_limit, "geo": "US"},
            "image_url": f"https://mock-meta.com/img/{uuid.uuid4()}.jpg",
            "ground_truth": is_violation,
            "category": category,
            "vlm_desc": vlm_desc
        }
|
src/models.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Literal, Optional, Dict, Any
|
| 2 |
+
from openenv.core.env_server import Action, Observation, State
|
| 3 |
+
|
| 4 |
+
class AdObservation(Observation):
    """What the agent sees about the ad under review after a reset or a step."""

    # Identity and creative text of the ad.
    ad_id: str
    headline: str
    body_text: str
    # Numeric trust score for the advertiser.
    advertiser_trust_score: float
    # Free-form targeting settings for the ad.
    targeting_data: Dict[str, Any]
    image_url: str
    # Human-readable result of the most recent action (or the reset message).
    status_message: str

    # Reward and termination flag for the step that produced this observation.
    # NOTE(review): the original author notes OpenEnv requires these on the
    # Observation; confirm they do not clash with fields on the base class.
    reward: float = 0.0
    done: bool = False
|
| 16 |
+
|
| 17 |
+
class AdAction(Action):
    """One agent move: a tool call or a terminal approve/reject decision."""

    # Closed set of accepted action names.
    action_type: Literal[
        "approve", "reject", "analyze_image",
        "request_landing_page", "request_id_verification"
    ]
    # Free-text justification supplied by the agent (required).
    reasoning: str
    # Optional category label for the suspected violation.
    violation_category: Optional[Literal["HEALTHCARE", "FINANCIAL", "NONE"]] = None
|
| 24 |
+
|
| 25 |
+
class AdState(State):
    """Episode bookkeeping: steps taken, reward so far, and the current ad id."""

    step_count: int = 0
    total_reward: float = 0.0
    # None when no ad id is available.
    current_ad_id: Optional[str] = None
|
test_env.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import json
|
| 3 |
+
|
| 4 |
+
BASE_URL = "http://localhost:8000"
|
| 5 |
+
|
| 6 |
+
def safe_post(endpoint, data=None, timeout=30):
    """POST ``data`` as JSON to ``BASE_URL/endpoint`` and return the parsed reply.

    Args:
        endpoint: Path appended to ``BASE_URL`` (without a leading slash).
        data: JSON-serializable request body, or ``None`` for no body.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive server cannot hang the script indefinitely.

    Returns:
        The decoded JSON response, or ``None`` when the request fails, the
        server answers with a non-200 status, or the body is not valid JSON.
        Failures are reported on stdout rather than raised.
    """
    try:
        url = f"{BASE_URL}/{endpoint}"
        response = requests.post(url, json=data, timeout=timeout)

        # If the server sent an error code (4xx or 5xx), print the text
        if response.status_code != 200:
            print(f"❌ Server Error {response.status_code}: {response.text}")
            return None

        return response.json()
    except Exception as e:
        # Deliberate catch-all: this helper exists so the smoke test keeps
        # going (and prints a reason) instead of crashing.
        print(f"⚠️ Request Failed: {e}")
        return None
|
| 21 |
+
|
| 22 |
+
def run_test():
    """Smoke-test the server: reset, call `analyze_image`, then submit `reject`.

    Each stage prints its result; the run stops early if the reset fails.
    """

    def wrap_action(action_type, reasoning):
        # Payload must be wrapped in 'action' for OpenEnv 2026
        return {
            "action": {
                "action_type": action_type,
                "reasoning": reasoning
            }
        }

    print("--- 🔄 Testing /reset ---")
    reset_reply = safe_post("reset")
    if not reset_reply:
        return

    # The observation may arrive wrapped in an envelope or as the reply itself.
    first_obs = reset_reply.get('observation', reset_reply)
    print(f"Ad Loaded: {first_obs.get('headline', 'N/A')}\n")

    print("--- 🔍 Testing 'analyze_image' Tool ---")
    tool_reply = safe_post(
        "step",
        wrap_action("analyze_image", "Standard adversarial check."),
    )
    if tool_reply:
        tool_obs = tool_reply.get('observation', tool_reply)
        print(f" {tool_obs.get('status_message', 'N/A')}\n")

    print("--- ✅ Testing Final Decision ---")
    final_reply = safe_post(
        "step",
        wrap_action("reject", "Detected policy violation."),
    )
    if final_reply:
        # Reward and termination flag are read from the top level of the reply;
        # 'terminal' is accepted as an alternative name for 'done'.
        final_reward = final_reply.get('reward', 0.0)
        finished = final_reply.get('done', final_reply.get('terminal', False))
        print(f"Final Reward: {final_reward} | Done: {finished}")
|
| 55 |
+
|
| 56 |
+
if __name__ == "__main__":
|
| 57 |
+
run_test()
|
validate.sh
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Pre-submission validator.
#
# Usage: validate.sh <ping_url> [repo_dir]
#   ping_url  Base URL of the deployed Space; "<ping_url>/reset" must answer 200.
#   repo_dir  Repository to build and validate (default: current directory).
#
# Runs three checks in order and exits 1 on the first failure:
#   1. POST {} to <ping_url>/reset
#   2. docker build <repo_dir>
#   3. openenv validate (run inside <repo_dir>)
set -uo pipefail

DOCKER_BUILD_TIMEOUT=600   # seconds; enforced only when `timeout` is installed
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BOLD='\033[1m'
NC='\033[0m'

PING_URL="${1:-}"
REPO_DIR="${2:-.}"

if [ -z "$PING_URL" ]; then
  printf "Usage: %s <ping_url> [repo_dir]\n" "$0"
  exit 1
fi

# Timestamped (UTC) log line; %b expands the colour escape sequences.
log() { printf "[%s] %b\n" "$(date -u +%H:%M:%S)" "$*"; }
pass() { log "${GREEN}PASSED${NC} -- $1"; }
fail() { log "${RED}FAILED${NC} -- $1"; exit 1; }

# Build the image, bounded by DOCKER_BUILD_TIMEOUT where `timeout` exists
# (it is not installed by default on every platform).
run_docker_build() {
  if command -v timeout > /dev/null 2>&1; then
    timeout "$DOCKER_BUILD_TIMEOUT" docker build "$REPO_DIR"
  else
    docker build "$REPO_DIR"
  fi
}

# Colours are passed as %b arguments so they are never parsed as format text.
printf "\n%b=== OpenEnv Validator ===%b\n" "$BOLD" "$NC"

log "Step 1/3: Pinging HF Space ($PING_URL/reset) ..."
# On failure curl exits non-zero; the fallback assignment then sets the code to
# a single "000" instead of appending a second "000" to curl's own output.
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -X POST -H "Content-Type: application/json" -d '{}' "$PING_URL/reset" --max-time 30) || HTTP_CODE="000"

if [ "$HTTP_CODE" = "200" ]; then
  pass "HF Space is live!"
else
  fail "HF Space /reset returned HTTP $HTTP_CODE (expected 200). Is your Space running?"
fi

log "Step 2/3: Running docker build ..."
if run_docker_build > /dev/null 2>&1; then
  pass "Docker build succeeded"
else
  fail "Docker build failed"
fi

log "Step 3/3: Running openenv validate ..."
if cd "$REPO_DIR" && openenv validate > /dev/null 2>&1; then
  pass "openenv validate passed"
else
  fail "openenv validate failed. Check openenv.yaml"
fi

printf "\n%b%b All 3/3 checks passed! Ready to submit.%b\n" "$GREEN" "$BOLD" "$NC"
|