Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """End-to-end test for Round 2 implementation. | |
| Tests: | |
| 1. Platform-specific episode loading | |
| 2. New tool actions (GET_POLICY, REVERSE_IMAGE_SEARCH, ANALYZE_BIO, CHECK_IP) | |
| 3. Platform-adaptive scoring | |
| 4. Hidden signals revelation | |
| """ | |
| from pathlib import Path | |
| import sys | |
| sys.path.insert(0, str(Path(__file__).parent)) | |
| from server.environment import FakeGangEnvironment | |
| from models import FakeGangAction, ActionType | |
def _section(title):
    """Print ``title`` on a fresh line followed by a 70-character dashed rule."""
    print(f"\n{title}")
    print("-" * 70)


def _require_profile(obs, account_id):
    """Return the entry of ``obs.visible_accounts`` whose id is ``account_id``.

    Raises:
        AssertionError: if no visible profile carries that id. Failing here
            gives a readable message instead of an ``AttributeError`` on
            ``None`` at the first attribute access further down.
    """
    profile = next(
        (p for p in obs.visible_accounts if p.account_id == account_id), None
    )
    assert profile is not None, f"Profile for {account_id} not found"
    return profile


def test_round2():
    """Run the Round 2 end-to-end scenario against ``FakeGangEnvironment``.

    The scenario resets the environment with seeds 0, 1 and 2 on the "easy"
    task and drives it through GET_POLICY, INSPECT, REVERSE_IMAGE_SEARCH,
    ANALYZE_BIO, CHECK_IP, FLAG and SUBMIT, checking the reported platform,
    the policy message and the number of steps each action consumes.
    Progress is printed to stdout as the scenario runs.

    Raises:
        AssertionError: on the first expectation that does not hold.
    """
    print("=" * 70)
    print("ROUND 2 END-TO-END TEST")
    print("=" * 70)
    env = FakeGangEnvironment()

    # Test 1: Instagram episode (even seed)
    _section("[Test 1] Instagram Episode (seed=0)")
    obs = env.reset(task="easy", seed=0)
    print(f"✓ Platform: {obs.platform}")
    assert obs.platform == "Instagram", f"Expected Instagram, got {obs.platform}"
    print(f"✓ Steps remaining: {obs.steps_remaining}")
    print(f"✓ Starting visible: {len(obs.visible_account_ids)} accounts")

    # Test 2: GET_POLICY action
    _section("[Test 2] GET_POLICY Action")
    obs = env.step(FakeGangAction(action_type=ActionType.GET_POLICY))
    print(f"✓ Message: {obs.message[:200]}")
    assert (
        "Instagram" in obs.message or "threshold" in obs.message.lower()
    ), "Policy not returned"
    # The absolute step counts below assume this episode starts with 30 steps.
    assert obs.steps_remaining == 30, "GET_POLICY should not consume steps"

    # Test 3: INSPECT to find accounts
    _section("[Test 3] INSPECT Action")
    assert obs.visible_account_ids, "No visible accounts to inspect"
    acc_id = obs.visible_account_ids[0]
    obs = env.step(
        FakeGangAction(action_type=ActionType.INSPECT, account_id=acc_id)
    )
    print(f"✓ Inspected: {acc_id}")
    print(f"✓ Steps remaining: {obs.steps_remaining}")
    assert obs.steps_remaining == 29, "INSPECT should consume 1 step"
    # Check that profile exists
    profile = _require_profile(obs, acc_id)
    print(f"✓ Profile created: fake_risk={profile.fake_risk_score:.3f}")

    # Test 4: REVERSE_IMAGE_SEARCH (hidden signal revelation)
    _section("[Test 4] REVERSE_IMAGE_SEARCH Action")
    photo_before = profile.photo_reuse_score
    print(f" Before: photo_reuse_score = {photo_before:.3f}")
    obs = env.step(
        FakeGangAction(
            action_type=ActionType.REVERSE_IMAGE_SEARCH, account_id=acc_id
        )
    )
    print(f"✓ Steps remaining: {obs.steps_remaining}")
    assert obs.steps_remaining == 28, "REVERSE_IMAGE_SEARCH should consume 1 step"
    profile = _require_profile(obs, acc_id)
    photo_after = profile.photo_reuse_score
    print(f" After: photo_reuse_score = {photo_after:.3f}")
    # Reported only: the score is not required to change for the test to pass.
    print(f"✓ Signal revealed (changed: {photo_before != photo_after})")

    # Test 5: ANALYZE_BIO
    _section("[Test 5] ANALYZE_BIO Action")
    bio_before = profile.bio_template_score
    print(f" Before: bio_template_score = {bio_before:.3f}")
    obs = env.step(
        FakeGangAction(action_type=ActionType.ANALYZE_BIO, account_id=acc_id)
    )
    assert obs.steps_remaining == 27, "ANALYZE_BIO should consume 1 step"
    profile = _require_profile(obs, acc_id)
    bio_after = profile.bio_template_score
    print(f" After: bio_template_score = {bio_after:.3f}")
    print(f"✓ Signal revealed (changed: {bio_before != bio_after})")

    # Test 6: CHECK_IP (expensive action)
    _section("[Test 6] CHECK_IP Action")
    steps_before = obs.steps_remaining
    obs = env.step(
        FakeGangAction(action_type=ActionType.CHECK_IP, account_id=acc_id)
    )
    steps_used = steps_before - obs.steps_remaining
    print(f"✓ Steps consumed: {steps_used}")
    assert steps_used == 2, "CHECK_IP should consume 2 steps"
    print(f"✓ Message: {obs.message[:150]}")

    # Test 7: Snapchat episode (odd seed)
    _section("[Test 7] Snapchat Episode (seed=1)")
    obs = env.reset(task="easy", seed=1)
    print(f"✓ Platform: {obs.platform}")
    assert obs.platform == "Snapchat", f"Expected Snapchat, got {obs.platform}"
    obs = env.step(FakeGangAction(action_type=ActionType.GET_POLICY))
    print(f"✓ Message: {obs.message[:200]}")
    assert "Snapchat" in obs.message or "threshold" in obs.message.lower()

    # Test 8: Platform-adaptive scoring
    _section("[Test 8] Platform-Adaptive Scoring")
    # Reset to Instagram
    obs = env.reset(task="easy", seed=0)
    obs = env.step(FakeGangAction(action_type=ActionType.GET_POLICY))
    # Inspect an account and report its computed risk
    assert obs.visible_account_ids, "No visible accounts to inspect"
    acc_id = obs.visible_account_ids[0]
    obs = env.step(
        FakeGangAction(action_type=ActionType.INSPECT, account_id=acc_id)
    )
    profile = _require_profile(obs, acc_id)
    print(f" Account: {acc_id}")
    print(f" fake_risk_score: {profile.fake_risk_score:.3f}")
    print(f" status: {profile.status}")
    print("✓ Risk computed with platform-adaptive weights")

    # Test 9: SUBMIT with platform-specific rewards
    _section("[Test 9] SUBMIT with Platform Rewards")
    obs = env.reset(task="easy", seed=2)
    # Inspect a few accounts
    for acc_id in obs.visible_account_ids[:5]:
        obs = env.step(
            FakeGangAction(action_type=ActionType.INSPECT, account_id=acc_id)
        )
    # Flag high-risk accounts (at most five). The candidates are snapshotted
    # before stepping so the loop never walks a list that env.step() might be
    # updating, and scanning stops once the cap is reached.
    high_risk = [p for p in obs.visible_accounts if p.fake_risk_score > 0.6][:5]
    for profile in high_risk:
        obs = env.step(
            FakeGangAction(
                action_type=ActionType.FLAG, account_id=profile.account_id
            )
        )
    print(f" Flagged: {len(obs.flagged_ids)} accounts")
    obs = env.step(FakeGangAction(action_type=ActionType.SUBMIT))
    print(f"✓ Episode complete: done={obs.done}")
    print(f"✓ Final reward: {obs.reward:.3f}")
    print(f"✓ Message: {obs.message[:200]}")

    print("\n" + "=" * 70)
    print("ALL TESTS PASSED ✓")
    print("=" * 70)
    print("\nRound 2 implementation verified:")
    print(" ✓ Platform-specific episodes (Instagram/Snapchat)")
    print(" ✓ GET_POLICY action (0 steps)")
    print(" ✓ REVERSE_IMAGE_SEARCH (1 step)")
    print(" ✓ ANALYZE_BIO (1 step)")
    print(" ✓ CHECK_IP (2 steps)")
    print(" ✓ Hidden signals revelation")
    print(" ✓ Platform-adaptive scoring")
    print(" ✓ Complete episode flow")
if __name__ == "__main__":
    # Run the end-to-end scenario only when this file is executed directly,
    # not when it is imported as a module.
    test_round2()