| from __future__ import annotations |
|
|
| import argparse |
| import json |
| import os |
| import sys |
| from pathlib import Path |
|
|
| |
# Make the project root importable so that `from app import ...` (done later,
# inside main()) resolves regardless of the current working directory.
# NOTE(review): this must run before any project-local import below.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(PROJECT_ROOT))


import modal


from dotenv import load_dotenv
# Populate os.environ from a local .env file before anything reads the
# HACKATHON_* configuration variables.
load_dotenv()
|
|
|
|
| def _require_env(name: str) -> str: |
| value = os.environ.get(name, "").strip() |
| if not value: |
| raise ValueError(f"Missing required env var: {name}") |
| return value |
|
|
|
|
def _seed_dummy_dataset(app_name: str) -> None:
    """Invoke the Modal app's ``seed_dummy_dataset`` function remotely.

    Seeds a small fixed-size dummy dataset (6 images, 224px) on the app
    named *app_name* and reports where it was written.
    """
    remote_seed = modal.Function.from_name(app_name, "seed_dummy_dataset")
    result = remote_seed.remote(num_images=6, image_size=224, dataset_name="dummy")
    print(f"Seeded dataset at {result['dataset_root']}")
|
|
|
|
| def _load_stimuli_from_catalog(stimuli_path: str) -> list[dict[str, str]]: |
| """Load stimuli from a JSONL catalog file.""" |
| stimuli = [] |
| with open(stimuli_path, "r") as f: |
| for line in f: |
| line = line.strip() |
| if line: |
| stimuli.append(json.loads(line)) |
| return stimuli |
|
|
|
|
def main() -> None:
    """Run the red-team smoke test end to end.

    Parses CLI flags, validates the required HACKATHON_* environment
    variables, optionally seeds a dummy dataset on the Modal app, then
    drives three red-team submissions through ``app.submit_red`` and
    sanity-checks the resulting leaderboard/pairwise tables and the
    on-disk submissions file.
    """
    parser = argparse.ArgumentParser(description="Red team smoke test")
    parser.add_argument(
        "--stimuli",
        type=str,
        default=None,
        help="Path to custom stimuli JSONL file. If not provided, uses HACKATHON_STIMULI_CATALOG env var.",
    )
    parser.add_argument(
        "--s3",
        action="store_true",
        help="Use S3-backed datasets (requires aws-s3-credentials Modal secret).",
    )
    parser.add_argument(
        "--skip-seed",
        action="store_true",
        help="Skip seeding dummy dataset (use when testing with real images).",
    )
    args = parser.parse_args()

    # Fail fast if the deployment configuration is incomplete.
    _require_env("HACKATHON_MODAL_ENABLE")
    _require_env("HACKATHON_MODEL_REGISTRY")
    data_dir = _require_env("HACKATHON_DATA_DIR")

    # A --stimuli path overrides the env var; otherwise the env var must
    # already be set (validated but its value is re-read below).
    if args.stimuli:
        stimuli_path = Path(args.stimuli).resolve()
        if not stimuli_path.exists():
            raise ValueError(f"Stimuli file not found: {stimuli_path}")
        os.environ["HACKATHON_STIMULI_CATALOG"] = str(stimuli_path)
        print(f"Using custom stimuli: {stimuli_path}")
    else:
        _require_env("HACKATHON_STIMULI_CATALOG")

    stimuli_catalog_path = os.environ.get("HACKATHON_STIMULI_CATALOG", "").strip()

    # S3 mode is communicated to the app solely via this env var.
    if args.s3:
        os.environ["HACKATHON_USE_S3"] = "true"
        print("S3 mode enabled - will use extract_embeddings_s3 function")

    app_name = os.environ.get("HACKATHON_MODAL_APP", "iclr2026-eval")

    # Seeding only applies to the local dummy-dataset path; in S3 mode real
    # images are used and --skip-seed bypasses it explicitly.
    if not args.skip_seed and not args.s3:
        _seed_dummy_dataset(app_name)
    elif args.skip_seed:
        print("Skipping dummy dataset seeding")
    elif args.s3:
        print("S3 mode: skipping dummy dataset seeding (using real images)")

    # Deferred import: presumably ``app`` reads the env vars configured
    # above at import time — TODO confirm before moving this to the top.
    from app import submit_red

    if args.s3:
        # Build three overlapping stimulus subsets from the real catalog.
        catalog_stimuli = _load_stimuli_from_catalog(stimuli_catalog_path)
        if len(catalog_stimuli) < 6:
            raise ValueError(f"Need at least 6 stimuli in catalog, found {len(catalog_stimuli)}")

        stimulus_sets = [
            catalog_stimuli[0:2],
            catalog_stimuli[2:5],
            [catalog_stimuli[1], catalog_stimuli[5]],
        ]
        print(f"Using {len(catalog_stimuli)} stimuli from catalog for S3 mode")
    else:
        # Fixed identifiers matching the 6 images _seed_dummy_dataset creates.
        stimulus_sets = [
            [
                {"dataset_name": "dummy", "image_identifier": "images/img_0000.png"},
                {"dataset_name": "dummy", "image_identifier": "images/img_0001.png"},
            ],
            [
                {"dataset_name": "dummy", "image_identifier": "images/img_0002.png"},
                {"dataset_name": "dummy", "image_identifier": "images/img_0003.png"},
                {"dataset_name": "dummy", "image_identifier": "images/img_0004.png"},
            ],
            [
                {"dataset_name": "dummy", "image_identifier": "images/img_0001.png"},
                {"dataset_name": "dummy", "image_identifier": "images/img_0005.png"},
            ],
        ]

    # Submit each set under a distinct submitter name and spot-check results.
    for idx, stimuli in enumerate(stimulus_sets, start=1):
        payload = json.dumps({"differentiating_images": stimuli})
        submitter = f"red-test-{idx}"
        msg, leaderboard, pairwise = submit_red(submitter, payload)
        print(f"Submission {idx} message: {msg}")
        print(f"Submission {idx} leaderboard: {leaderboard.tail(1).to_dict(orient='records')}")
        print(f"Submission {idx} pairwise: {pairwise.to_dict(orient='records')}")
        assert not pairwise.empty, "Pairwise table should not be empty."

    # The app persists red submissions under HACKATHON_DATA_DIR.
    red_path = Path(data_dir) / "red_submissions.json"
    assert red_path.exists(), f"Missing submission file: {red_path}"
    print("Red team smoke test complete.")
|
|
|
|
| if __name__ == "__main__": |
| main() |
|
|