| """Thin client for the private Modal evaluation backend. |
| |
| The actual evaluation pipeline (embedding extraction, CKA scoring) lives in |
| a private repository and is deployed as a Modal app. This module calls the |
| deployed functions by name — no backend code is imported here. |
| |
| Public configs (blue model registry) can still be controlled via HF Space env |
| vars for convenience. Secret configs (red team registry, blue heldout images) |
| are loaded server-side from the Modal volume — never sent from here. |
| |
| The backend must be deployed first: |
| modal deploy scripts/modal_backend.py # from the private eval-backend repo |
| """ |
| from __future__ import annotations |
|
|
| import json |
| import os |
| from pathlib import Path |
| from typing import Any, Iterable |
|
|
| from src.hackathon.validation import ( |
| BLUE_MODEL_REGISTRY_ENV, |
| MODEL_REGISTRY_ENV, |
| ) |
|
|
|
|
# Environment variable names read by this client.
MODAL_ENABLE_ENV = "HACKATHON_MODAL_ENABLE"  # truthy value enables the Modal backend
MODAL_APP_ENV = "HACKATHON_MODAL_APP"  # overrides the deployed Modal app name
MODAL_BATCH_SIZE_ENV = "HACKATHON_MODAL_BATCH_SIZE"  # overrides the batch size sent to the backend
DEFAULT_MODAL_APP = "iclr2026-eval"  # app name used when MODAL_APP_ENV is unset/blank
DEFAULT_BATCH_SIZE = 64  # batch size used when MODAL_BATCH_SIZE_ENV is unset/blank
|
|
|
|
| def _is_truthy(value: str | None) -> bool: |
| if value is None: |
| return False |
| return value.strip().lower() in {"1", "true", "yes", "y", "on"} |
|
|
|
|
def is_modal_enabled() -> bool:
    """Whether the Modal backend is switched on via the enable env var."""
    flag = os.environ.get(MODAL_ENABLE_ENV)
    return _is_truthy(flag)
|
|
|
|
def _get_batch_size() -> int:
    """Return the batch size to send to the backend.

    Reads HACKATHON_MODAL_BATCH_SIZE; falls back to DEFAULT_BATCH_SIZE when
    the variable is unset or blank.

    Raises:
        ValueError: if the variable is set but is not a positive integer —
            fail loudly here rather than sending a nonsensical batch size
            (0 or negative) to the remote evaluation pipeline.
    """
    raw = os.environ.get(MODAL_BATCH_SIZE_ENV, "").strip()
    if not raw:
        return DEFAULT_BATCH_SIZE
    try:
        batch_size = int(raw)
    except ValueError as err:
        raise ValueError(
            f"{MODAL_BATCH_SIZE_ENV} must be an integer, got {raw!r}"
        ) from err
    if batch_size <= 0:
        raise ValueError(
            f"{MODAL_BATCH_SIZE_ENV} must be positive, got {batch_size}"
        )
    return batch_size
|
|
|
|
def _get_modal_function(function_name: str) -> Any:
    """Look up *function_name* on the deployed Modal app.

    The app name comes from HACKATHON_MODAL_APP when set (non-blank),
    otherwise DEFAULT_MODAL_APP. ``modal`` is imported lazily so this
    module can be imported even when the dependency is absent.
    """
    import modal

    configured = os.environ.get(MODAL_APP_ENV, "").strip()
    app_name = configured if configured else DEFAULT_MODAL_APP
    return modal.Function.from_name(app_name, function_name)
|
|
|
|
| def _load_json_file(path: str) -> Any: |
| """Load a JSON or JSONL file from a local path.""" |
| p = Path(path) |
| if p.suffix == ".jsonl": |
| lines = p.read_text().splitlines() |
| return [json.loads(line) for line in lines if line.strip()] |
| return json.loads(p.read_text()) |
|
|
|
|
def _load_blue_model_registry() -> list[dict[str, Any]] | None:
    """Load the blue model registry from an env-configured path, if any.

    Checks the primary registry env var first, then the legacy one.
    Returns None when neither is set — the backend then loads its own
    copy from the Modal volume.
    """
    path = (
        os.environ.get(BLUE_MODEL_REGISTRY_ENV, "").strip()
        or os.environ.get(MODEL_REGISTRY_ENV, "").strip()
    )
    if not path:
        return None

    data = _load_json_file(path)
    # Registries may be a bare list or wrapped as {"models": [...]}.
    if isinstance(data, dict):
        data = data.get("models", data)
    return data
|
|
|
|
def score_blue_with_pairwise(
    model_names: Iterable[str],
    *,
    submission_id: str | None = None,
    submitter: str | None = None,
    hf_link: str | None = None,
) -> tuple[float, list[dict[str, Any]]]:
    """Score a blue team submission via the deployed Modal backend.

    The registry is sent along only when HACKATHON_BLUE_MODEL_REGISTRY
    (or HACKATHON_MODEL_REGISTRY) points at a local file; otherwise the
    backend uses its own copy from the Modal volume. Blue heldout images
    are always loaded server-side (secret). Providing submission_id makes
    the backend persist the result to the volume for crash recovery.

    Returns:
        (avg_cka, pairwise) where pairwise is a list of per-pair records.
    """
    registry = _load_blue_model_registry()
    remote_fn = _get_modal_function("score_blue_submission")
    result = remote_fn.remote(
        model_names=list(model_names),
        model_registry=registry,
        batch_size=_get_batch_size(),
        submission_id=submission_id,
        submitter=submitter,
        hf_link=hf_link,
    )
    pairwise = list(result.get("pairwise", []))
    return float(result.get("avg_cka", 0.0)), pairwise
|
|
|
|
def score_red_with_pairwise(
    selected_stimuli: Iterable[dict[str, str] | str],
    *,
    submission_id: str | None = None,
    submitter: str | None = None,
    hf_link: str | None = None,
) -> tuple[float, list[dict[str, Any]]]:
    """Score a red team submission via the deployed Modal backend.

    The red team model registry always lives server-side on the Modal
    volume (secret — never sent from the public Space). Providing
    submission_id makes the backend persist the result to the volume
    for crash recovery.

    Returns:
        (score, pairwise) where pairwise is a list of per-pair records.

    Raises:
        ValueError: if a string stimulus is not a "dataset::identifier" key.
    """

    def _normalize(item: dict[str, str] | str) -> dict[str, str]:
        # Dict stimuli pass through; strings are "dataset::identifier" keys.
        if not isinstance(item, str):
            return dict(item)
        dataset, sep, identifier = item.partition("::")
        if not sep:
            raise ValueError(f"Invalid stimulus key format: {item}")
        return {"dataset_name": dataset, "image_identifier": identifier}

    stimuli_list = [_normalize(item) for item in selected_stimuli]

    remote_fn = _get_modal_function("score_red_submission")
    result = remote_fn.remote(
        selected_stimuli=stimuli_list,
        batch_size=_get_batch_size(),
        submission_id=submission_id,
        submitter=submitter,
        hf_link=hf_link,
    )
    return float(result.get("score", 0.0)), list(result.get("pairwise", []))
|
|
|
|
def fetch_volume_submissions(team: str | None = None) -> list[dict[str, Any]]:
    """Fetch submissions saved on the Modal volume, optionally filtered by team.

    Used to sync submissions after a Space restart.
    """
    list_fn = _get_modal_function("list_submissions")
    return list_fn.remote(team=team)
|
|
|
|
def push_submissions_to_volume(submissions: list[dict[str, Any]]) -> dict[str, int]:
    """Push local submissions to the Modal volume.

    Used to backfill the volume after a Modal crash or volume wipe.
    Returns {"added": N, "skipped": M}.
    """
    backfill_fn = _get_modal_function("backfill_submissions")
    return backfill_fn.remote(submissions=submissions)
|
|