siddsuresh97's picture
Thread hf_link to Modal and require unique submitter names
ebf9b32
from __future__ import annotations
from itertools import combinations
from typing import Iterable
import numpy as np
from src.cka.compute import linear_cka
from src.hackathon.data import get_dummy_model_embeddings, list_dummy_stimuli, resolve_stimulus_indices
from src.hackathon.modal_client import (
is_modal_enabled,
score_blue_with_pairwise as modal_score_blue_with_pairwise,
score_red_with_pairwise as modal_score_red_with_pairwise,
)
def _validate_models(model_names: Iterable[str], embeddings_by_model: dict[str, np.ndarray]) -> list[str]:
names = [name.strip() for name in model_names if name.strip()]
if len(names) < 2:
raise ValueError("Select at least two models.")
if len(names) != len(set(names)):
raise ValueError("Model selections must be unique.")
missing = [name for name in names if name not in embeddings_by_model]
if missing:
missing_str = ", ".join(missing)
raise ValueError(f"Unknown models requested: {missing_str}")
return names
def _format_score(score: float) -> float:
return round(float(score), 4)
def _pairwise_scores(
model_names: Iterable[str],
embeddings_by_model: dict[str, np.ndarray],
) -> tuple[float, list[dict[str, float | str]]]:
scores = []
pairwise: list[dict[str, float | str]] = []
for model_a, model_b in combinations(model_names, 2):
score = linear_cka(embeddings_by_model[model_a], embeddings_by_model[model_b])
scores.append(score)
pairwise.append(
{
"Model A": model_a,
"Model B": model_b,
"CKA": _format_score(score),
}
)
if not scores:
return 0.0, []
return float(np.mean(scores)), pairwise
def score_blue_with_pairwise(
    model_names: Iterable[str],
    *,
    embeddings_by_model: dict[str, np.ndarray] | None = None,
    submission_id: str | None = None,
    submitter: str | None = None,
    hf_link: str | None = None,
) -> tuple[float, list[dict[str, float | str]]]:
    """Score a model selection and return the per-pair breakdown.

    When no embeddings are supplied and Modal is enabled, scoring is delegated
    to the Modal client and its pairwise items are re-keyed into the local row
    format. Otherwise the score is computed locally, using the dummy
    embeddings when none are supplied.

    Args:
        model_names: Names of the selected models.
        embeddings_by_model: Optional mapping from model name to embeddings.
            Supplying it forces the local path.
        submission_id: Forwarded to the Modal client; unused locally.
        submitter: Forwarded to the Modal client; unused locally.
        hf_link: Forwarded to the Modal client; unused locally.

    Returns:
        A ``(average_cka, rows)`` tuple, where each row is a dict with keys
        ``"Model A"``, ``"Model B"`` and ``"CKA"``.

    Raises:
        ValueError: On the local path, if the selection fails
            ``_validate_models``.
    """
    if embeddings_by_model is None:
        if is_modal_enabled():
            remote_avg, remote_items = modal_score_blue_with_pairwise(
                model_names,
                submission_id=submission_id,
                submitter=submitter,
                hf_link=hf_link,
            )
            # Re-key the remote items into the local row format.
            rows = []
            for entry in remote_items:
                rows.append(
                    {
                        "Model A": entry["model_a"],
                        "Model B": entry["model_b"],
                        "CKA": _format_score(entry["cka"]),
                    }
                )
            return float(remote_avg), rows
        embeddings_by_model = get_dummy_model_embeddings()

    validated_names = _validate_models(model_names, embeddings_by_model)
    local_avg, local_rows = _pairwise_scores(validated_names, embeddings_by_model)
    return float(local_avg), local_rows
def score_blue(
    model_names: Iterable[str],
    *,
    embeddings_by_model: dict[str, np.ndarray] | None = None,
) -> float:
    """Return only the average score from ``score_blue_with_pairwise``.

    Args:
        model_names: Names of the selected models.
        embeddings_by_model: Optional mapping from model name to embeddings,
            passed through unchanged.

    Returns:
        The first element of the ``score_blue_with_pairwise`` result, as a
        float. The pairwise rows are discarded.
    """
    mean_cka, _rows = score_blue_with_pairwise(
        model_names,
        embeddings_by_model=embeddings_by_model,
    )
    return float(mean_cka)
def score_red_with_pairwise(
    selected_stimuli: Iterable[dict[str, str] | str],
    *,
    embeddings_by_model: dict[str, np.ndarray] | None = None,
    stimuli_catalog: Iterable[dict[str, str]] | None = None,
    submission_id: str | None = None,
    submitter: str | None = None,
    hf_link: str | None = None,
) -> tuple[float, list[dict[str, float | str]]]:
    """Score a stimulus selection and return the per-pair breakdown.

    When no embeddings are supplied and Modal is enabled, scoring is delegated
    to the Modal client; its score is returned as-is and its pairwise items
    are re-keyed into the local row format. Otherwise every model in
    ``embeddings_by_model`` is restricted to the selected stimuli, all model
    pairs are scored, and the result is ``1.0`` minus the average pair score.

    Args:
        selected_stimuli: The chosen stimuli, resolved to indices by
            ``resolve_stimulus_indices``.
        embeddings_by_model: Optional mapping from model name to embeddings.
            Supplying it forces the local path; defaults to the dummy
            embeddings.
        stimuli_catalog: Optional catalog used to resolve the selection;
            defaults to the dummy stimuli list.
        submission_id: Forwarded to the Modal client; unused locally.
        submitter: Forwarded to the Modal client; unused locally.
        hf_link: Forwarded to the Modal client; unused locally.

    Returns:
        A ``(score, rows)`` tuple, where each row is a dict with keys
        ``"Model A"``, ``"Model B"`` and ``"CKA"``.

    Raises:
        ValueError: On the local path, if the models fail
            ``_validate_models`` or fewer than two stimuli are resolved.
    """
    if embeddings_by_model is None:
        if is_modal_enabled():
            remote_score, remote_items = modal_score_red_with_pairwise(
                selected_stimuli,
                submission_id=submission_id,
                submitter=submitter,
                hf_link=hf_link,
            )
            # Re-key the remote items into the local row format.
            rows = []
            for entry in remote_items:
                rows.append(
                    {
                        "Model A": entry["model_a"],
                        "Model B": entry["model_b"],
                        "CKA": _format_score(entry["cka"]),
                    }
                )
            return float(remote_score), rows
        embeddings_by_model = get_dummy_model_embeddings()

    if stimuli_catalog is None:
        stimuli_catalog = list_dummy_stimuli()

    # Every model present in the embeddings takes part in the red score.
    validated_names = _validate_models(embeddings_by_model.keys(), embeddings_by_model)
    chosen_rows = resolve_stimulus_indices(selected_stimuli, stimuli_catalog)
    if len(chosen_rows) < 2:
        raise ValueError("Select at least two stimuli.")

    # Keep only the selected stimuli for each model before comparing.
    subset_by_model = {}
    for model in validated_names:
        subset_by_model[model] = embeddings_by_model[model][chosen_rows]

    mean_cka, local_rows = _pairwise_scores(validated_names, subset_by_model)
    return float(1.0 - mean_cka), local_rows
def score_red(
    selected_stimuli: Iterable[dict[str, str] | str],
    *,
    embeddings_by_model: dict[str, np.ndarray] | None = None,
    stimuli_catalog: Iterable[dict[str, str]] | None = None,
) -> float:
    """Return only the score from ``score_red_with_pairwise``.

    Args:
        selected_stimuli: The chosen stimuli, passed through unchanged.
        embeddings_by_model: Optional mapping from model name to embeddings,
            passed through unchanged.
        stimuli_catalog: Optional stimuli catalog, passed through unchanged.

    Returns:
        The first element of the ``score_red_with_pairwise`` result, as a
        float. The pairwise rows are discarded.
    """
    forwarded = {
        "embeddings_by_model": embeddings_by_model,
        "stimuli_catalog": stimuli_catalog,
    }
    red_score, _rows = score_red_with_pairwise(selected_stimuli, **forwarded)
    return float(red_score)