Spaces:
Paused
Paused
| """ | |
| Multi-label IAA metrics for schemas where each annotator can select a set of labels | |
| per item (e.g., multiselect, hierarchical_multiselect, card_sort). | |
| Provides MASI distance, Jaccard distance, mean pairwise Jaccard similarity, and Krippendorff's alpha with MASI distance. | |
| """ | |
| from __future__ import annotations | |
| from typing import Dict, Iterable, Sequence | |
| from potato.server_utils.iaa.alpha import krippendorff_alpha, _masi_distance | |
def jaccard_distance(set_a: Iterable, set_b: Iterable) -> float:
    """Return the Jaccard distance between two label collections.

    Both arguments are converted to ``frozenset`` first, so duplicate labels
    and ordering are ignored. The distance is ``1 - |A & B| / |A | B|``.

    Args:
        set_a: Labels selected by the first annotator (hashable elements).
        set_b: Labels selected by the second annotator (hashable elements).

    Returns:
        ``0.0`` when both collections hold the same labels (including when
        both are empty), ``1.0`` when they share no label and at least one
        is non-empty, and a value in between otherwise.
    """
    a = frozenset(set_a)
    b = frozenset(set_b)
    union = a | b
    if not union:
        # The union is empty only when both inputs are empty; report perfect
        # agreement instead of dividing by zero.
        return 0.0
    return 1.0 - len(a & b) / len(union)
def masi_distance(set_a: Iterable, set_b: Iterable) -> float:
    """Return the MASI distance between two label collections.

    Public wrapper that forwards both arguments unchanged to the
    ``_masi_distance`` helper imported from ``potato.server_utils.iaa.alpha``
    and returns whatever that helper computes.
    """
    distance = _masi_distance(set_a, set_b)
    return distance
def mean_jaccard(label_sets_by_user: Dict[str, Sequence[Iterable]]) -> float:
    """Return the mean pairwise Jaccard similarity across users and items.

    For every unordered pair of users, their label-set sequences are compared
    position by position (item ``k`` of one user against item ``k`` of the
    other). When the two sequences differ in length, only the common prefix
    is compared. Each comparison contributes ``1 - jaccard_distance(...)``,
    and the result is the plain average of all contributions.

    Args:
        label_sets_by_user: Mapping from a user identifier to that user's
            sequence of label collections, one entry per item.

    Returns:
        The average similarity, or ``nan`` when fewer than two users are
        given or no pair of users has any item position in common.
    """
    if len(label_sets_by_user) < 2:
        return float("nan")

    # Materialize each user's sequence exactly once. Doing this inside the
    # pair loop repeats the work for every pair and would see an exhausted
    # (empty) sequence on later passes if a value were a one-shot iterable.
    per_user = [list(label_sets_by_user[user]) for user in label_sets_by_user]

    sims = []
    for idx, sets_a in enumerate(per_user):
        for sets_b in per_user[idx + 1:]:
            # zip stops at the shorter sequence, so only shared positions
            # are compared and an empty sequence contributes nothing.
            for item_a, item_b in zip(sets_a, sets_b):
                sims.append(1.0 - jaccard_distance(item_a, item_b))

    if not sims:
        return float("nan")
    return sum(sims) / len(sims)
def alpha_masi(long_format_sets) -> float:
    """Compute Krippendorff's alpha over multi-label sets using MASI distance.

    Delegates to ``krippendorff_alpha`` with ``level="masi"``; the expected
    layout of ``long_format_sets`` is whatever that function accepts.
    """
    alpha = krippendorff_alpha(long_format_sets, level="masi")
    return alpha