|
|
from typing import Set |
|
|
|
|
|
def jaccard_index(set_a, set_b):
    """Return the Jaccard index ``|A & B| / |A | B|`` of two sets.

    Both arguments must provide ``intersection`` and ``union`` methods
    (e.g. ``set`` or ``frozenset``).  When the union is empty -- i.e. both
    inputs are empty -- the result is ``0.0`` rather than a division error.
    """
    shared_count = len(set_a.intersection(set_b))
    total_count = len(set_a.union(set_b))

    # Guard against dividing by zero when there are no elements at all.
    if total_count == 0:
        return 0.0
    return shared_count / total_count
|
|
|
|
|
def fidelity_metric(commitments_a, commitments_b):
    """Return ``jaccard_index`` of the two commitment collections.

    Each argument is converted to a ``set`` first, so duplicates and
    ordering in the inputs do not influence the score.
    """
    unique_a = set(commitments_a)
    unique_b = set(commitments_b)
    return jaccard_index(unique_a, unique_b)
|
|
|
|
|
def jaccard(a: Set[str], b: Set[str]) -> float:
    """Return the Jaccard similarity of ``a`` and ``b``.

    Two empty sets are treated as identical (``1.0``); exactly one empty
    set yields ``0.0``.  Otherwise the result is ``|a & b| / |a | b|``.
    """
    # Neither side has any element: treat as a perfect match.
    if not (a or b):
        return 1.0
    # Exactly one side is empty: nothing can overlap.
    if not (a and b):
        return 0.0
    return len(a & b) / len(a | b)
|
|
|
|
|
def fid_hard(base: Set[str], comp: Set[str]):
    """Return the exact-match ``jaccard`` score of ``base`` against ``comp``."""
    score = jaccard(base, comp)
    return score
|
|
|
|
|
def delta_hard(base: Set[str], cyc: Set[str]):
    """Return ``1.0`` minus the ``jaccard`` score of ``base`` and ``cyc``.

    This is the complement of the exact-match similarity, so identical sets
    give ``0.0`` and sets with no common element give ``1.0``.
    """
    similarity = jaccard(base, cyc)
    return 1.0 - similarity
|
|
|
|
|
def _word_set(strings: Set[str]) -> Set[str]:
    """Return the lower-cased, whitespace-separated words found in ``strings``."""
    words: Set[str] = set()
    for text in strings:
        words.update(text.lower().split())
    return words


def hybrid_fidelity(
    base_set: Set[str],
    comp_set: Set[str],
    *,
    soft_weight: float = 0.5,
) -> float:
    """Score how well ``comp_set`` matches ``base_set``.

    Exact-match Jaccard similarity (via ``jaccard``) is used whenever it is
    positive.  When the sets share no identical string, the score falls
    back to a word-level Jaccard similarity over lower-cased,
    whitespace-split tokens, scaled by ``soft_weight``.  This smooths the
    otherwise binary 0/1 behavior for better visualization.  The fallback
    is purely lexical word overlap; no semantic model is involved.

    Args:
        base_set: Reference strings.
        comp_set: Strings compared against the reference.
        soft_weight: Multiplier applied to the word-level fallback score.
            Defaults to ``0.5``.

    Returns:
        ``0.0`` if either set is empty (including when both are empty);
        the exact-match Jaccard score when it is positive; otherwise
        ``soft_weight`` times the word-level Jaccard score, where the
        word-level score is ``0.0`` if neither side contains any word.
    """
    # With one side empty the exact score is 0.0 and there is nothing to
    # tokenise, so both empty-input cases short-circuit to 0.0 here.
    if not base_set or not comp_set:
        return 0.0

    exact = jaccard(base_set, comp_set)
    if exact > 0:
        return exact

    base_words = _word_set(base_set)
    comp_words = _word_set(comp_set)
    all_words = base_words | comp_words

    # Inputs made only of empty/whitespace strings produce no words at all.
    soft_sim = len(base_words & comp_words) / len(all_words) if all_words else 0.0
    return soft_sim * soft_weight