File size: 1,676 Bytes
2a64ad4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
from typing import Set
def jaccard_index(set_a, set_b):
    """Return the Jaccard index of ``set_a`` and ``set_b``.

    The index is the size of the intersection divided by the size of the
    union. ``set_a`` must provide ``intersection`` and ``union`` methods;
    ``set_b`` is whatever those methods accept.

    Returns:
        ``0.0`` when the union is empty, otherwise ``|A ∩ B| / |A ∪ B|``.
    """
    shared_count = len(set_a.intersection(set_b))
    combined_count = len(set_a.union(set_b))
    # An empty union would divide by zero; this variant scores it as 0.0.
    return shared_count / combined_count if combined_count != 0 else 0.0
def fidelity_metric(commitments_a, commitments_b):
    """Return the Jaccard index of two commitment collections.

    Both arguments are converted to sets first, so duplicates and ordering
    in the inputs do not affect the score. The score itself is computed by
    ``jaccard_index``.
    """
    unique_a = set(commitments_a)
    unique_b = set(commitments_b)
    return jaccard_index(unique_a, unique_b)
def jaccard(a: Set[str], b: Set[str]) -> float:
    """Return the Jaccard similarity of two sets.

    Returns:
        ``1.0`` when both sets are empty, ``0.0`` when exactly one of them
        is empty, otherwise ``|a ∩ b| / |a ∪ b|``.
    """
    if a and b:
        # Common case: both sides have members, so the union is non-empty.
        shared = a & b
        combined = a | b
        return len(shared) / len(combined)
    # At least one side is empty: identical only if both are.
    return 0.0 if (a or b) else 1.0
def fid_hard(base: Set[str], comp: Set[str]):
    """Return the exact-match fidelity of ``comp`` against ``base``.

    This is the value of ``jaccard(base, comp)``, unchanged.
    """
    score = jaccard(base, comp)
    return score
def delta_hard(base: Set[str], cyc: Set[str]):
    """Return the exact-match distance between ``base`` and ``cyc``.

    Computed as ``1.0`` minus ``jaccard(base, cyc)``.
    """
    similarity = jaccard(base, cyc)
    return 1.0 - similarity
def hybrid_fidelity(base_set: Set[str], comp_set: Set[str]) -> float:
    """Score ``comp_set`` against ``base_set``, with a soft fallback.

    The exact Jaccard similarity of the two sets is used whenever it is
    positive. When no string matches exactly, the score falls back to the
    Jaccard similarity of the lowercased, whitespace-split words of both
    sets, multiplied by 0.5 so it ranks below an exact match.

    Returns:
        ``0.0`` if either set is empty; the exact Jaccard score if it is
        greater than zero; otherwise half the word-level overlap ratio
        (``0.0`` if neither set contains any words).
    """
    if not base_set:
        return 0.0

    exact_score = jaccard(base_set, comp_set)
    if exact_score > 0:
        return exact_score

    # From here on there is no exact overlap between the two sets.
    if not comp_set:
        return 0.0

    # Soft similarity: compare the bags of lowercased words on each side.
    base_tokens = {
        token for entry in base_set for token in entry.lower().split()
    }
    comp_tokens = {
        token for entry in comp_set for token in entry.lower().split()
    }

    all_tokens = base_tokens | comp_tokens
    if not all_tokens:
        # Every string was empty or whitespace only; nothing to compare.
        return 0.0

    shared_tokens = base_tokens & comp_tokens
    word_ratio = len(shared_tokens) / len(all_tokens)
    # Weight soft similarity lower than an exact match.
    return word_ratio * 0.5