File size: 1,676 Bytes
2a64ad4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from typing import Set

def jaccard_index(set_a, set_b):
    """Return the Jaccard similarity |A ∩ B| / |A ∪ B| of two sets.

    Returns 0.0 when both sets are empty (union is empty), avoiding a
    ZeroDivisionError. NOTE: this differs from the convention used by
    ``jaccard`` elsewhere in this module, which treats empty/empty as 1.0.
    """
    union = set_a.union(set_b)
    if not union:
        # Both inputs empty: defined here as 0.0 rather than raising.
        return 0.0
    return len(set_a.intersection(set_b)) / len(union)

def fidelity_metric(commitments_a, commitments_b):
    """Jaccard similarity between two commitment collections.

    Both arguments are coerced to sets first, so duplicates and ordering
    in the inputs are irrelevant. Two empty collections score 0.0.
    """
    sa, sb = set(commitments_a), set(commitments_b)
    union = len(sa | sb)
    return len(sa & sb) / union if union else 0.0

def jaccard(a: Set[str], b: Set[str]) -> float:
    """Jaccard similarity of two string sets.

    Conventions: two empty sets are considered identical (1.0); one empty
    and one non-empty set share nothing (0.0). Otherwise |A∩B| / |A∪B|.
    """
    if not a:
        return 1.0 if not b else 0.0
    if not b:
        return 0.0
    return len(a & b) / len(a | b)

def fid_hard(base: Set[str], comp: Set[str]):
    """Hard (exact-match) fidelity: Jaccard similarity of the two sets.

    Empty/empty counts as perfect fidelity (1.0); empty vs. non-empty is 0.0.
    """
    # Inlined Jaccard with the empty-set conventions of ``jaccard``.
    if not base and not comp:
        return 1.0
    if not base or not comp:
        return 0.0
    return len(base & comp) / len(base | comp)

def delta_hard(base: Set[str], cyc: Set[str]):
    """Hard drift: Jaccard distance (1 − similarity) between the two sets.

    Empty/empty means no drift (0.0); empty vs. non-empty is maximal drift (1.0).
    """
    # Inlined Jaccard distance with the empty-set conventions of ``jaccard``.
    if not base and not cyc:
        return 0.0
    if not base or not cyc:
        return 1.0
    return 1.0 - len(base & cyc) / len(base | cyc)

def hybrid_fidelity(base_set: Set[str], comp_set: Set[str]) -> float:
    """
    Hybrid fidelity: Jaccard on exact match, fallback to semantic similarity.
    Smooths binary 0/1 behavior for better visualization.

    Returns 0.0 when either set is empty. If the exact Jaccard score is
    positive it is returned directly; otherwise a soft score based on
    lower-cased word overlap is returned, down-weighted by 0.5 so it never
    outranks an exact match.
    """
    # An empty baseline or comparison set gives no signal to score against.
    if not base_set or not comp_set:
        return 0.0

    # Exact-match path: standard Jaccard over whole strings.
    exact = len(base_set & comp_set) / len(base_set | comp_set)
    if exact > 0:
        return exact

    # Soft path: no string matched exactly, so compare bags of words instead.
    tokens_a = {word for item in base_set for word in item.lower().split()}
    tokens_b = {word for item in comp_set for word in item.lower().split()}

    token_union = len(tokens_a | tokens_b)
    if token_union == 0:
        # Only whitespace/empty strings on both sides — nothing to compare.
        return 0.0

    soft = len(tokens_a & tokens_b) / token_union
    return soft * 0.5  # Weight soft similarity lower than exact match