File size: 7,083 Bytes
28db5b3
6893de4
 
 
 
 
 
 
 
28db5b3
 
 
 
 
 
 
 
 
6893de4
28db5b3
 
 
 
 
 
 
af910e9
28db5b3
 
 
 
 
 
 
 
 
 
 
 
af910e9
 
 
 
6893de4
 
 
af910e9
 
6893de4
af910e9
 
 
 
 
 
28db5b3
 
af910e9
28db5b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af910e9
 
28db5b3
 
 
af910e9
 
28db5b3
 
af910e9
28db5b3
af910e9
28db5b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6893de4
28db5b3
 
 
 
 
 
 
 
 
 
 
 
 
 
6893de4
 
 
 
 
 
 
 
 
 
 
28db5b3
6893de4
af910e9
 
28db5b3
6893de4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28db5b3
6893de4
 
28db5b3
6893de4
28db5b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# inference.py
# v5.4 — Confidence calibration fixed.
# Root cause of "Neural=0.993, Confidence=LOW 21%" bug:
#   The agreement factor penalized high-neural / zero-symbolic as "disagreement",
#   but this is a legitimate state (neural model is certain; no rules triggered).
#   Fix: agreement is now only computed between the two scores when BOTH are
#   non-trivial (> 0.05). When symbolic is near zero, we treat the neural score
#   alone as the evidence and give an agreement factor between 0.50 and 0.80,
#   scaled by how decisive the neural score is, rather than nearly 0.

from __future__ import annotations

# Clause-type labels that are treated as intellectual-property clauses.
# NOTE(review): presumably matched against a clause's predicted type to decide
# the `is_ip_clause` flag used by the fusion step -- confirm against the caller.
IP_CLAUSE_TYPES = {
    "IP Ownership Assignment",
    "Irrevocable Or Perpetual License",
    "Joint IP Ownership",
    "Source Code Escrow",
    "Unlimited/All-You-Can-Eat-License",
}

# Rule id -> names of the feature keys that rule depends on (unchanged).
# Entries are grouped by rule-id prefix; insertion order is preserved.
RULE_FEATURE_DEPS = {
    # ICA_* rules
    "ICA_001": ["has_liability_cap", "excludes_gross_negligence"],
    "ICA_002": ["unilateral_termination", "notice_period_defined"],
    "ICA_003": ["non_compete_years"],
    "ICA_004": ["has_liquidated_damages", "damages_exceed_loss"],
    "ICA_005": ["is_wagering_clause"],
    "ICA_006": ["restrains_legal_proceedings"],
    "ICA_007": [
        "has_indemnity_clause",
        "indemnity_capped",
        "has_uncapped_signal",
    ],
    "ICA_008": ["has_auto_renewal", "has_opt_out_window"],
    "ICA_009": ["has_arbitration", "arbitration_distant_venue"],
    "ICA_010": ["has_exclusivity", "exclusivity_term_defined"],
    "ICA_011": ["unilateral_price_change"],
    # DPDPA_* rules
    "DPDPA_001": ["processes_personal_data", "has_data_retention_clause"],
    "DPDPA_002": ["assigns_all_ip", "includes_pre_existing_ip"],
    "DPDPA_003": ["processes_sensitive_data", "has_consent_clause"],
    "DPDPA_004": ["processes_personal_data", "has_breach_notification"],
    # ITA_* rules
    "ITA_001": ["handles_digital_data", "has_security_clause"],
    # CPA_* rules
    "CPA_001": ["is_consumer_contract", "has_one_sided_clause"],
}

# Risk-level thresholds (single source of truth for the level boundaries).
#   fused score <  RISK_LOW_MAX                     -> Low
#   RISK_LOW_MAX <= fused score <= RISK_MEDIUM_MAX  -> Medium
#   fused score >  RISK_MEDIUM_MAX                  -> High
RISK_LOW_MAX = 0.5
RISK_MEDIUM_MAX = 0.8

# Scores at or below this value count as "near zero" in the agreement logic.
_TRIVIAL_SCORE = 0.05


def level_from_score(score: float) -> tuple[str, str]:
    """Map a fused risk score to its ``(level_label, emoji)`` pair.

    The bands are driven by the module-level thresholds:

    * ``score < RISK_LOW_MAX``     -> ``("Low", <green circle>)``
    * ``score <= RISK_MEDIUM_MAX`` -> ``("Medium", <yellow circle>)``
    * anything else                -> ``("High", <red circle>)``

    "Anything else" includes NaN, because both comparisons are false for it.

    Args:
        score: Fused risk score to classify.

    Returns:
        A two-tuple of the level label and a single-emoji string.
    """
    # The emoji are spelled as Unicode escapes so this source stays pure ASCII
    # and the literals cannot be corrupted by a wrong encoding round trip
    # (the previous literals were mis-decoded UTF-8 bytes, not emoji).
    if score < RISK_LOW_MAX:
        return "Low", "\U0001F7E2"  # large green circle
    if score <= RISK_MEDIUM_MAX:
        return "Medium", "\U0001F7E1"  # large yellow circle
    return "High", "\U0001F534"  # large red circle


def _symbolic_rule_score(features: dict, symbolic_rules: list) -> dict:
    """Evaluate the symbolic rules against *features* and total their penalties.

    Each rule is a mapping with a ``"condition"`` callable (called with
    *features*; a truthy result means the rule fired) and a numeric
    ``"penalty"`` that is added to the running total when it fires.

    Evaluation is best-effort: a rule whose condition raises, or whose penalty
    is missing or cannot be added, is logged as a warning and skipped as a
    whole. Such a rule is neither reported as triggered nor counted in the
    score, so the two outputs always describe the same set of rules.

    Args:
        features: Feature mapping handed to every rule condition.
        symbolic_rules: Rules to evaluate, in order.

    Returns:
        A dict with ``"symbolic_score"`` (sum of the fired rules' penalties,
        clamped to ``[0, 1]`` and rounded to 3 decimals) and
        ``"triggered_rules"`` (the fired rule objects, in input order).
    """
    import logging  # function-scope import keeps this block self-contained

    log = logging.getLogger(__name__)
    triggered: list = []
    total = 0.0
    for index, rule in enumerate(symbolic_rules):
        try:
            if not rule["condition"](features):
                continue
            # Compute the new total before committing anything, so a bad
            # penalty cannot leave the rule half-applied.
            updated_total = total + rule["penalty"]
        except Exception:
            # Deliberately broad: rule conditions are arbitrary callables and
            # one broken rule must not abort the whole evaluation.
            log.warning(
                "Skipping symbolic rule #%d: evaluation failed",
                index,
                exc_info=True,
            )
            continue
        triggered.append(rule)
        total = updated_total

    return {
        # Clamp on both sides: negative penalties must not yield a score < 0.
        "symbolic_score": round(min(max(total, 0.0), 1.0), 3),
        "triggered_rules": triggered,
    }


def _neuro_symbolic_fusion(
    neural: float,
    symbolic: float,
    is_ip_clause: bool = False,
) -> dict:
    """Fuse the neural and symbolic risk scores into one weighted score.

    The fusion is neural-dominant by design: the weights are 0.75 / 0.25
    (neural / symbolic) by default and shift to 0.60 / 0.40 only for an IP
    clause that has a positive symbolic score. No artificial floor is applied,
    so a weak symbolic trigger cannot inflate the result beyond the weighted
    sum.

    Args:
        neural: Risk score from the neural model.
        symbolic: Risk score from the symbolic rules.
        is_ip_clause: Whether the clause is an IP clause; only changes the
            weights when ``symbolic`` is also greater than zero.

    Returns:
        A dict with:

        * ``"score"``  - weighted sum clamped to ``[0, 1]``, rounded to 3
          decimals.
        * ``"level"`` / ``"emoji"`` - the pair returned by
          ``level_from_score`` for that score.
        * ``"breakdown"`` - the rounded inputs, the weights used, the
          unclamped weighted sum (``"raw_fused"``), ``"floor_applied"``
          (always ``False`` here), the final score and a human-readable
          ``"formula"`` string.
    """
    # IP clauses with some symbolic evidence lean harder on the rules.
    if is_ip_clause and symbolic > 0:
        w_n, w_s = 0.60, 0.40
    else:
        w_n, w_s = 0.75, 0.25

    raw = w_n * neural + w_s * symbolic
    # Clamp so out-of-range inputs cannot push the final score outside [0, 1].
    score = round(min(max(raw, 0.0), 1.0), 3)

    level, emoji = level_from_score(score)

    # "\u00d7" is the multiplication sign. It is written as an escape so the
    # source stays ASCII; the previous literal was mis-decoded UTF-8 and showed
    # up as garbage in the rendered formula.
    formula = (
        f"({w_n:.2f} \u00d7 {neural:.3f}) + ({w_s:.2f} \u00d7 {symbolic:.3f}) "
        f"= {round(raw, 3)}"
    )

    return {
        "score":     score,
        "level":     level,
        "emoji":     emoji,
        "breakdown": {
            "neural_score":   round(neural, 3),
            "symbolic_score": round(symbolic, 3),
            "weights":        {"neural": w_n, "symbolic": w_s},
            "raw_fused":      round(raw, 3),
            "floor_applied":  False,
            "final":          score,
            "formula":        formula,
        },
    }


def _compute_confidence(
    neural: float,
    symbolic: float,
    fused: float,
    num_triggered: int,
    neural_loaded: bool = True,
) -> dict:
    """Estimate how much the fused risk verdict can be trusted (v5.4).

    Three components are blended with weights 0.40 / 0.35 / 0.25:

    boundary   - distance of *fused* from the nearer risk-level threshold
                 (RISK_LOW_MAX or RISK_MEDIUM_MAX), scaled so that a distance
                 of 0.20 or more counts as 1.0.
    agreement  - consistency of the neural and symbolic signals:
                   * neural model not loaded     -> 0.50
                   * symbolic <= _TRIVIAL_SCORE  -> 0.50 + 0.30 * decisiveness,
                     where decisiveness is |neural - 0.50| / 0.50. A silent
                     rule engine is NOT treated as disagreement.
                   * neural <= _TRIVIAL_SCORE    -> 0.30 (rules fired but the
                     model sees no risk)
                   * otherwise                   -> 1 - min(|neural - symbolic|, 1)
    rule       - 0.40 with no triggered rules, 0.70 with one, plus 0.10 for
                 every further rule, capped at 1.0.

    Returns a dict with:
        "level"   - "High" (score >= 0.65), "Medium" (score >= 0.40) or "Low".
        "score"   - the blended confidence, rounded to 3 decimals.
        "factors" - "boundary_dist" (the raw, unscaled distance), "agreement"
                    and "rule_strength", each rounded to 3 decimals.
    """
    # Component 1: how far the fused score sits from the nearest threshold.
    gap_to_low = abs(fused - RISK_LOW_MAX)
    gap_to_medium = abs(fused - RISK_MEDIUM_MAX)
    nearest_gap = min(gap_to_low, gap_to_medium)
    boundary_component = min(nearest_gap / 0.20, 1.0)

    # Component 2: do the two signals tell a consistent story?
    def _agreement() -> float:
        if not neural_loaded:
            # No neural signal at all: settle for a moderate value.
            return 0.50
        if symbolic <= _TRIVIAL_SCORE:
            # No rules fired, so the neural score is the only evidence.
            # 0 when neural sits on 0.50, 1 when it sits on 0 or 1.
            decisiveness = abs(neural - 0.50) / 0.50
            return 0.50 + 0.30 * decisiveness
        if neural <= _TRIVIAL_SCORE:
            # Rules fired but the model disagrees: genuine conflict.
            return 0.30
        # Both signals carry weight: measure their actual divergence.
        return 1.0 - min(abs(neural - symbolic), 1.0)

    agreement_component = _agreement()

    # Component 3: strength of the deterministic evidence. The one-rule case
    # falls out of the general formula (0.70 + 0.10 * 0 == 0.70).
    rule_component = (
        0.40
        if num_triggered == 0
        else min(0.70 + 0.10 * (num_triggered - 1), 1.0)
    )

    confidence = round(
        0.40 * boundary_component
        + 0.35 * agreement_component
        + 0.25 * rule_component,
        3,
    )

    # First cutoff reached wins; anything below both stays "Low".
    label = "Low"
    for cutoff, name in ((0.65, "High"), (0.40, "Medium")):
        if confidence >= cutoff:
            label = name
            break

    return {
        "level": label,
        "score": confidence,
        "factors": {
            "boundary_dist": round(nearest_gap, 3),
            "agreement": round(agreement_component, 3),
            "rule_strength": round(rule_component, 3),
        },
    }