# inference.py
# v5.4 — Confidence calibration fixed.
# Root cause of "Neural=0.993, Confidence=LOW 21%" bug:
# The agreement factor penalized high-neural / zero-symbolic as "disagreement",
# but this is a legitimate state (neural model is certain; no rules triggered).
# Fix: agreement is now only computed between the two scores when BOTH are
# non-trivial (> 0.05). When symbolic is near zero, we treat the neural score
# alone as the evidence and give an agreement factor between 0.50 and 0.80,
# scaled by how decisive the neural score is, rather than nearly 0.
from __future__ import annotations
# Set of clause-type labels that count as intellectual-property clauses.
# NOTE(review): nothing in the visible part of this file reads this set;
# presumably callers use it to derive the `is_ip_clause` flag passed to
# _neuro_symbolic_fusion — confirm against the call site.
IP_CLAUSE_TYPES = {
    "IP Ownership Assignment", "Joint IP Ownership",
    "Irrevocable Or Perpetual License",
    "Unlimited/All-You-Can-Eat-License", "Source Code Escrow",
}
# Rule -> feature dependencies (unchanged)
# Maps each rule ID to a list of feature names.
# NOTE(review): presumably these are the keys of the `features` dict that the
# rule's condition reads; nothing in the visible part of this file consumes
# this mapping — confirm against the rule definitions.
RULE_FEATURE_DEPS = {
    "ICA_001": ["has_liability_cap", "excludes_gross_negligence"],
    "ICA_002": ["unilateral_termination", "notice_period_defined"],
    "ICA_003": ["non_compete_years"],
    "ICA_004": ["has_liquidated_damages", "damages_exceed_loss"],
    "ICA_005": ["is_wagering_clause"],
    "ICA_006": ["restrains_legal_proceedings"],
    "ICA_007": ["has_indemnity_clause", "indemnity_capped", "has_uncapped_signal"],
    "ICA_008": ["has_auto_renewal", "has_opt_out_window"],
    "ICA_009": ["has_arbitration", "arbitration_distant_venue"],
    "ICA_010": ["has_exclusivity", "exclusivity_term_defined"],
    "ICA_011": ["unilateral_price_change"],
    "DPDPA_001": ["processes_personal_data", "has_data_retention_clause"],
    "DPDPA_002": ["assigns_all_ip", "includes_pre_existing_ip"],
    "DPDPA_003": ["processes_sensitive_data", "has_consent_clause"],
    "DPDPA_004": ["processes_personal_data", "has_breach_notification"],
    "ITA_001": ["handles_digital_data", "has_security_clause"],
    "CPA_001": ["is_consumer_contract", "has_one_sided_clause"],
}
# ── Risk-level thresholds (single source of truth) ──────────────────────────
# Both values are read by level_from_score and _compute_confidence.
RISK_LOW_MAX = 0.50     # < 0.50 → Low
RISK_MEDIUM_MAX = 0.80  # 0.50–0.80 → Medium; > 0.80 → High
# Threshold below which a score is considered "near zero" for agreement logic
_TRIVIAL_SCORE = 0.05
def level_from_score(score: float) -> tuple[str, str]:
    """Return (level_label, emoji) for a fused score under the v5.4 thresholds.

    Args:
        score: Fused risk score to classify.

    Returns:
        ``("Low", green circle)`` when ``score < RISK_LOW_MAX``,
        ``("Medium", yellow circle)`` when ``score <= RISK_MEDIUM_MAX``,
        otherwise ``("High", red circle)``.
    """
    # The emoji are spelled as Unicode escapes so the literals survive a
    # mis-decoded copy of this file (the previous values were mojibake of the
    # UTF-8 bytes for these three code points).
    if score < RISK_LOW_MAX:
        return "Low", "\U0001F7E2"  # LARGE GREEN CIRCLE
    if score <= RISK_MEDIUM_MAX:
        return "Medium", "\U0001F7E1"  # LARGE YELLOW CIRCLE
    return "High", "\U0001F534"  # LARGE RED CIRCLE
def _symbolic_rule_score(features: dict, symbolic_rules: list) -> dict:
"""Evaluate symbolic rules. Score is clamped to [0, 1]."""
triggered, total = [], 0.0
for rule in symbolic_rules:
try:
if rule["condition"](features):
triggered.append(rule)
total += rule["penalty"]
except Exception:
pass
return {
"symbolic_score": round(min(total, 1.0), 3),
"triggered_rules": triggered,
}
def _neuro_symbolic_fusion(
    neural: float,
    symbolic: float,
    is_ip_clause: bool = False,
) -> dict:
    """
    Weighted fusion — neural-dominant by design.
    No artificial floor: a weak symbolic trigger no longer inflates risk.

    Args:
        neural: Neural model score.
        symbolic: Symbolic rule score.
        is_ip_clause: When true AND ``symbolic > 0``, the weights shift from
            0.75/0.25 to 0.60/0.40 (neural/symbolic).

    Returns:
        A dict with:
          * ``"score"`` - weighted sum clamped to [0, 1], rounded to 3 decimals.
          * ``"level"`` / ``"emoji"`` - result of ``level_from_score(score)``.
          * ``"breakdown"`` - rounded inputs, the weights used, the unclamped
            weighted sum (``"raw_fused"``), ``"floor_applied"`` (always
            False), ``"final"`` (same as ``"score"``) and a human-readable
            ``"formula"`` string.
    """
    if is_ip_clause and symbolic > 0:
        w_n, w_s = 0.60, 0.40
    else:
        w_n, w_s = 0.75, 0.25
    raw = w_n * neural + w_s * symbolic
    score = round(min(max(raw, 0.0), 1.0), 3)
    level, emoji = level_from_score(score)
    # MULTIPLICATION SIGN, written as an escape so the displayed formula
    # survives a mis-decoded copy of this file (it had degraded to mojibake).
    times = "\u00d7"
    # The formula shows the unclamped weighted sum, matching "raw_fused".
    formula = (
        f"({w_n:.2f} {times} {neural:.3f}) + ({w_s:.2f} {times} {symbolic:.3f}) "
        f"= {round(raw, 3)}"
    )
    return {
        "score": score,
        "level": level,
        "emoji": emoji,
        "breakdown": {
            "neural_score": round(neural, 3),
            "symbolic_score": round(symbolic, 3),
            "weights": {"neural": w_n, "symbolic": w_s},
            "raw_fused": round(raw, 3),
            "floor_applied": False,
            "final": score,
            "formula": formula,
        },
    }
def _compute_confidence(
    neural: float,
    symbolic: float,
    fused: float,
    num_triggered: int,
    neural_loaded: bool = True,
) -> dict:
    """
    Combine three factors into a confidence score and level for a verdict.

    Factors (weights 0.40 / 0.35 / 0.25):
      boundary distance - how far ``fused`` lies from the nearer of the two
          risk thresholds (RISK_LOW_MAX, RISK_MEDIUM_MAX); saturates at a
          distance of 0.20.
      agreement - how well the neural and symbolic signals line up:
          * neural model not loaded          -> fixed 0.50
          * symbolic <= _TRIVIAL_SCORE       -> 0.50..0.80, growing with the
            distance of ``neural`` from 0.50 (neural-only verdict, not
            counted as disagreement)
          * neural <= _TRIVIAL_SCORE         -> fixed 0.30 (rules fired but
            the model disagrees)
          * otherwise                        -> 1 - |neural - symbolic|,
            with the difference capped at 1
      rule strength - 0.40 with no triggered rules, 0.70 with one, then
          +0.10 per additional rule, capped at 1.0.

    Returns:
        A dict with ``"level"`` ("High" at >= 0.65, "Medium" at >= 0.40,
        else "Low"), ``"score"`` (rounded to 3 decimals) and ``"factors"``
        (the raw boundary distance plus the agreement and rule-strength
        factors, each rounded to 3 decimals).
    """
    # --- Factor 1: distance to the nearest risk-level boundary -------------
    gap_to_low = abs(fused - RISK_LOW_MAX)
    gap_to_medium = abs(fused - RISK_MEDIUM_MAX)
    boundary_dist = min(gap_to_low, gap_to_medium)
    dist_factor = min(boundary_dist / 0.20, 1.0)

    # --- Factor 2: agreement between the two signals -----------------------
    if neural_loaded:
        if symbolic <= _TRIVIAL_SCORE:
            # No rules fired: the neural score is the only evidence, so rate
            # it by decisiveness (0 at 0.50, 1 at either extreme).
            agree_factor = 0.50 + 0.30 * (abs(neural - 0.50) / 0.50)
        elif neural <= _TRIVIAL_SCORE:
            # Rules fired while the model sees nothing: real disagreement.
            agree_factor = 0.30
        else:
            # Both signals carry weight: penalise their divergence.
            divergence = min(abs(neural - symbolic), 1.0)
            agree_factor = 1.0 - divergence
    else:
        # Without a neural signal, settle for a middling value.
        agree_factor = 0.50

    # --- Factor 3: strength of the deterministic evidence ------------------
    if num_triggered == 0:
        rule_factor = 0.40
    elif num_triggered == 1:
        rule_factor = 0.70
    else:
        rule_factor = min(0.70 + 0.10 * (num_triggered - 1), 1.0)

    score = round(
        0.40 * dist_factor + 0.35 * agree_factor + 0.25 * rule_factor,
        3,
    )
    level = "High" if score >= 0.65 else ("Medium" if score >= 0.40 else "Low")

    factors = {
        "boundary_dist": round(boundary_dist, 3),
        "agreement": round(agree_factor, 3),
        "rule_strength": round(rule_factor, 3),
    }
    return {"level": level, "score": score, "factors": factors}