Spaces:

riyasuryawanshi746
/

Major_Project

Sleeping

File size: 7,083 Bytes

# inference.py
# v5.4 — Confidence calibration fixed.
# Root cause of "Neural=0.993, Confidence=LOW 21%" bug:
#   The agreement factor penalized high-neural / zero-symbolic as "disagreement",
#   but this is a legitimate state (neural model is certain; no rules triggered).
#   Fix: the divergence-based agreement term is now only computed when BOTH
#   scores are non-trivial (> 0.05). When symbolic is near zero, the neural
#   score alone is treated as the evidence, and the agreement factor is scaled
#   from 0.50 (borderline neural score) up to 0.80 (decisive neural score)
#   rather than collapsing to nearly 0.

from __future__ import annotations

import logging

# Clause-type labels classified as intellectual-property clauses.
# NOTE(review): presumably this is what drives the `is_ip_clause` flag passed
# to the fusion step — the lookup itself is outside this section; confirm.
IP_CLAUSE_TYPES = {
    "IP Ownership Assignment",
    "Joint IP Ownership",
    "Irrevocable Or Perpetual License",
    "Unlimited/All-You-Can-Eat-License",
    "Source Code Escrow",
}

# Maps each symbolic rule ID to the feature names that rule depends on.
RULE_FEATURE_DEPS = {
    # ICA_* rules
    "ICA_001": ["has_liability_cap", "excludes_gross_negligence"],
    "ICA_002": ["unilateral_termination", "notice_period_defined"],
    "ICA_003": ["non_compete_years"],
    "ICA_004": ["has_liquidated_damages", "damages_exceed_loss"],
    "ICA_005": ["is_wagering_clause"],
    "ICA_006": ["restrains_legal_proceedings"],
    "ICA_007": ["has_indemnity_clause", "indemnity_capped", "has_uncapped_signal"],
    "ICA_008": ["has_auto_renewal", "has_opt_out_window"],
    "ICA_009": ["has_arbitration", "arbitration_distant_venue"],
    "ICA_010": ["has_exclusivity", "exclusivity_term_defined"],
    "ICA_011": ["unilateral_price_change"],
    # DPDPA_* rules
    "DPDPA_001": ["processes_personal_data", "has_data_retention_clause"],
    "DPDPA_002": ["assigns_all_ip", "includes_pre_existing_ip"],
    "DPDPA_003": ["processes_sensitive_data", "has_consent_clause"],
    "DPDPA_004": ["processes_personal_data", "has_breach_notification"],
    # ITA_* / CPA_* rules
    "ITA_001": ["handles_digital_data", "has_security_clause"],
    "CPA_001": ["is_consumer_contract", "has_one_sided_clause"],
}

# ── Risk-level thresholds (single source of truth) ──────────────────────────
# Read by level_from_score() to label a fused score and by
# _compute_confidence() to measure the distance to the nearest boundary.
RISK_LOW_MAX    = 0.50    # score < 0.50 → Low
RISK_MEDIUM_MAX = 0.80    # 0.50 ≤ score ≤ 0.80 → Medium; score > 0.80 → High

# Scores at or below this value count as "near zero" in the agreement logic
# of _compute_confidence() (the comparisons there use <=).
_TRIVIAL_SCORE = 0.05


def level_from_score(score: float) -> tuple[str, str]:
    """Map a fused risk score to its ``(label, emoji)`` pair.

    Scores strictly below ``RISK_LOW_MAX`` are "Low", scores up to and
    including ``RISK_MEDIUM_MAX`` are "Medium", and everything else is "High".
    """
    if score < RISK_LOW_MAX:
        label, icon = "Low", "🟢"
    elif score <= RISK_MEDIUM_MAX:
        label, icon = "Medium", "🟡"
    else:
        label, icon = "High", "🔴"
    return label, icon


def _symbolic_rule_score(features: dict, symbolic_rules: list) -> dict:
    """Evaluate symbolic rules. Score is clamped to [0, 1]."""
    triggered, total = [], 0.0
    for rule in symbolic_rules:
        try:
            if rule["condition"](features):
                triggered.append(rule)
                total += rule["penalty"]
        except Exception:
            pass
    return {
        "symbolic_score":  round(min(total, 1.0), 3),
        "triggered_rules": triggered,
    }


def _neuro_symbolic_fusion(
    neural: float,
    symbolic: float,
    is_ip_clause: bool = False,
) -> dict:
    """Blend the neural and symbolic scores into one fused risk score.

    The blend is a plain weighted sum that favours the neural score:
    0.75 / 0.25 by default, shifting to 0.60 / 0.40 when the clause is an IP
    clause and the symbolic score is positive.  The sum is clamped to [0, 1]
    and rounded to 3 decimals; no minimum floor is applied, so
    ``"floor_applied"`` is always ``False``.

    Returns:
        A dict with the fused ``"score"``, its risk ``"level"`` and
        ``"emoji"`` (from ``level_from_score``), and a ``"breakdown"`` of the
        inputs, weights, unclamped sum and a human-readable formula.
    """
    # IP clauses backed by symbolic evidence lean more heavily on the rules.
    ip_with_rules = is_ip_clause and symbolic > 0
    w_neural, w_symbolic = (0.60, 0.40) if ip_with_rules else (0.75, 0.25)

    blended = w_neural * neural + w_symbolic * symbolic
    floored = max(blended, 0.0)
    final = round(min(floored, 1.0), 3)
    blended_rounded = round(blended, 3)

    risk_label, risk_icon = level_from_score(final)

    formula = (
        f"({w_neural:.2f} × {neural:.3f}) + ({w_symbolic:.2f} × {symbolic:.3f}) "
        f"= {blended_rounded}"
    )

    breakdown = {
        "neural_score":   round(neural, 3),
        "symbolic_score": round(symbolic, 3),
        "weights":        {"neural": w_neural, "symbolic": w_symbolic},
        "raw_fused":      blended_rounded,
        "floor_applied":  False,
        "final":          final,
        "formula":        formula,
    }
    return {
        "score":     final,
        "level":     risk_label,
        "emoji":     risk_icon,
        "breakdown": breakdown,
    }


def _compute_confidence(
    neural: float,
    symbolic: float,
    fused: float,
    num_triggered: int,
    neural_loaded: bool = True,
) -> dict:
    """Estimate how much trust to place in a fused risk score.

    The confidence score is a weighted blend of three factors:

    * boundary distance (weight 0.40) – gap between ``fused`` and the closer
      of the two risk-level cut-offs, saturating once the gap reaches 0.20.
    * agreement (weight 0.35) – how consistent the neural and symbolic
      signals are.  A near-zero symbolic score is not counted as
      disagreement; only two non-trivial scores are compared directly.
    * rule strength (weight 0.25) – grows with the number of triggered rules.

    Returns:
        A dict with ``"level"`` ("High" from 0.65, "Medium" from 0.40,
        otherwise "Low"), the blended ``"score"`` rounded to 3 decimals, and
        the rounded ``"factors"`` (raw boundary distance, agreement, rule
        strength).
    """
    # Factor 1: how far the fused score sits from the nearest level boundary.
    gap_to_low_cutoff = abs(fused - RISK_LOW_MAX)
    gap_to_medium_cutoff = abs(fused - RISK_MEDIUM_MAX)
    boundary_dist = min(gap_to_low_cutoff, gap_to_medium_cutoff)
    dist_factor = min(boundary_dist / 0.20, 1.0)

    # Factors 2 and 3 are delegated to the helpers defined below.
    agree_factor = _agreement_factor(neural, symbolic, neural_loaded)
    rule_factor = _rule_strength_factor(num_triggered)

    score = round(
        0.40 * dist_factor + 0.35 * agree_factor + 0.25 * rule_factor,
        3,
    )

    # First band whose cut-off the score reaches wins; otherwise "Low".
    bands = ((0.65, "High"), (0.40, "Medium"))
    level = next((label for cutoff, label in bands if score >= cutoff), "Low")

    factors = {
        "boundary_dist": round(boundary_dist, 3),
        "agreement":     round(agree_factor, 3),
        "rule_strength": round(rule_factor, 3),
    }
    return {"level": level, "score": score, "factors": factors}


def _agreement_factor(neural: float, symbolic: float, neural_loaded: bool) -> float:
    """Return the agreement factor between the neural and symbolic scores."""
    if not neural_loaded:
        # No neural signal available: fall back to a moderate value.
        return 0.50
    if symbolic <= _TRIVIAL_SCORE:
        # No meaningful symbolic signal, so the neural score stands alone.
        # The further it is from 0.50, the more decisive the verdict:
        # 0.50 for a borderline score, up to 0.80 at either extreme.
        decisiveness = abs(neural - 0.50) / 0.50
        return 0.50 + 0.30 * decisiveness
    if neural <= _TRIVIAL_SCORE:
        # Symbolic score is non-trivial while the model sees almost no risk:
        # genuine disagreement.
        return 0.30
    # Both signals are meaningful: penalise by their absolute divergence.
    divergence = min(abs(neural - symbolic), 1.0)
    return 1.0 - divergence


def _rule_strength_factor(num_triggered: int) -> float:
    """Return the rule-strength factor for ``num_triggered`` fired rules."""
    if num_triggered == 0:
        return 0.40
    if num_triggered == 1:
        return 0.70
    # Each rule beyond the first adds 0.10, capped at 1.0.
    return min(0.70 + 0.10 * (num_triggered - 1), 1.0)