Spaces:

dexakif
/

clienttarget-python

Running

File size: 5,477 Bytes

bd28470

"""
Hallucination Guard v2 — Grounded Verification

Old approach: "Ask LLM for confidence" → LLM grades own exam → useless
New approach: Cross-reference every claim against evidence → real verification

Every LLM output field is checked:
- Employee count → matches scraped data?
- Industry → matches detected industry?
- AI readiness "high" → do we actually have AI job postings?
- PII in output → flagged as an unverified claim (emails, phone-like numbers)
"""

import re
import logging

logger = logging.getLogger(__name__)


# Patterns are compiled once at import time instead of on every call.
_EMPLOYEE_CLAIM_RE = re.compile(r'(\d[\d,]*)\s*(employees?|people|staff)', re.I)
_EMAIL_RE = re.compile(r'[\w.+-]+@[\w-]+\.[a-z]{2,}', re.I)
_PHONE_CANDIDATE_RE = re.compile(r'\+?\d[\d\s\-().]{8,}')
# A phone candidate only counts as PII when it holds at least this many
# digits; otherwise one digit followed by spaces/dashes/dots would be flagged.
_MIN_PHONE_DIGITS = 7


def _to_number(value):
    """Return *value* as a number, parsing numeric strings; None otherwise."""
    if isinstance(value, (int, float)):
        return value
    if isinstance(value, str):
        cleaned = value.replace(",", "").strip()
        for parse in (int, float):
            try:
                return parse(cleaned)
            except ValueError:
                continue
    return None


def _join_text(value):
    """Flatten a string or an iterable of items into one space-separated string."""
    if not value:
        return ""
    if isinstance(value, str):
        return value
    try:
        return " ".join(str(item) for item in value if item is not None)
    except TypeError:
        # Not iterable (e.g. a bare number): fall back to its string form.
        return str(value)


def validate_profile_grounded(profile: dict, evidence: dict) -> dict:
    """
    Cross-check profile output against evidence.

    Args:
        profile: Generated profile. Reads ``profile_summary``,
            ``ai_readiness`` and ``evidence_used``; the whole dict is also
            scanned (via ``str``) for e-mail addresses and phone-like numbers.
        evidence: Collected facts. Reads ``employee_count``, ``ai_job_count``,
            ``tech_stack``, ``name``, ``website_text``, ``pain_signals`` and
            ``description``. Missing or ``None`` values are treated as absent.

    Returns:
        Dict with ``is_grounded`` (score >= 0.6), ``grounding_score``
        (verified / (verified + unverified), rounded to 2 places; 0.5 when
        nothing was checked), ``verified_claims``, ``unverified_claims`` and
        ``corrections`` (field -> ``{"claimed": ..., "actual": ...}``).
    """
    verified = []
    unverified = []
    corrections = {}

    # ── Employee count ────────────────────────────────────────
    summary = str(profile.get("profile_summary") or "")
    known_emp = _to_number(evidence.get("employee_count"))

    emp_match = _EMPLOYEE_CLAIM_RE.search(summary)
    if emp_match and known_emp:
        claimed = int(emp_match.group(1).replace(",", ""))
        # Tolerate up to 30% deviation from the known head count.
        if abs(claimed - known_emp) > known_emp * 0.3:
            corrections["employee_count"] = {"claimed": claimed, "actual": known_emp}
            # NOTE(review): corrected claims are counted as *verified*, which
            # raises the grounding score — confirm this is intended.
            verified.append("employee_count_corrected")
        else:
            verified.append("employee_count_accurate")

    # ── AI readiness vs actual signals ────────────────────────
    # Normalise casing/whitespace so "High" and "high " are compared alike.
    claimed_readiness = str(profile.get("ai_readiness") or "").strip().lower()
    ai_jobs = _to_number(evidence.get("ai_job_count")) or 0
    tech_stack = evidence.get("tech_stack") or []

    if claimed_readiness == "high" and ai_jobs == 0 and not tech_stack:
        corrections["ai_readiness"] = {"claimed": "high", "actual": "low"}
        verified.append("ai_readiness_corrected")
    elif claimed_readiness == "low" and ai_jobs >= 3:
        corrections["ai_readiness"] = {"claimed": "low", "actual": "high"}
        verified.append("ai_readiness_corrected")
    else:
        verified.append("ai_readiness_plausible")

    # ── Company name in summary ───────────────────────────────
    known_name = str(evidence.get("name") or "")
    if len(known_name) > 3:
        summary_lower = summary.lower()
        # Words of 1-2 characters are skipped as too weak a signal.
        name_words = [w for w in known_name.lower().split() if len(w) > 2]
        if any(w in summary_lower for w in name_words):
            verified.append("company_name_present")
        else:
            unverified.append("company_name_may_differ")

    # ── Evidence claims ───────────────────────────────────────
    evidence_used = profile.get("evidence_used", [])
    if isinstance(evidence_used, list):
        all_evidence_text = " ".join([
            str(evidence.get("website_text") or ""),
            _join_text(tech_stack),
            _join_text(evidence.get("pain_signals")),
            str(evidence.get("description") or ""),
        ]).lower()

        for claim in evidence_used:
            claim_text = str(claim)
            # Only the first four words of the claim are checked, and only
            # those longer than three characters.
            claim_words = claim_text.lower().split()[:4]
            if any(w in all_evidence_text for w in claim_words if len(w) > 3):
                verified.append(f"evidence_grounded: {claim_text[:30]}")
            else:
                unverified.append(f"evidence_unverifiable: {claim_text[:30]}")

    # ── PII check ─────────────────────────────────────────────
    # PII is flagged here, not removed from the profile.
    output_str = str(profile)
    if _EMAIL_RE.search(output_str):
        unverified.append("pii_email_in_output")
    if any(
        sum(ch.isdigit() for ch in candidate.group()) >= _MIN_PHONE_DIGITS
        for candidate in _PHONE_CANDIDATE_RE.finditer(output_str)
    ):
        unverified.append("pii_phone_in_output")

    # ── Grounding score ───────────────────────────────────────
    total = len(verified) + len(unverified)
    grounding_score = len(verified) / total if total > 0 else 0.5

    result = {
        "is_grounded": grounding_score >= 0.6,
        "grounding_score": round(grounding_score, 2),
        "verified_claims": verified,
        "unverified_claims": unverified,
        "corrections": corrections,
    }

    if not result["is_grounded"]:
        logger.warning(
            "Profile failed grounding: score=%.2f, corrections=%d",
            grounding_score,
            len(corrections),
        )

    return result


# Minimum score for each tier, checked from highest to lowest; anything
# below the last floor is "archive".
_TIER_FLOORS = (("hot", 85), ("warm", 70), ("nurture", 50))
_VALID_TIERS = ("hot", "warm", "nurture", "archive")


def _score_as_number(value):
    """Return *value* as a number, parsing numeric strings; None otherwise."""
    if isinstance(value, (int, float)):
        return value
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def validate_score_grounded(score: dict, profile: dict) -> dict:
    """Validate scoring output for consistency.

    Checks that ``total_score`` is a number in [0, 100], that ``tier`` is one
    of hot/warm/nurture/archive, and that the tier matches the score
    (>=85 hot, >=70 warm, >=50 nurture, else archive).

    Args:
        score: Scoring output. ``score["tier"]`` is overwritten in place with
            the expected tier when it disagrees with the score.
        profile: Not used by this check.

    Returns:
        ``{"is_valid": bool, "issues": list[str]}``; ``is_valid`` is True
        only when no issue was recorded.
    """
    issues = []

    raw_total = score.get("total_score", -1)
    total = _score_as_number(raw_total)
    if total is None:
        # A null or non-numeric score is reported, then handled like a
        # missing score (-1) so the tier cross-check still runs.
        issues.append(f"invalid_total_score:{raw_total}")
        total = -1
    elif not (0 <= total <= 100):
        issues.append(f"invalid_total_score:{raw_total}")

    tier = score.get("tier")
    if tier not in _VALID_TIERS:
        issues.append(f"invalid_tier:{tier}")

    # Cross-check tier vs score
    expected_tier = next(
        (name for name, floor in _TIER_FLOORS if total >= floor),
        "archive",
    )
    if tier != expected_tier:
        issues.append(
            f"tier_score_mismatch: score={raw_total} tier={tier} expected={expected_tier}"
        )
        score["tier"] = expected_tier  # auto-correct

    return {
        "is_valid": not issues,
        "issues": issues,
    }