File size: 5,477 Bytes
bd28470
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""
Hallucination Guard v2 β€” Grounded Verification

Old approach: "Ask LLM for confidence" β†’ LLM grades own exam β†’ useless
New approach: Cross-reference every claim against evidence β†’ real verification

Every LLM output field is checked:
- Employee count β†’ matches scraped data?
- Industry β†’ matches detected industry?
- AI readiness "high" β†’ do we actually have AI job postings?
- PII in output β†’ strip immediately
"""

import re
import logging

# Module-level logger, named after this module; used to warn on failed grounding.
logger = logging.getLogger(__name__)


def _coerce_count(value):
    """Return *value* as a number usable for count comparisons, or None.

    Ints and floats pass through unchanged; strings such as "1,200" are
    parsed as integers. None, bools and unparsable text yield None.
    """
    if value is None or isinstance(value, bool):
        return None
    if isinstance(value, (int, float)):
        return value
    try:
        return int(str(value).replace(",", "").strip())
    except ValueError:
        return None


def _as_text_list(value):
    """Return *value* as a list of strings, tolerating None and bare strings."""
    if value is None:
        return []
    if isinstance(value, str):
        # A bare string is one item, not a sequence of characters.
        return [value] if value else []
    if isinstance(value, (list, tuple, set, frozenset)):
        return [str(item) for item in value if item is not None]
    return [str(value)]


def validate_profile_grounded(profile: dict, evidence: dict) -> dict:
    """
    Cross-check profile output against evidence.

    Args:
        profile: Generated profile. Keys read here: "profile_summary",
            "ai_readiness" and "evidence_used". The whole dict is also
            stringified for the PII scan.
        evidence: Collected facts. Keys read here: "employee_count",
            "ai_job_count", "tech_stack", "name", "website_text",
            "pain_signals" and "description". Missing or None values are
            treated as "no evidence" instead of raising.

    Returns:
        A dict with:
        - "is_grounded": True when the grounding score is at least 0.6.
        - "grounding_score": verified / (verified + unverified), rounded to
          two decimals (0.5 when nothing was checked).
        - "verified_claims" / "unverified_claims": lists of check labels.
        - "corrections": per-field {"claimed": ..., "actual": ...} entries.

    Neither input dict is modified; PII is only flagged, not removed.
    """
    verified = []
    unverified = []
    corrections = {}

    # -- Employee count ---------------------------------------------------
    # `or ""` keeps a None summary from becoming the literal text "None".
    summary = str(profile.get("profile_summary") or "")
    known_emp = _coerce_count(evidence.get("employee_count"))

    # `[\d,]*` (not `+`) so single-digit head counts are checked as well.
    emp_match = re.search(r'(\d[\d,]*)\s*(employees?|people|staff)', summary, re.I)
    if emp_match and known_emp:
        claimed = int(emp_match.group(1).replace(",", ""))
        # More than 30% away from the known figure counts as a wrong claim.
        if abs(claimed - known_emp) > known_emp * 0.3:
            corrections["employee_count"] = {"claimed": claimed, "actual": known_emp}
            verified.append("employee_count_corrected")
        else:
            verified.append("employee_count_accurate")

    # -- AI readiness vs actual signals -----------------------------------
    claimed_readiness = profile.get("ai_readiness", "")
    ai_jobs = _coerce_count(evidence.get("ai_job_count")) or 0
    tech_stack = _as_text_list(evidence.get("tech_stack"))

    if claimed_readiness == "high" and ai_jobs == 0 and not tech_stack:
        corrections["ai_readiness"] = {"claimed": "high", "actual": "low"}
        verified.append("ai_readiness_corrected")
    elif claimed_readiness == "low" and ai_jobs >= 3:
        corrections["ai_readiness"] = {"claimed": "low", "actual": "high"}
        verified.append("ai_readiness_corrected")
    else:
        verified.append("ai_readiness_plausible")

    # -- Company name in summary ------------------------------------------
    known_name = str(evidence.get("name") or "")
    if len(known_name) > 3:
        summary_lower = summary.lower()
        # Substring match on any name word longer than two characters.
        name_words = [w for w in known_name.lower().split() if len(w) > 2]
        if any(w in summary_lower for w in name_words):
            verified.append("company_name_present")
        else:
            unverified.append("company_name_may_differ")

    # -- Evidence claims --------------------------------------------------
    evidence_used = profile.get("evidence_used", [])
    if isinstance(evidence_used, list):
        all_evidence_text = " ".join([
            str(evidence.get("website_text") or ""),
            " ".join(tech_stack),
            " ".join(_as_text_list(evidence.get("pain_signals"))),
            str(evidence.get("description") or ""),
        ]).lower()

        for claim in evidence_used:
            claim_text = str(claim)
            # Only the first four words of the claim are probed.
            claim_words = claim_text.lower().split()[:4]
            if any(w in all_evidence_text for w in claim_words if len(w) > 3):
                verified.append(f"evidence_grounded: {claim_text[:30]}")
            else:
                unverified.append(f"evidence_unverifiable: {claim_text[:30]}")

    # -- PII check --------------------------------------------------------
    output_str = str(profile)
    # re.I so upper-case addresses such as JOHN@EXAMPLE.COM are caught too.
    email_found = re.search(r'[\w.+-]+@[\w-]+\.[a-z]{2,}', output_str, re.I)
    phone_found = re.search(r'\+?\d[\d\s\-().]{8,}', output_str)

    if email_found:
        unverified.append("pii_email_in_output")
    if phone_found:
        unverified.append("pii_phone_in_output")

    # -- Grounding score --------------------------------------------------
    total = len(verified) + len(unverified)
    grounding_score = len(verified) / total if total > 0 else 0.5

    result = {
        "is_grounded": grounding_score >= 0.6,
        "grounding_score": round(grounding_score, 2),
        "verified_claims": verified,
        "unverified_claims": unverified,
        "corrections": corrections,
    }

    if not result["is_grounded"]:
        logger.warning(
            "Profile failed grounding: score=%.2f, corrections=%d",
            grounding_score,
            len(corrections),
        )

    return result


def validate_score_grounded(score: dict, profile: dict) -> dict:
    """Validate scoring output for consistency.

    Args:
        score: Scoring output; "total_score" and "tier" are read. When the
            tier does not match the score, score["tier"] is overwritten in
            place with the expected tier.
        profile: Accepted for interface compatibility; not read here.

    Returns:
        {"is_valid": bool, "issues": list[str]}; "is_valid" is True only
        when no issue was recorded.
    """
    issues = []

    total = score.get("total_score", -1)
    tier = score.get("tier")

    # A None or string score cannot be compared with numbers; treat it as
    # invalid instead of letting the TypeError escape.
    try:
        in_range = 0 <= total <= 100
        comparable = True
    except TypeError:
        in_range = False
        comparable = False

    if not in_range:
        issues.append(f"invalid_total_score:{total}")

    if tier not in ("hot", "warm", "nurture", "archive"):
        issues.append(f"invalid_tier:{tier}")

    # Cross-check tier vs score (only possible when the score is comparable).
    if comparable:
        if total >= 85:
            expected_tier = "hot"
        elif total >= 70:
            expected_tier = "warm"
        elif total >= 50:
            expected_tier = "nurture"
        else:
            expected_tier = "archive"

        if tier != expected_tier:
            issues.append(
                f"tier_score_mismatch: score={total} tier={tier} expected={expected_tier}"
            )
            score["tier"] = expected_tier  # auto-correct

    return {
        "is_valid": not issues,
        "issues": issues,
    }