/**
 * Grounded Hallucination Detection
 *
 * Google DeepMind approach: Every LLM claim must be traceable
 * to a piece of evidence. Claims without evidence are stripped.
 *
 * This is NOT "ask LLM for confidence" — that's like asking
 * a cheater to grade their own exam.
 *
 * This IS: cross-reference every output field against source data.
 */

import { logger } from "../utils/logger";

/** Outcome of a grounding pass over one LLM output object. */
export interface GroundingResult {
  /** True when the score reaches 0.6 and no claim was flagged as stripped. */
  isGrounded: boolean;
  /** Fraction of checks that were verified, 0.0-1.0. */
  groundingScore: number;
  /** Claims that match evidence (corrected claims are recorded here too). */
  verifiedClaims: string[];
  /** Claims with no evidence. */
  unverifiedClaims: string[];
  /** Claims removed from output (also used for PII / wrong-company flags). */
  strippedClaims: string[];
  /** Field name → what the LLM claimed vs. what the evidence says. */
  corrections: Record<string, { claimed: unknown; actual: unknown }>;
}

/** Factual data we collected from providers/scrapers. */
export interface EvidenceSet {
  company_name: string;
  domain: string;
  employee_count: number | null;
  industry: string | null;
  tech_stack: string[];
  description: string | null;
  website_text: string;
  job_postings: string[];
  ai_job_count: number;
  linkedin_description: string | null;
  country: string | null;
  city: string | null;
  pain_signals_detected: string[];
}

/**
 * Validates LLM profile output against collected evidence.
 * Returns cleaned profile with unverifiable claims stripped.
 *
 * @param profile - Raw LLM profile object. Never mutated; a copy is returned.
 * @param evidence - Facts collected independently of the LLM.
 * @returns `cleaned`, the profile with corrections and PII redaction applied,
 *   and `grounding`, the bookkeeping of every check that ran.
 */
export function groundProfile(
  profile: Record<string, unknown>,
  evidence: EvidenceSet
): { cleaned: Record<string, unknown>; grounding: GroundingResult } {
  const verified: string[] = [];
  const unverified: string[] = [];
  const stripped: string[] = [];
  const corrections: GroundingResult["corrections"] = {};
  const cleaned: Record<string, unknown> = { ...profile };

  // ── Check profile_summary ──────────────────────────────────
  const summary = String(profile.profile_summary ?? "");
  const summaryLower = summary.toLowerCase();

  // Does summary mention the right company?
  if (summary.length > 20 && !containsName(summary, evidence.company_name)) {
    // Don't strip — just flag. LLM may paraphrase the name.
    // NOTE(review): the flag lives in `stripped`, so it still forces
    // isGrounded=false even though nothing is removed — confirm intended.
    stripped.push("summary_wrong_company");
  }

  // Does summary claim employee count?
  // The pattern needs at least two digit/comma characters, so single-digit
  // head counts are not checked.
  const claimedEmpMatch = summary.match(/(\d[\d,]+)\s*(employees?|people|staff|team)/i);
  const claimedDigits = claimedEmpMatch?.[1];
  const actualEmployees = evidence.employee_count;
  if (
    claimedEmpMatch &&
    claimedDigits !== undefined &&
    actualEmployees !== null &&
    actualEmployees > 0
  ) {
    const claimed = parseInt(claimedDigits.replace(/,/g, ""), 10);
    // Tolerate up to 30% deviation from the evidence before correcting.
    if (Math.abs(claimed - actualEmployees) > actualEmployees * 0.3) {
      corrections["employee_count"] = { claimed, actual: actualEmployees };
      // Fix the claim in the summary
      cleaned.profile_summary = summary.replace(
        claimedEmpMatch[0],
        `${actualEmployees} employees`
      );
      verified.push("employee_count_corrected");
    } else {
      verified.push("employee_count_accurate");
    }
  }

  // ── Check industry claim ───────────────────────────────────
  if (evidence.industry) {
    const industryWords = significantWords(evidence.industry, /[\s_]+/);
    const hasIndustryMention = industryWords.some(w => summaryLower.includes(w));
    if (hasIndustryMention) {
      verified.push("industry_match");
    } else {
      unverified.push("industry_may_differ");
    }
  }

  // ── Check tech stack claims ─────────────────────────────────
  if (Array.isArray(profile.evidence_used)) {
    // Loop-invariant needles. Blank ones are dropped because
    // `"anything".includes("")` is true and would verify every claim.
    const websiteLower = evidence.website_text.toLowerCase();
    const techNeedles = evidence.tech_stack
      .map(t => t.toLowerCase())
      .filter(isNonBlank);
    const jobNeedles = evidence.job_postings
      .map(j => j.toLowerCase().slice(0, 15))
      .filter(isNonBlank);
    const painNeedles = evidence.pain_signals_detected
      .map(p => p.toLowerCase().slice(0, 15))
      .filter(isNonBlank);

    for (const claim of profile.evidence_used as unknown[]) {
      if (typeof claim !== "string") {
        unverified.push("unverifiable: <non-string claim>");
        continue;
      }
      if (!isNonBlank(claim)) {
        // A blank claim cannot be traced to any evidence.
        unverified.push(`unverifiable: ${claim.slice(0, 40)}`);
        continue;
      }

      const claimLower = claim.toLowerCase();
      const isSupported =
        techNeedles.some(t => claimLower.includes(t)) ||
        websiteLower.includes(claimLower.slice(0, 20)) ||
        jobNeedles.some(j => claimLower.includes(j)) ||
        painNeedles.some(p => claimLower.includes(p));

      if (isSupported) {
        verified.push(`evidence: ${claim.slice(0, 40)}`);
      } else {
        unverified.push(`unverifiable: ${claim.slice(0, 40)}`);
      }
    }
  }

  // ── Check ai_readiness ─────────────────────────────────────
  const claimedReadiness = String(profile.ai_readiness ?? "");
  if (
    claimedReadiness === "high" &&
    evidence.ai_job_count === 0 &&
    evidence.tech_stack.length === 0
  ) {
    corrections["ai_readiness"] = { claimed: "high", actual: "low" };
    cleaned.ai_readiness = "low";
    verified.push("ai_readiness_corrected");
  } else if (claimedReadiness === "low" && evidence.ai_job_count >= 3) {
    corrections["ai_readiness"] = { claimed: "low", actual: "high" };
    cleaned.ai_readiness = "high";
    verified.push("ai_readiness_corrected");
  } else {
    verified.push("ai_readiness_plausible");
  }

  // ── Check for PII leakage ──────────────────────────────────
  // Detection runs on the serialized output as it stands before redaction.
  const outputStr = JSON.stringify(cleaned);
  const emailPattern = /[\w.+-]+@[\w-]+\.[a-z]{2,}/gi;
  const phonePattern = /\+?\d[\d\s\-().]{8,}/g;

  // `search` always scans from index 0, unlike `test` on a /g regex, which
  // carries `lastIndex` between calls.
  if (outputStr.search(emailPattern) !== -1) {
    stripped.push("pii_email_in_output");
    // Strip emails from every string value, including nested ones.
    for (const [key, val] of Object.entries(cleaned)) {
      cleaned[key] = redactDeep(val, emailPattern, "[EMAIL_REDACTED]");
    }
  }
  if (outputStr.search(phonePattern) !== -1) {
    stripped.push("pii_phone_in_output");
    for (const [key, val] of Object.entries(cleaned)) {
      cleaned[key] = redactDeep(val, phonePattern, "[PHONE_REDACTED]");
    }
  }

  // ── Compute grounding score ────────────────────────────────
  const totalChecks = verified.length + unverified.length + stripped.length;
  const groundingScore = totalChecks === 0 ? 0.5 : verified.length / totalChecks;

  const result: GroundingResult = {
    isGrounded: groundingScore >= 0.6 && stripped.length === 0,
    groundingScore,
    verifiedClaims: verified,
    unverifiedClaims: unverified,
    strippedClaims: stripped,
    corrections,
  };

  if (!result.isGrounded) {
    logger.warn(
      { groundingScore: groundingScore.toFixed(2), corrections: Object.keys(corrections).length },
      "Profile failed grounding — corrections applied"
    );
  }

  return { cleaned, grounding: result };
}

/**
 * Validates scoring signals against evidence.
 * Scores are computed DETERMINISTICALLY from signals —
 * LLM only extracts signals, code computes score.
 *
 * Signals contradicted by the evidence are recorded in `corrections` and
 * overwritten in the returned `cleaned` copy; `signals` itself is not mutated.
 *
 * @param signals - Raw LLM signal object; `company_fit_signals` and
 *   `ai_readiness_signals` are inspected when they are plain objects.
 * @param evidence - Facts collected independently of the LLM.
 * @returns The corrected signals and the grounding bookkeeping.
 */
export function groundSignals(
  signals: Record<string, unknown>,
  evidence: EvidenceSet
): { cleaned: Record<string, unknown>; grounding: GroundingResult } {
  const verified: string[] = [];
  const unverified: string[] = [];
  const corrections: GroundingResult["corrections"] = {};
  const cleaned: Record<string, unknown> = { ...signals };

  // Verify company_fit_signals
  const fitSignals = signals.company_fit_signals;
  if (isPlainRecord(fitSignals)) {
    if (
      fitSignals.size_appropriate === true &&
      evidence.employee_count !== null &&
      evidence.employee_count < 3
    ) {
      corrections["size_appropriate"] = { claimed: true, actual: false };
      // Apply the correction to a copy so the caller's input stays untouched.
      cleaned.company_fit_signals = { ...fitSignals, size_appropriate: false };
      verified.push("size_corrected");
    } else {
      verified.push("size_plausible");
    }
  }

  // Verify ai_readiness_signals
  const aiSignals = signals.ai_readiness_signals;
  if (isPlainRecord(aiSignals)) {
    const correctedAi: Record<string, unknown> = { ...aiSignals };
    let aiChanged = false;

    if (aiSignals.ai_jobs_present === true && evidence.ai_job_count === 0) {
      corrections["ai_jobs_present"] = { claimed: true, actual: false };
      correctedAi.ai_jobs_present = false;
      aiChanged = true;
      verified.push("ai_jobs_corrected");
    } else {
      verified.push("ai_jobs_accurate");
    }

    if (aiSignals.tech_stack_relevant === true && evidence.tech_stack.length === 0) {
      corrections["tech_stack_relevant"] = { claimed: true, actual: false };
      correctedAi.tech_stack_relevant = false;
      aiChanged = true;
      verified.push("tech_stack_corrected");
    } else {
      verified.push("tech_stack_accurate");
    }

    if (aiChanged) {
      cleaned.ai_readiness_signals = correctedAi;
    }
  }

  const totalChecks = verified.length + unverified.length;
  const groundingScore = totalChecks === 0 ? 0.5 : verified.length / totalChecks;

  return {
    cleaned,
    grounding: {
      isGrounded: groundingScore >= 0.6,
      groundingScore,
      verifiedClaims: verified,
      unverifiedClaims: unverified,
      strippedClaims: [],
      corrections,
    },
  };
}

// ─── Helpers ─────────────────────────────────────────────────

/**
 * Reports whether `text` mentions the company `name` (case-insensitive).
 * At least one significant word from the company name should be present;
 * names made only of short words (e.g. "HP") are matched by those words.
 */
function containsName(text: string, name: string): boolean {
  const textLower = text.toLowerCase();
  return significantWords(name, /\s+/).some(w => textLower.includes(w));
}

/**
 * Lower-cases `phrase`, splits it on `separator`, and returns the words longer
 * than two characters. Falls back to every non-empty word when none qualify,
 * so short names still produce a usable needle.
 */
function significantWords(phrase: string, separator: RegExp): string[] {
  const words = phrase
    .toLowerCase()
    .split(separator)
    .filter(w => w.length > 0);
  const significant = words.filter(w => w.length > 2);
  return significant.length > 0 ? significant : words;
}

/** True when `s` has at least one non-whitespace character. */
function isNonBlank(s: string): boolean {
  return s.trim().length > 0;
}

/** True for non-null, non-array objects whose prototype is Object.prototype or null. */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return false;
  }
  const proto: unknown = Object.getPrototypeOf(value);
  return proto === Object.prototype || proto === null;
}

/**
 * Returns a copy of `value` in which every string — at any depth inside
 * arrays and plain objects — has matches of `pattern` replaced. Other values
 * are returned as-is. `pattern` must carry the `g` flag to replace all matches.
 */
function redactDeep(value: unknown, pattern: RegExp, replacement: string): unknown {
  if (typeof value === "string") {
    return value.replace(pattern, replacement);
  }
  if (Array.isArray(value)) {
    return value.map(item => redactDeep(item, pattern, replacement));
  }
  if (isPlainRecord(value)) {
    return Object.fromEntries(
      Object.entries(value).map(([k, v]) => [k, redactDeep(v, pattern, replacement)])
    );
  }
  return value;
}