clienttarget-python / src /shared /llm /grounding.ts
iDevBuddy
feat: Phase 1 — AI Client Acquisition System
bd28470
raw
history blame
9.05 kB
/**
* Grounded Hallucination Detection
*
* Google DeepMind approach: Every LLM claim must be traceable
* to a piece of evidence. Claims without evidence are stripped.
*
* This is NOT "ask LLM for confidence" — that's like asking
* a cheater to grade their own exam.
*
* This IS: cross-reference every output field against source data.
*/
import { logger } from "../utils/logger";
/**
 * Report produced by a grounding pass: which checks passed, which claims
 * could not be backed by evidence, and which values were overridden.
 */
export interface GroundingResult {
// Overall verdict. Both producers in this file require groundingScore >= 0.6;
// groundProfile additionally requires strippedClaims to be empty.
isGrounded: boolean;
// verifiedClaims.length divided by the number of recorded checks;
// 0.5 when no checks were recorded at all.
groundingScore: number; // 0.0-1.0
// Labels of checks that passed. Note that "*_corrected" labels are also
// recorded here by the producers in this file.
verifiedClaims: string[]; // claims that match evidence
unverifiedClaims: string[]; // claims with no evidence
// groundSignals always returns an empty list here.
strippedClaims: string[]; // claims removed from output
// Keyed by field name: the value the LLM claimed vs. the value the evidence supports.
corrections: Record<string, { claimed: unknown; actual: unknown }>;
}
/**
 * Facts gathered independently of the LLM. The grounding functions in this
 * file treat these values as ground truth when checking LLM output.
 */
export interface EvidenceSet {
// Factual data we collected from providers/scrapers
// At least one word of this name is expected to appear in the profile summary.
company_name: string;
domain: string;
// null is treated as "no evidence" by the checks in this file
// (presumably it means the head count is unknown — confirm with the collectors).
employee_count: number | null;
// Split on whitespace/underscores and matched case-insensitively against the summary.
industry: string | null;
// Matched case-insensitively against evidence claims; an empty list counts
// as "no tech evidence" in the AI-readiness checks.
tech_stack: string[];
description: string | null;
// Searched case-insensitively for the leading characters of each evidence claim.
website_text: string;
// Only the leading characters of each posting are compared against claims.
job_postings: string[];
// 0 contradicts a "high" readiness / ai_jobs_present claim; >= 3 contradicts "low".
ai_job_count: number;
linkedin_description: string | null;
country: string | null;
city: string | null;
// Only the leading characters of each signal are compared against claims.
pain_signals_detected: string[];
}
/**
 * Validates an LLM-generated profile against collected evidence.
 *
 * Checks performed, in order:
 *  1. `profile_summary` mentions the company name (flag only, text is kept).
 *  2. A head count quoted in the summary is within 30% of the evidence;
 *     otherwise the summary is rewritten with the evidence value.
 *  3. The summary mentions at least one word of the evidence industry.
 *  4. Every `evidence_used` entry is supported by the tech stack, website
 *     text, job postings or detected pain signals.
 *  5. `ai_readiness` does not contradict the AI job count / tech stack.
 *  6. Emails and phone numbers are redacted from every string in the output,
 *     including strings inside nested arrays and objects.
 *
 * The input `profile` (and anything nested in it) is never mutated.
 *
 * @param profile  Raw profile object produced by the LLM.
 * @param evidence Facts collected independently of the LLM.
 * @returns `cleaned` — a copy of the profile with corrections and redactions
 *          applied; `grounding` — the report describing what was checked.
 */
export function groundProfile(
  profile: Record<string, unknown>,
  evidence: EvidenceSet
): { cleaned: Record<string, unknown>; grounding: GroundingResult } {
  const verified: string[] = [];
  const unverified: string[] = [];
  const stripped: string[] = [];
  const corrections: Record<string, { claimed: unknown; actual: unknown }> = {};
  const cleaned: Record<string, unknown> = { ...profile };

  // ── Check profile_summary ──────────────────────────────────
  const summary = String(profile.profile_summary ?? "");

  // Does summary mention the right company?
  if (summary.length > 20 && !containsName(summary, evidence.company_name)) {
    // Don't strip — just flag. LLM may paraphrase the name.
    // NOTE(review): the flag is recorded under `stripped`, which forces
    // isGrounded to false even though nothing is removed — confirm intent.
    stripped.push("summary_wrong_company");
  }

  // Does summary claim employee count?
  // `[\d,]*` lets single-digit head counts match; the lookbehind stops the
  // match from starting inside a larger numeric token such as "24/7".
  const claimedEmpMatch = /(?<![\d.,/])(\d[\d,]*)\s*(employees?|people|staff|team)/i.exec(summary);
  const actualEmployees = evidence.employee_count;
  if (claimedEmpMatch && actualEmployees) {
    const claimed = parseInt((claimedEmpMatch[1] ?? "").replace(/,/g, ""), 10);
    if (Math.abs(claimed - actualEmployees) > actualEmployees * 0.3) {
      corrections["employee_count"] = { claimed, actual: actualEmployees };
      // Fix the claim in the summary. Splice by match index so the exact
      // matched occurrence is replaced, not an earlier look-alike substring.
      const start = claimedEmpMatch.index;
      const end = start + claimedEmpMatch[0].length;
      cleaned.profile_summary =
        summary.slice(0, start) + `${actualEmployees} employees` + summary.slice(end);
      verified.push("employee_count_corrected");
    } else {
      verified.push("employee_count_accurate");
    }
  }

  // ── Check industry claim ───────────────────────────────────
  if (evidence.industry) {
    const summaryLower = summary.toLowerCase();
    // Drop empty fragments: "".includes-style matches would always succeed.
    const industryWords = evidence.industry
      .toLowerCase()
      .split(/[\s_]+/)
      .filter(word => word.length > 0);
    if (industryWords.length > 0) {
      if (industryWords.some(word => summaryLower.includes(word))) {
        verified.push("industry_match");
      } else {
        unverified.push("industry_may_differ");
      }
    }
  }

  // ── Check tech stack claims ─────────────────────────────────
  if (Array.isArray(profile.evidence_used)) {
    const claims: unknown[] = profile.evidence_used;
    // Loop-invariant evidence is normalised once, with blank entries removed
    // so that an empty evidence string cannot "support" every claim.
    const websiteLower = evidence.website_text.toLowerCase();
    const techNeedles = evidenceNeedles(evidence.tech_stack);
    const jobNeedles = evidenceNeedles(evidence.job_postings, 15);
    const painNeedles = evidenceNeedles(evidence.pain_signals_detected, 15);

    for (const claim of claims) {
      const claimText = typeof claim === "string" ? claim.trim() : "";
      if (claimText === "") {
        // Blank or non-string entries carry nothing that can be verified.
        const label = typeof claim === "string" ? claim : (JSON.stringify(claim) ?? "");
        unverified.push(`unverifiable: ${label.slice(0, 40)}`);
        continue;
      }

      const claimLower = claimText.toLowerCase();
      const isSupported =
        techNeedles.some(tech => claimLower.includes(tech)) ||
        websiteLower.includes(claimLower.slice(0, 20)) ||
        jobNeedles.some(job => claimLower.includes(job)) ||
        painNeedles.some(pain => claimLower.includes(pain));

      if (isSupported) {
        verified.push(`evidence: ${claimText.slice(0, 40)}`);
      } else {
        unverified.push(`unverifiable: ${claimText.slice(0, 40)}`);
      }
    }
  }

  // ── Check ai_readiness ─────────────────────────────────────
  const claimedReadiness = String(profile.ai_readiness ?? "");
  if (claimedReadiness === "high" && evidence.ai_job_count === 0 && evidence.tech_stack.length === 0) {
    corrections["ai_readiness"] = { claimed: "high", actual: "low" };
    cleaned.ai_readiness = "low";
    verified.push("ai_readiness_corrected");
  } else if (claimedReadiness === "low" && evidence.ai_job_count >= 3) {
    corrections["ai_readiness"] = { claimed: "low", actual: "high" };
    cleaned.ai_readiness = "high";
    verified.push("ai_readiness_corrected");
  } else {
    verified.push("ai_readiness_plausible");
  }

  // ── Check for PII leakage ──────────────────────────────────
  const emailPattern = /[\w.+-]+@[\w-]+\.[a-z]{2,}/gi;
  const phonePattern = /\+?\d[\d\s\-().]{8,}/g;

  // `search` ignores the global flag and lastIndex, so detection is stateless.
  if (JSON.stringify(cleaned).search(emailPattern) !== -1) {
    stripped.push("pii_email_in_output");
    // Strip emails from all string fields, at any nesting depth.
    for (const [key, val] of Object.entries(cleaned)) {
      cleaned[key] = redactStringsDeep(val, emailPattern, "[EMAIL_REDACTED]");
    }
  }

  // Re-serialise so digits that only occurred inside a redacted email are
  // not reported as a phone number.
  if (JSON.stringify(cleaned).search(phonePattern) !== -1) {
    stripped.push("pii_phone_in_output");
    for (const [key, val] of Object.entries(cleaned)) {
      cleaned[key] = redactStringsDeep(val, phonePattern, "[PHONE_REDACTED]");
    }
  }

  // ── Compute grounding score ────────────────────────────────
  const totalChecks = verified.length + unverified.length + stripped.length;
  const groundingScore = totalChecks === 0 ? 0.5 : verified.length / totalChecks;
  const result: GroundingResult = {
    isGrounded: groundingScore >= 0.6 && stripped.length === 0,
    groundingScore,
    verifiedClaims: verified,
    unverifiedClaims: unverified,
    strippedClaims: stripped,
    corrections,
  };

  if (!result.isGrounded) {
    logger.warn(
      { groundingScore: groundingScore.toFixed(2), corrections: Object.keys(corrections).length },
      "Profile failed grounding — corrections applied"
    );
  }

  return { cleaned, grounding: result };
}

/**
 * Normalises evidence strings into lowercase search needles, optionally
 * truncated to `maxLength` characters. Blank entries are dropped.
 */
function evidenceNeedles(values: readonly string[], maxLength?: number): string[] {
  const needles: string[] = [];
  for (const value of values) {
    if (typeof value !== "string") continue;
    const lowered = value.trim().toLowerCase();
    const needle = maxLength === undefined ? lowered : lowered.slice(0, maxLength);
    if (needle.length > 0) needles.push(needle);
  }
  return needles;
}

/**
 * Returns a copy of `value` in which every string — at any depth inside
 * arrays and plain objects — has matches of `pattern` replaced. Non-plain
 * objects and primitives are returned unchanged; the input is not mutated.
 */
function redactStringsDeep(value: unknown, pattern: RegExp, replacement: string): unknown {
  if (typeof value === "string") {
    return value.replace(pattern, replacement);
  }
  if (Array.isArray(value)) {
    return value.map(item => redactStringsDeep(item, pattern, replacement));
  }
  if (value !== null && typeof value === "object") {
    const proto: unknown = Object.getPrototypeOf(value);
    if (proto === Object.prototype || proto === null) {
      const copy: Record<string, unknown> = {};
      for (const [key, inner] of Object.entries(value)) {
        copy[key] = redactStringsDeep(inner, pattern, replacement);
      }
      return copy;
    }
  }
  return value;
}
/**
 * Validates LLM-extracted scoring signals against evidence.
 * Scores are computed DETERMINISTICALLY from signals —
 * LLM only extracts signals, code computes score.
 *
 * Every signal the evidence contradicts is recorded in `corrections` AND
 * overwritten in the returned `cleaned` copy, so anything computed from
 * `cleaned` uses the evidence-backed value. The input is never mutated.
 *
 * @param signals  Signal object extracted by the LLM; the groups read here
 *                 are `company_fit_signals` and `ai_readiness_signals`.
 * @param evidence Facts collected independently of the LLM.
 * @returns `cleaned` — copy of the signals with refuted flags set to false;
 *          `grounding` — report of the checks (score is 0.5 when neither
 *          signal group is present).
 */
export function groundSignals(
  signals: Record<string, unknown>,
  evidence: EvidenceSet
): { cleaned: Record<string, unknown>; grounding: GroundingResult } {
  const verified: string[] = [];
  // No current check reports an unverifiable signal; kept so the score
  // formula and the result shape match groundProfile.
  const unverified: string[] = [];
  const corrections: Record<string, { claimed: unknown; actual: unknown }> = {};
  const cleaned: Record<string, unknown> = { ...signals };

  // Verify company_fit_signals
  const fitSignals = signals.company_fit_signals;
  if (isSignalGroup(fitSignals)) {
    const tooSmall = evidence.employee_count !== null && evidence.employee_count < 3;
    if (fitSignals.size_appropriate === true && tooSmall) {
      corrections["size_appropriate"] = { claimed: true, actual: false };
      // Copy before patching so the caller's nested object stays untouched.
      cleaned.company_fit_signals = { ...fitSignals, size_appropriate: false };
      verified.push("size_corrected");
    } else {
      verified.push("size_plausible");
    }
  }

  // Verify ai_readiness_signals
  const aiSignals = signals.ai_readiness_signals;
  if (isSignalGroup(aiSignals)) {
    const patched: Record<string, unknown> = { ...aiSignals };
    let changed = false;

    if (aiSignals.ai_jobs_present === true && evidence.ai_job_count === 0) {
      corrections["ai_jobs_present"] = { claimed: true, actual: false };
      patched.ai_jobs_present = false;
      changed = true;
      verified.push("ai_jobs_corrected");
    } else {
      verified.push("ai_jobs_accurate");
    }

    if (aiSignals.tech_stack_relevant === true && evidence.tech_stack.length === 0) {
      corrections["tech_stack_relevant"] = { claimed: true, actual: false };
      patched.tech_stack_relevant = false;
      changed = true;
      verified.push("tech_stack_corrected");
    } else {
      verified.push("tech_stack_accurate");
    }

    if (changed) {
      cleaned.ai_readiness_signals = patched;
    }
  }

  const totalChecks = verified.length + unverified.length;
  const groundingScore = totalChecks === 0 ? 0.5 : verified.length / totalChecks;

  return {
    cleaned,
    grounding: {
      isGrounded: groundingScore >= 0.6,
      groundingScore,
      verifiedClaims: verified,
      unverifiedClaims: unverified,
      strippedClaims: [],
      corrections,
    },
  };
}

/** Runtime guard: true for a non-null, non-array object holding signal flags. */
function isSignalGroup(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}
// ─── Helpers ─────────────────────────────────────────────────
/**
 * Reports whether `text` plausibly mentions the company called `name`.
 *
 * The name is split into alphanumeric tokens (punctuation such as the comma
 * in "Acme, Inc." is discarded). If any token is longer than two characters,
 * one of those must occur in the text as a case-insensitive substring. Names
 * made only of short tokens ("HP", "3M") instead need an exact token match,
 * so that "hp" is not found inside an unrelated word like "shipping".
 *
 * @param text Text to search, e.g. an LLM-written summary.
 * @param name Company name from the evidence set.
 * @returns true when the name is considered present; false for an empty name.
 */
function containsName(text: string, name: string): boolean {
  const tokenize = (value: string): string[] =>
    value
      .toLowerCase()
      .split(/[^\p{L}\p{N}]+/u)
      .filter(token => token.length > 0);

  const nameTokens = tokenize(name);
  if (nameTokens.length === 0) {
    return false;
  }

  // At least one significant word from company name should be present
  const significant = nameTokens.filter(token => token.length > 2);
  if (significant.length > 0) {
    const textLower = text.toLowerCase();
    return significant.some(token => textLower.includes(token));
  }

  // Short-name fallback: whole-token equality avoids substring false positives.
  const textTokens = new Set(tokenize(text));
  return nameTokens.some(token => textTokens.has(token));
}