Spaces:
Running
Running
| /** | |
| * Grounded Hallucination Detection | |
| * | |
| * Google DeepMind approach: Every LLM claim must be traceable | |
| * to a piece of evidence. Claims without evidence are stripped. | |
| * | |
| * This is NOT "ask LLM for confidence" — that's like asking | |
| * a cheater to grade their own exam. | |
| * | |
| * This IS: cross-reference every output field against source data. | |
| */ | |
| import { logger } from "../utils/logger"; | |
/**
 * Outcome of a grounding pass, as produced by the validators in this module.
 */
export interface GroundingResult {
  /** Overall verdict derived by the validator from the score (and, for profiles, from stripped claims). */
  isGrounded: boolean;

  /** Share of recorded checks that ended up verified, in the range 0.0-1.0. */
  groundingScore: number;

  /** Labels of claims that match the evidence (corrected claims are recorded here as well). */
  verifiedClaims: string[];

  /** Labels of claims for which no supporting evidence was found. */
  unverifiedClaims: string[];

  /** Labels of claims removed from (or hard-flagged in) the output. */
  strippedClaims: string[];

  /** Per-field record of what was claimed versus what the evidence says. */
  corrections: Record<string, { claimed: unknown; actual: unknown }>;
}
/**
 * Factual data collected from providers/scrapers. LLM output is
 * cross-referenced against these fields; nothing here comes from the LLM.
 */
export interface EvidenceSet {
  /** Company name; the profile summary is expected to mention at least part of it. */
  company_name: string;
  /** Not read by the checks visible in this file. */
  domain: string;
  /** Known headcount, or null when unknown; compared with headcount claims and size signals. */
  employee_count: number | null;
  /** Industry label, or null when unknown; its words are looked up in the profile summary. */
  industry: string | null;
  /** Detected technologies; matched against `evidence_used` claims and readiness signals. */
  tech_stack: string[];
  /** Not read by the checks visible in this file. */
  description: string | null;
  /** Scraped website text; searched for the leading part of each `evidence_used` claim. */
  website_text: string;
  /** Job posting texts; their leading characters are looked up inside each claim. */
  job_postings: string[];
  /** Number of AI-related job postings; drives the AI-readiness plausibility checks. */
  ai_job_count: number;
  /** Not read by the checks visible in this file. */
  linkedin_description: string | null;
  /** Not read by the checks visible in this file. */
  country: string | null;
  /** Not read by the checks visible in this file. */
  city: string | null;
  /** Detected pain signals; their leading characters are looked up inside each claim. */
  pain_signals_detected: string[];
}
// Both patterns are global and are only ever used with String.prototype.replace,
// which resets `lastIndex` itself, so sharing them at module level is safe.
const PII_EMAIL_PATTERN = /[\w.+-]+@[\w-]+\.[a-z]{2,}/gi;
const PII_PHONE_PATTERN = /\+?\d[\d\s\-().]{8,}/g;

/**
 * Returns a copy of `value` with e-mail addresses and phone-like digit runs
 * replaced by placeholders in every string it contains, at any nesting depth.
 * Arrays and plain objects are copied, never mutated; other values (numbers,
 * booleans, null, class instances) are returned unchanged.
 * `hits` is updated to record which kinds of PII were actually redacted.
 */
function redactPiiDeep(value: unknown, hits: { email: boolean; phone: boolean }): unknown {
  if (typeof value === "string") {
    const withoutEmails = value.replace(PII_EMAIL_PATTERN, "[EMAIL_REDACTED]");
    if (withoutEmails !== value) hits.email = true;
    const withoutPhones = withoutEmails.replace(PII_PHONE_PATTERN, "[PHONE_REDACTED]");
    if (withoutPhones !== withoutEmails) hits.phone = true;
    return withoutPhones;
  }
  if (Array.isArray(value)) {
    return value.map((item) => redactPiiDeep(item, hits));
  }
  if (typeof value === "object" && value !== null) {
    const proto: unknown = Object.getPrototypeOf(value);
    if (proto === Object.prototype || proto === null) {
      const copy: Record<string, unknown> = {};
      for (const [key, inner] of Object.entries(value)) {
        copy[key] = redactPiiDeep(inner, hits);
      }
      return copy;
    }
  }
  return value;
}

/**
 * Decides whether a lower-cased claim is backed by the evidence: it mentions a
 * known technology, its first 20 characters occur in the website text, or it
 * contains the first 15 characters of a job posting or pain signal.
 * Blank needles are ignored — `"x".includes("")` is always true and would
 * otherwise verify every claim.
 */
function isClaimSupported(claimLower: string, evidence: EvidenceSet, websiteLower: string): boolean {
  const mentions = (needle: string): boolean =>
    needle.trim().length > 0 && claimLower.includes(needle);
  const claimPrefix = claimLower.slice(0, 20);

  return (
    evidence.tech_stack.some((tech) => mentions(tech.toLowerCase())) ||
    (claimPrefix.trim().length > 0 && websiteLower.includes(claimPrefix)) ||
    evidence.job_postings.some((job) => mentions(job.toLowerCase().slice(0, 15))) ||
    evidence.pain_signals_detected.some((pain) => mentions(pain.toLowerCase().slice(0, 15)))
  );
}

/**
 * Validates LLM profile output against collected evidence.
 *
 * Checks performed, in order: company name and headcount in `profile_summary`,
 * industry mention, each `evidence_used` claim, `ai_readiness` plausibility,
 * and PII (e-mail / phone) leakage anywhere in the profile.
 *
 * @param profile  Raw LLM output. It is never mutated; a corrected copy is returned.
 * @param evidence Facts collected independently of the LLM.
 * @returns `cleaned` — the profile with the headcount and `ai_readiness` corrected
 *   where they contradict the evidence and with PII redacted at every nesting level;
 *   `grounding` — the audit trail. `groundingScore` is verified / all recorded checks
 *   (corrected claims count as verified), and `isGrounded` requires a score of at
 *   least 0.6 and no entries in `strippedClaims`.
 */
export function groundProfile(
  profile: Record<string, unknown>,
  evidence: EvidenceSet
): { cleaned: Record<string, unknown>; grounding: GroundingResult } {
  const verified: string[] = [];
  const unverified: string[] = [];
  const stripped: string[] = [];
  const corrections: Record<string, { claimed: unknown; actual: unknown }> = {};
  const cleaned: Record<string, unknown> = { ...profile };

  // ── Check profile_summary ──────────────────────────────────
  // Only a real string is analysed; stringifying an object would yield "[object Object]".
  const summary = typeof profile.profile_summary === "string" ? profile.profile_summary : "";

  // Does the summary mention the right company?
  if (summary.length > 20 && !containsName(summary, evidence.company_name)) {
    // The summary text is left untouched (the LLM may paraphrase the name), but the
    // flag is recorded under `stripped`, which forces `isGrounded` to false.
    stripped.push("summary_wrong_company");
  }

  // Does the summary claim an employee count?
  const claimedEmpMatch = summary.match(/(\d[\d,]+)\s*(employees?|people|staff|team)/i);
  if (claimedEmpMatch && evidence.employee_count) {
    const claimed = parseInt(claimedEmpMatch[1].replace(/,/g, ""), 10);
    // Tolerate up to 30% deviation from the known headcount.
    if (Math.abs(claimed - evidence.employee_count) > evidence.employee_count * 0.3) {
      corrections["employee_count"] = { claimed, actual: evidence.employee_count };
      // Fix the claim in the summary.
      cleaned.profile_summary = summary.replace(
        claimedEmpMatch[0],
        `${evidence.employee_count} employees`
      );
      verified.push("employee_count_corrected");
    } else {
      verified.push("employee_count_accurate");
    }
  }

  // ── Check industry claim ───────────────────────────────────
  if (evidence.industry) {
    const summaryLower = summary.toLowerCase();
    const tokens = evidence.industry
      .toLowerCase()
      .split(/[\s_]+/)
      .filter((word) => word.length > 0); // an empty token would match any summary
    // Prefer words longer than two characters ("&", "of" carry no signal); fall back
    // to every token when the label has none (e.g. "AI", "IT").
    const significant = tokens.filter((word) => word.length > 2);
    const industryWords = significant.length > 0 ? significant : tokens;

    if (industryWords.some((word) => summaryLower.includes(word))) {
      verified.push("industry_match");
    } else {
      unverified.push("industry_may_differ");
    }
  }

  // ── Check evidence_used claims ─────────────────────────────
  if (Array.isArray(profile.evidence_used)) {
    const claims: unknown[] = profile.evidence_used;
    const websiteLower = evidence.website_text.toLowerCase();

    for (const rawClaim of claims) {
      if (typeof rawClaim !== "string") {
        // LLM output is untrusted: a non-string entry cannot be matched against evidence.
        unverified.push(`unverifiable: ${String(rawClaim).slice(0, 40)}`);
        continue;
      }
      if (isClaimSupported(rawClaim.toLowerCase(), evidence, websiteLower)) {
        verified.push(`evidence: ${rawClaim.slice(0, 40)}`);
      } else {
        unverified.push(`unverifiable: ${rawClaim.slice(0, 40)}`);
      }
    }
  }

  // ── Check ai_readiness ─────────────────────────────────────
  const claimedReadiness = String(profile.ai_readiness ?? "");
  if (claimedReadiness === "high" && evidence.ai_job_count === 0 && evidence.tech_stack.length === 0) {
    corrections["ai_readiness"] = { claimed: "high", actual: "low" };
    cleaned.ai_readiness = "low";
    verified.push("ai_readiness_corrected");
  } else if (claimedReadiness === "low" && evidence.ai_job_count >= 3) {
    corrections["ai_readiness"] = { claimed: "low", actual: "high" };
    cleaned.ai_readiness = "high";
    verified.push("ai_readiness_corrected");
  } else {
    verified.push("ai_readiness_plausible");
  }

  // ── Check for PII leakage ──────────────────────────────────
  // Redact at every nesting level and report only what was really removed, so
  // `strippedClaims` never names PII that is still present in the output.
  const piiHits = { email: false, phone: false };
  for (const [key, value] of Object.entries(cleaned)) {
    cleaned[key] = redactPiiDeep(value, piiHits);
  }
  if (piiHits.email) stripped.push("pii_email_in_output");
  if (piiHits.phone) stripped.push("pii_phone_in_output");

  // ── Compute grounding score ────────────────────────────────
  const totalChecks = verified.length + unverified.length + stripped.length;
  const groundingScore = totalChecks === 0 ? 0.5 : verified.length / totalChecks;

  const result: GroundingResult = {
    isGrounded: groundingScore >= 0.6 && stripped.length === 0,
    groundingScore,
    verifiedClaims: verified,
    unverifiedClaims: unverified,
    strippedClaims: stripped,
    corrections,
  };

  if (!result.isGrounded) {
    logger.warn(
      { groundingScore: groundingScore.toFixed(2), corrections: Object.keys(corrections).length },
      "Profile failed grounding — corrections applied"
    );
  }

  return { cleaned, grounding: result };
}
/**
 * Reads a flag from a signal group that arrived as untyped LLM output.
 * Returns undefined when the group is not an object.
 */
function readSignalFlag(group: unknown, key: string): unknown {
  return typeof group === "object" && group !== null
    ? (group as Record<string, unknown>)[key]
    : undefined;
}

/**
 * Validates scoring signals against evidence.
 * Scores are computed DETERMINISTICALLY from signals —
 * the LLM only extracts signals, code computes the score.
 *
 * A signal claimed as `true` is overridden to `false` when the evidence
 * contradicts it:
 *  - `company_fit_signals.size_appropriate` when the known headcount is below 3;
 *  - `ai_readiness_signals.ai_jobs_present` when no AI job postings were found;
 *  - `ai_readiness_signals.tech_stack_relevant` when no tech stack was detected.
 *
 * @param signals  Raw LLM-extracted signals. Never mutated.
 * @param evidence Facts collected independently of the LLM.
 * @returns `cleaned` — a copy of `signals` in which every corrected flag is set to
 *   the evidence-backed value (corrected groups are copied, not edited in place);
 *   `grounding` — the audit trail. Corrected signals are counted as verified and
 *   nothing is ever recorded as unverified here, so the score is 1 whenever at
 *   least one signal group is present and 0.5 otherwise.
 */
export function groundSignals(
  signals: Record<string, unknown>,
  evidence: EvidenceSet
): { cleaned: Record<string, unknown>; grounding: GroundingResult } {
  const verified: string[] = [];
  const unverified: string[] = [];
  const corrections: Record<string, { claimed: unknown; actual: unknown }> = {};
  const cleaned: Record<string, unknown> = { ...signals };

  // Verify company_fit_signals
  const fitSignals = signals.company_fit_signals;
  if (fitSignals) {
    const tooSmall = evidence.employee_count !== null && evidence.employee_count < 3;
    if (readSignalFlag(fitSignals, "size_appropriate") === true && tooSmall) {
      corrections["size_appropriate"] = { claimed: true, actual: false };
      // Apply the correction on a copy so the caller's input stays intact.
      cleaned.company_fit_signals = {
        ...(fitSignals as Record<string, unknown>),
        size_appropriate: false,
      };
      verified.push("size_corrected");
    } else {
      verified.push("size_plausible");
    }
  }

  // Verify ai_readiness_signals
  const aiSignals = signals.ai_readiness_signals;
  if (aiSignals) {
    const aiOverrides: Record<string, unknown> = {};

    if (readSignalFlag(aiSignals, "ai_jobs_present") === true && evidence.ai_job_count === 0) {
      corrections["ai_jobs_present"] = { claimed: true, actual: false };
      aiOverrides.ai_jobs_present = false;
      verified.push("ai_jobs_corrected");
    } else {
      verified.push("ai_jobs_accurate");
    }

    if (readSignalFlag(aiSignals, "tech_stack_relevant") === true && evidence.tech_stack.length === 0) {
      corrections["tech_stack_relevant"] = { claimed: true, actual: false };
      aiOverrides.tech_stack_relevant = false;
      verified.push("tech_stack_corrected");
    } else {
      verified.push("tech_stack_accurate");
    }

    // An override only exists when a flag was read as `true`, i.e. the group is an object.
    if (Object.keys(aiOverrides).length > 0) {
      cleaned.ai_readiness_signals = {
        ...(aiSignals as Record<string, unknown>),
        ...aiOverrides,
      };
    }
  }

  const totalChecks = verified.length + unverified.length;
  const groundingScore = totalChecks === 0 ? 0.5 : verified.length / totalChecks;

  return {
    cleaned,
    grounding: {
      isGrounded: groundingScore >= 0.6,
      groundingScore,
      verifiedClaims: verified,
      unverifiedClaims: unverified,
      strippedClaims: [],
      corrections,
    },
  };
}
| // ─── Helpers ───────────────────────────────────────────────── | |
/**
 * Reports whether `text` mentions the company `name` (case-insensitive).
 *
 * A name with at least one word longer than two characters matches when any such
 * word occurs in the text as a substring, so paraphrased or shortened names still
 * count. A name made only of short tokens ("HP", "3M", "EY") has no such word;
 * it matches when the whole name appears delimited by non-alphanumeric
 * characters, so "HP" is not found inside "graphpad".
 *
 * @param text Text to search, e.g. a profile summary.
 * @param name Company name from the evidence.
 * @returns true when the text mentions the name; false for an empty or blank name.
 */
function containsName(text: string, name: string): boolean {
  const textLower = text.toLowerCase();
  const tokens = name
    .toLowerCase()
    .split(/\s+/)
    .filter((token) => token.length > 0);

  // At least one significant word from the company name should be present.
  const significant = tokens.filter((token) => token.length > 2);
  if (significant.length > 0) {
    return significant.some((token) => textLower.includes(token));
  }
  if (tokens.length === 0) {
    return false;
  }

  // Short-token names: substring matching would be far too noisy, so require the
  // whole name with a non-alphanumeric character (or the text edge) on each side.
  const phrase = tokens.join(" ");
  const isAlphanumeric = (ch: string): boolean => /[\p{L}\p{N}]/u.test(ch);
  let from = 0;
  for (;;) {
    const at = textLower.indexOf(phrase, from);
    if (at === -1) {
      return false;
    }
    // charAt yields "" outside the string, which is not alphanumeric.
    const before = textLower.charAt(at - 1);
    const after = textLower.charAt(at + phrase.length);
    if (!isAlphanumeric(before) && !isAlphanumeric(after)) {
      return true;
    }
    from = at + 1;
  }
}