/**
 * Pain Signal Detector
 *
 * Core philosophy: Don't look for AI signals.
 * Look for INEFFICIENCY signals.
 *
 * A phone number on homepage = manual call handling = pain point.
 * A "Book by Phone" button = no online scheduling = pain point.
 * No chatbot = manual customer interaction = pain point.
 *
 * These are UNIVERSAL signals — every industry has them.
 * The LLM then maps these signals to our specific services.
 */

import { callLLM, MODELS } from "../../shared/llm/nvidia-client";
import { SYSTEM_PROMPTS, buildPainDetectionPrompt } from "../../shared/llm/prompts";
import { logger } from "../../shared/utils/logger";

/** A single inefficiency indicator found on (or inferred about) a company website. */
export interface PainSignal {
  /** Machine-readable signal identifier, e.g. "phone_handling_manual". */
  signal: string;
  /** Short human-readable justification for the signal. */
  evidence: string;
  severity: "low" | "medium" | "high";
}

/** Outcome of {@link detectPainSignals}. */
export interface PainDetectionResult {
  /** Detected signals; on the combined (LLM) path they are sorted high → medium → low. */
  painSignals: PainSignal[];
  /**
   * Matched service from service_profiles.
   * The LLM path yields the string "NONE" when the model reports no match;
   * the heuristic fallback yields `null` in that case.
   */
  serviceMatch: string | null;
  matchConfidence: number;
  reasoning: string;
  source: "heuristic" | "llm" | "combined";
}

// ─── Heuristic detection (instant, free, no LLM) ────────────

/** Text patterns whose presence in the website copy indicates a manual process. */
const HEURISTIC_RULES: {
  pattern: RegExp;
  signal: string;
  severity: PainSignal["severity"];
}[] = [
  // Phone/call signals → AI Receptionist opportunity
  { pattern: /(?:call us|call now|phone|dial|ring us)/i, signal: "phone_handling_manual", severity: "high" },
  { pattern: /\+?\d[\d\s\-().]{8,}/, signal: "phone_number_prominent", severity: "medium" },
  { pattern: /(?:book (?:an? )?appointment|schedule (?:a )?visit|make (?:an? )?appointment)/i, signal: "manual_appointment_booking", severity: "high" },
  { pattern: /(?:office hours|opening hours|business hours|we're open)/i, signal: "limited_availability_hours", severity: "medium" },
  { pattern: /(?:receptionist|front desk|reception)/i, signal: "human_receptionist_mentioned", severity: "high" },

  // Support signals → AI Customer Support opportunity
  { pattern: /(?:contact us|get in touch|reach out|enquire|inquire)/i, signal: "manual_contact_process", severity: "medium" },
  { pattern: /(?:submit (?:a )?ticket|raise (?:a )?ticket)/i, signal: "manual_ticket_system", severity: "medium" },
  { pattern: /(?:FAQ|frequently asked|common questions)/i, signal: "faq_exists_no_chatbot", severity: "low" },
  { pattern: /(?:email us|send us an email|write to us)/i, signal: "email_only_support", severity: "medium" },

  // Data/process signals → AI Data Processing opportunity
  { pattern: /(?:spreadsheet|excel|csv|manual report)/i, signal: "manual_data_processing", severity: "high" },
  { pattern: /(?:legacy|outdated|traditional system)/i, signal: "legacy_system_mentioned", severity: "high" },
  { pattern: /(?:compliance|regulatory|audit)/i, signal: "compliance_reporting_burden", severity: "medium" },

  // Hiring signals → growth/overwork indicator
  { pattern: /(?:we're hiring|join our team|open positions|careers)/i, signal: "actively_hiring", severity: "low" },
  { pattern: /(?:our team|meet the team|staff|employees)/i, signal: "team_page_exists", severity: "low" },
];

/** Elements on page that indicate ABSENCE of automation (check returns true when nothing is found). */
const ABSENCE_SIGNALS: {
  check: (html: string) => boolean;
  signal: string;
  severity: PainSignal["severity"];
}[] = [
  {
    check: (html) => !/(intercom|drift|crisp|tidio|zendesk|freshchat|livechat|tawk|hubspot.*chat)/i.test(html),
    signal: "no_chatbot_detected",
    severity: "medium",
  },
  {
    check: (html) => !/(calendly|acuity|booksy|mindbody|simplybook|square.*appointment)/i.test(html),
    signal: "no_online_scheduling_tool",
    severity: "high",
  },
  {
    check: (html) => !/(zapier|make\.com|automate|n8n|workato)/i.test(html),
    signal: "no_automation_tools",
    severity: "low",
  },
];

/** Sort rank per severity: lower rank sorts first. */
const SEVERITY_ORDER: Record<PainSignal["severity"], number> = { high: 0, medium: 1, low: 2 };

/** Signal names that count towards each service in the deterministic fallback. */
const RECEPTIONIST_SIGNALS: ReadonlySet<string> = new Set([
  "phone_handling_manual",
  "phone_number_prominent",
  "manual_appointment_booking",
  "human_receptionist_mentioned",
  "limited_availability_hours",
  "no_online_scheduling_tool",
]);
const SUPPORT_SIGNALS: ReadonlySet<string> = new Set([
  "manual_contact_process",
  "manual_ticket_system",
  "faq_exists_no_chatbot",
  "email_only_support",
  "no_chatbot_detected",
]);
const DATA_SIGNALS: ReadonlySet<string> = new Set([
  "manual_data_processing",
  "legacy_system_mentioned",
  "compliance_reporting_burden",
]);

/**
 * Detect pain signals from website text and HTML.
 *
 * Step 1: Heuristic detection (instant, free)
 * Step 2: LLM enhancement (DL reasoning — maps signals to services)
 *
 * Any error raised while building the prompt or calling the LLM is logged and
 * the function falls back to a heuristic-only result; the same fallback is
 * used when the LLM response carries no parsed payload.
 *
 * @param companyName   Company name forwarded to the prompt and used in log context.
 * @param industry      Industry label forwarded to the prompt.
 * @param employeeCount Head count forwarded to the prompt, or null when unknown.
 * @param websiteText   Visible website text; scanned by the pattern rules, and its
 *                      first 500 characters are sent to the LLM.
 * @param websiteHtml   Raw page source; scanned for known automation tooling.
 * @param traceId       Trace identifier passed through to the LLM client.
 * @returns Combined heuristic + LLM result, or a heuristic-only result on fallback.
 */
export async function detectPainSignals(
  companyName: string,
  industry: string,
  employeeCount: number | null,
  websiteText: string,
  websiteHtml: string,
  traceId: string
): Promise<PainDetectionResult> {
  // ── Step 1: Heuristic scan ─────────────────────────────────
  const heuristicSignals = runHeuristicScan(websiteText, websiteHtml);

  // If we found enough signals, LLM just confirms and maps to service
  // If few signals, LLM reasons deeper about the industry context
  const pageElements = heuristicSignals.map((s) => s.signal);

  // ── Step 2: LLM deep reasoning ────────────────────────────
  try {
    const llmResult = await callLLM({
      operation: "pain_detect",
      model: MODELS.FAST, // 8B for speed — pain detection is pattern-based
      systemPrompt: SYSTEM_PROMPTS.PAIN_DETECTOR,
      userPrompt: buildPainDetectionPrompt({
        company_name: companyName,
        industry,
        employee_count: employeeCount,
        website_text: websiteText.slice(0, 500),
        page_elements: pageElements,
      }),
      temperature: 0.2,
      maxTokens: 400,
      jsonMode: true,
      traceId,
    });

    const parsed = llmResult.parsed;
    if (parsed) {
      // Model output is untrusted JSON: validate it before merging (dedup) with
      // the heuristic signals.
      const llmSignals = sanitizeLlmSignals(parsed.pain_signals);
      const serviceMatch: unknown = parsed.service_match;
      const reasoning: unknown = parsed.reasoning;
      const confidence = Number(parsed.match_confidence ?? 0);

      return {
        painSignals: mergeSignals(heuristicSignals, llmSignals),
        // Only accept real strings so objects never become "[object Object]".
        serviceMatch: typeof serviceMatch === "string" ? serviceMatch : "NONE",
        // A non-numeric confidence would otherwise surface as NaN.
        matchConfidence: Number.isFinite(confidence) ? confidence : 0,
        reasoning: typeof reasoning === "string" ? reasoning : "",
        source: "combined",
      };
    }
  } catch (err) {
    logger.warn({ companyName, err }, "LLM pain detection failed — using heuristic only");
  }

  // ── Fallback: heuristic-only result ────────────────────────
  return {
    painSignals: heuristicSignals,
    serviceMatch: inferServiceFromSignals(heuristicSignals, industry),
    matchConfidence: heuristicSignals.length >= 3 ? 0.7 : 0.4,
    reasoning: `Heuristic-only: ${heuristicSignals.length} pain signals detected`,
    source: "heuristic",
  };
}

/**
 * Run the pattern rules against the website text and the absence rules against
 * the page source.
 *
 * @param text Visible website text tested against HEURISTIC_RULES.
 * @param html Page source tested against ABSENCE_SIGNALS.
 * @returns One signal per matching rule, pattern rules first, in rule order.
 */
function runHeuristicScan(text: string, html: string): PainSignal[] {
  const signals: PainSignal[] = [];
  const seen = new Set<string>();

  // Pattern-based detection
  for (const rule of HEURISTIC_RULES) {
    if (rule.pattern.test(text) && !seen.has(rule.signal)) {
      seen.add(rule.signal);
      signals.push({
        signal: rule.signal,
        evidence: `Pattern matched in website text`,
        severity: rule.severity,
      });
    }
  }

  // Absence-based detection (what's NOT on the site).
  // With no page source at all there is nothing to inspect, so "not found"
  // would be a guaranteed false positive for every rule — skip the checks.
  if (html.trim().length > 0) {
    for (const check of ABSENCE_SIGNALS) {
      if (check.check(html) && !seen.has(check.signal)) {
        seen.add(check.signal);
        signals.push({
          signal: check.signal,
          evidence: "Not detected in page source",
          severity: check.severity,
        });
      }
    }
  }

  return signals;
}

/** Type guard for the three severity levels a PainSignal may carry. */
function isSeverity(value: unknown): value is PainSignal["severity"] {
  return value === "low" || value === "medium" || value === "high";
}

/**
 * Convert the raw `pain_signals` value from the LLM response into well-formed
 * PainSignal objects.
 *
 * Non-array input yields an empty list. Entries that are not objects or lack a
 * non-empty string `signal` are dropped. Severity is compared case-insensitively;
 * anything unrecognised is downgraded to "low" so an unranked signal never
 * outranks a validated one. A non-string `evidence` becomes an empty string.
 */
function sanitizeLlmSignals(raw: unknown): PainSignal[] {
  if (!Array.isArray(raw)) return [];

  const signals: PainSignal[] = [];
  for (const entry of raw as unknown[]) {
    if (typeof entry !== "object" || entry === null) continue;

    const candidate = entry as Record<string, unknown>;
    const name = typeof candidate.signal === "string" ? candidate.signal.trim() : "";
    if (name === "") continue;

    const severity =
      typeof candidate.severity === "string" ? candidate.severity.trim().toLowerCase() : "";

    signals.push({
      signal: name,
      evidence: typeof candidate.evidence === "string" ? candidate.evidence : "",
      severity: isSeverity(severity) ? severity : "low",
    });
  }
  return signals;
}

/**
 * Merge heuristic and LLM signals into one list without duplicate signal names.
 *
 * Heuristic entries win over LLM entries with the same name, and repeated LLM
 * entries are kept only once. Neither input array is mutated.
 *
 * @returns A new array sorted by severity: high → medium → low.
 */
function mergeSignals(heuristic: PainSignal[], llm: PainSignal[]): PainSignal[] {
  const merged = [...heuristic];
  const existing = new Set<string>(heuristic.map((s) => s.signal));

  for (const signal of llm) {
    if (!existing.has(signal.signal)) {
      // Record the name so a later duplicate from the LLM is skipped as well.
      existing.add(signal.signal);
      merged.push(signal);
    }
  }

  // `merged` is a private copy, so sorting in place is safe.
  return merged.sort((a, b) => SEVERITY_ORDER[a.severity] - SEVERITY_ORDER[b.severity]);
}

/**
 * Deterministic service inference from signals (fallback when LLM fails).
 *
 * Counts how many detected signals belong to each service group and picks the
 * group with the most hits. Ties are resolved in the order receptionist →
 * support → data. Fewer than two hits in the best group means no match.
 *
 * @param signals  Signals produced by the heuristic scan.
 * @param industry Accepted for call-site compatibility; not used by the current rules.
 * @returns The service name, or null when no group has at least two signals.
 */
function inferServiceFromSignals(signals: PainSignal[], industry: string): string | null {
  const countIn = (group: ReadonlySet<string>): number =>
    signals.filter((s) => group.has(s.signal)).length;

  const receptionistCount = countIn(RECEPTIONIST_SIGNALS);
  const supportCount = countIn(SUPPORT_SIGNALS);
  const dataCount = countIn(DATA_SIGNALS);

  const max = Math.max(receptionistCount, supportCount, dataCount);
  if (max < 2) return null;

  if (receptionistCount === max) return "AI Receptionist";
  if (supportCount === max) return "AI Customer Support";
  // max is always one of the three counts, so only the data group remains.
  return "AI Data Processing";
}