clienttarget-python / src /discovery /lib /pain-signal-detector.ts
iDevBuddy
feat: Phase 1 — AI Client Acquisition System
bd28470
/**
* Pain Signal Detector
*
* Core philosophy: Don't look for AI signals.
* Look for INEFFICIENCY signals.
*
* A phone number on homepage = manual call handling = pain point.
* A "Book by Phone" button = no online scheduling = pain point.
* No chatbot = manual customer interaction = pain point.
*
* These are UNIVERSAL signals — every industry has them.
* The LLM then maps these signals to our specific services.
*/
import { callLLM, MODELS } from "../../shared/llm/nvidia-client";
import { SYSTEM_PROMPTS, buildPainDetectionPrompt } from "../../shared/llm/prompts";
import { logger } from "../../shared/utils/logger";
/** One indicator of a manual or inefficient process observed for a company. */
export interface PainSignal {
// Machine-readable identifier, e.g. "phone_handling_manual" or "no_chatbot_detected".
signal: string;
// Short human-readable note describing what triggered the signal.
evidence: string;
// Relative weight of the signal; merged results are ordered high, then medium, then low.
severity: "low" | "medium" | "high";
}
/** Outcome of pain detection for a single company. */
export interface PainDetectionResult {
// Signals from the heuristic scan, plus any additional ones contributed by the LLM.
painSignals: PainSignal[];
// Service the signals point to. The heuristic fallback yields null when no
// service category collects at least two signals; the LLM path stringifies the
// model's service_match and yields the string "NONE" when that field is absent.
serviceMatch: string | null; // matched service from service_profiles
// Confidence in serviceMatch: 0.7 or 0.4 on the heuristic fallback, otherwise
// derived from the model's match_confidence field.
matchConfidence: number;
// Free-text explanation of how the match was reached.
reasoning: string;
// Which stage produced the result: "combined" when the LLM response was parsed,
// "heuristic" on fallback. NOTE(review): "llm" is declared but never assigned in
// the visible code — confirm whether another module uses it.
source: "heuristic" | "llm" | "combined";
}
// ─── Heuristic detection (instant, free, no LLM) ────────────
/**
 * Text patterns that indicate a manual process, each mapped to a signal name
 * and severity. Rules are evaluated in order against the visible website text.
 *
 * Short single-word terms (phone, dial, excel, csv, audit, "ring us") are
 * anchored with `\b` so they do not fire on unrelated words that merely contain
 * them ("smartphone", "dialysis", "excellent", "auditorium", "bring us").
 * Multi-word phrases are left unanchored because accidental substring hits are
 * unlikely.
 */
const HEURISTIC_RULES: {
  pattern: RegExp;
  signal: string;
  severity: PainSignal["severity"];
}[] = [
  // Phone/call signals → AI Receptionist opportunity
  { pattern: /(?:call us|call now|\b(?:tele)?phones?\b|\bdial\b|\bring us)/i, signal: "phone_handling_manual", severity: "high" },
  // NOTE(review): matches any digit followed by 8+ digits/separators, so a date
  // range such as "2015-2024 " also counts as a phone number. Consider requiring
  // a minimum digit count if this proves noisy.
  { pattern: /\+?\d[\d\s\-().]{8,}/, signal: "phone_number_prominent", severity: "medium" },
  { pattern: /(?:book (?:an? )?appointment|schedule (?:a )?visit|make (?:an? )?appointment)/i, signal: "manual_appointment_booking", severity: "high" },
  { pattern: /(?:office hours|opening hours|business hours|we're open)/i, signal: "limited_availability_hours", severity: "medium" },
  { pattern: /(?:receptionist|front desk|reception)/i, signal: "human_receptionist_mentioned", severity: "high" },
  // Support signals → AI Customer Support opportunity
  { pattern: /(?:contact us|get in touch|reach out|enquire|inquire)/i, signal: "manual_contact_process", severity: "medium" },
  { pattern: /(?:submit (?:a )?ticket|raise (?:a )?ticket)/i, signal: "manual_ticket_system", severity: "medium" },
  { pattern: /(?:FAQ|frequently asked|common questions)/i, signal: "faq_exists_no_chatbot", severity: "low" },
  { pattern: /(?:email us|send us an email|write to us)/i, signal: "email_only_support", severity: "medium" },
  // Data/process signals → AI Data Processing opportunity
  { pattern: /(?:spreadsheet|\bexcel\b|\bcsv\b|manual report)/i, signal: "manual_data_processing", severity: "high" },
  { pattern: /(?:legacy|outdated|traditional system)/i, signal: "legacy_system_mentioned", severity: "high" },
  { pattern: /(?:compliance|regulatory|\baudit(?:s|ed|ing|ors?)?\b)/i, signal: "compliance_reporting_burden", severity: "medium" },
  // Hiring signals → growth/overwork indicator
  { pattern: /(?:we're hiring|join our team|open positions|careers)/i, signal: "actively_hiring", severity: "low" },
  { pattern: /(?:our team|meet the team|staff|employees)/i, signal: "team_page_exists", severity: "low" },
];
// Vendor fingerprints whose ABSENCE from the page source suggests a missing
// automation capability. Each entry is [fingerprint, signal, severity].
const ABSENCE_FINGERPRINTS: readonly (readonly [RegExp, string, PainSignal["severity"]])[] = [
  // Live-chat / chatbot widgets
  [/(intercom|drift|crisp|tidio|zendesk|freshchat|livechat|tawk|hubspot.*chat)/i, "no_chatbot_detected", "medium"],
  // Online booking tools
  [/(calendly|acuity|booksy|mindbody|simplybook|square.*appointment)/i, "no_online_scheduling_tool", "high"],
  // Workflow-automation platforms
  [/(zapier|make\.com|automate|n8n|workato)/i, "no_automation_tools", "low"],
];

// Absence checks derived from the table above: `check` returns true when the
// fingerprint is NOT present in the supplied HTML.
const ABSENCE_SIGNALS: {
  check: (html: string) => boolean;
  signal: string;
  severity: PainSignal["severity"];
}[] = ABSENCE_FINGERPRINTS.map(([fingerprint, signal, severity]) => ({
  check: (html: string): boolean => !fingerprint.test(html),
  signal,
  severity,
}));
/** Severity labels accepted from the model; anything else is downgraded to "low". */
const LLM_SEVERITY_LABELS: readonly PainSignal["severity"][] = ["low", "medium", "high"];

/**
 * Keeps only well-formed entries from the model's `pain_signals` payload.
 *
 * The payload is untrusted JSON: it may be missing, not an array, or contain
 * entries without a usable `signal`. Such entries are dropped rather than
 * passed on, and an unrecognised severity is normalised to "low".
 */
function sanitizeLlmSignals(raw: unknown): PainSignal[] {
  if (!Array.isArray(raw)) return [];

  const clean: PainSignal[] = [];
  for (const entry of raw) {
    if (typeof entry !== "object" || entry === null) continue;

    const { signal, evidence, severity } = entry as Record<string, unknown>;
    if (typeof signal !== "string" || signal.trim() === "") continue;

    const label = typeof severity === "string" ? severity.trim().toLowerCase() : "";
    clean.push({
      signal,
      evidence: typeof evidence === "string" ? evidence : "",
      severity: LLM_SEVERITY_LABELS.find(known => known === label) ?? "low",
    });
  }
  return clean;
}

/**
 * Coerces the model's confidence to a finite number within [0, 1].
 * Missing or non-numeric values become 0. The [0, 1] range mirrors the
 * 0.4 / 0.7 values used by the heuristic fallback.
 */
function normalizeConfidence(raw: unknown): number {
  const value = Number(raw ?? 0);
  if (!Number.isFinite(value)) return 0;
  return Math.min(1, Math.max(0, value));
}

/**
 * Detect pain signals from website text and HTML.
 *
 * Step 1: Heuristic detection (instant, free)
 * Step 2: LLM enhancement (maps signals to services)
 *
 * If the LLM call throws or returns no parsed payload, the heuristic result is
 * returned on its own with a service inferred from the signals.
 *
 * @param companyName - Company being analysed; forwarded to the prompt and logs.
 * @param industry - Industry label forwarded to the prompt and the fallback inference.
 * @param employeeCount - Head count if known, otherwise null.
 * @param websiteText - Visible page text; only the first 500 characters reach the LLM.
 * @param websiteHtml - Raw page source used for the absence checks.
 * @param traceId - Correlation id passed through to the LLM client.
 * @returns Detected signals plus service match, confidence, reasoning and source.
 */
export async function detectPainSignals(
  companyName: string,
  industry: string,
  employeeCount: number | null,
  websiteText: string,
  websiteHtml: string,
  traceId: string
): Promise<PainDetectionResult> {
  // ── Step 1: Heuristic scan ─────────────────────────────────
  const heuristicSignals = runHeuristicScan(websiteText, websiteHtml);

  // The signal names are handed to the LLM as context: with many signals it
  // mostly confirms and maps to a service, with few it has to reason more
  // about the industry.
  const pageElements = heuristicSignals.map(s => s.signal);

  // ── Step 2: LLM deep reasoning ────────────────────────────
  try {
    const llmResult = await callLLM({
      operation: "pain_detect",
      model: MODELS.FAST, // 8B for speed — pain detection is pattern-based
      systemPrompt: SYSTEM_PROMPTS.PAIN_DETECTOR,
      userPrompt: buildPainDetectionPrompt({
        company_name: companyName,
        industry,
        employee_count: employeeCount,
        website_text: websiteText.slice(0, 500),
        page_elements: pageElements,
      }),
      temperature: 0.2,
      maxTokens: 400,
      jsonMode: true,
      traceId,
    });

    if (llmResult.parsed) {
      // Validate the model output before merging so malformed entries cannot
      // corrupt the result or the severity ordering.
      const llmSignals = sanitizeLlmSignals(llmResult.parsed.pain_signals);
      const merged = mergeSignals(heuristicSignals, llmSignals);

      return {
        painSignals: merged,
        serviceMatch: String(llmResult.parsed.service_match ?? "NONE"),
        matchConfidence: normalizeConfidence(llmResult.parsed.match_confidence),
        reasoning: String(llmResult.parsed.reasoning ?? ""),
        source: "combined",
      };
    }
    // No parsed payload: fall through to the heuristic-only result below.
  } catch (err) {
    logger.warn({ companyName, err }, "LLM pain detection failed — using heuristic only");
  }

  // ── Fallback: heuristic-only result ────────────────────────
  return {
    painSignals: heuristicSignals,
    serviceMatch: inferServiceFromSignals(heuristicSignals, industry),
    matchConfidence: heuristicSignals.length >= 3 ? 0.7 : 0.4,
    reasoning: `Heuristic-only: ${heuristicSignals.length} pain signals detected`,
    source: "heuristic",
  };
}
/**
 * Runs every heuristic rule against the page text and every absence check
 * against the page source, returning one entry per distinct signal name.
 *
 * @param text - Visible website text, matched against the rule patterns.
 * @param html - Raw page source, inspected by the absence checks.
 * @returns Signals in discovery order: pattern hits first, then absence hits.
 */
function runHeuristicScan(text: string, html: string): PainSignal[] {
  // Map keeps insertion order and gives first-hit-wins de-duplication by name.
  const found = new Map<string, PainSignal>();

  const record = (signal: string, evidence: string, severity: PainSignal["severity"]): void => {
    if (found.has(signal)) return;
    found.set(signal, { signal, evidence, severity });
  };

  // Presence: something in the text points at a manual process.
  for (const { pattern, signal, severity } of HEURISTIC_RULES) {
    if (pattern.test(text)) {
      record(signal, `Pattern matched in website text`, severity);
    }
  }

  // Absence: an expected tool is missing from the page source.
  for (const { check, signal, severity } of ABSENCE_SIGNALS) {
    if (check(html)) {
      record(signal, "Not detected in page source", severity);
    }
  }

  return [...found.values()];
}
/**
 * Combines heuristic and LLM signals into one list ordered by severity.
 *
 * Heuristic entries are kept as-is. An LLM entry is added only if no entry
 * with the same signal name has been seen yet, which also removes duplicates
 * the model repeats within its own output. The sort is stable, so entries of
 * equal severity keep their relative order.
 *
 * @param heuristic - Signals from the heuristic scan (not mutated).
 * @param llm - Signals reported by the LLM (not mutated).
 * @returns A new array ordered high → medium → low; entries whose severity is
 *          not one of those three labels are placed last.
 */
function mergeSignals(heuristic: PainSignal[], llm: PainSignal[]): PainSignal[] {
  const severityRank = new Map<string, number>([
    ["high", 0],
    ["medium", 1],
    ["low", 2],
  ]);
  // LLM output is untyped at runtime, so guard against an unknown label: an
  // undefined rank would make the comparator return NaN.
  const rankOf = (entry: PainSignal): number => severityRank.get(entry.severity) ?? severityRank.size;

  const merged = [...heuristic];
  const seen = new Set(heuristic.map(s => s.signal));

  for (const candidate of llm) {
    if (seen.has(candidate.signal)) continue;
    seen.add(candidate.signal); // remember it so a repeated LLM signal is skipped
    merged.push(candidate);
  }

  return merged.sort((a, b) => rankOf(a) - rankOf(b));
}
/**
 * Rule-based service selection, used when no LLM answer is available.
 *
 * Each signal is tallied against three service categories. The category with
 * the most hits wins, provided it has at least two; on a tie the earlier
 * category in the table (receptionist, then support, then data) is chosen.
 *
 * @param signals - Detected pain signals to classify.
 * @param industry - Accepted for call-site compatibility; not consulted here.
 * @returns The winning service name, or null when no category reaches two hits.
 */
function inferServiceFromSignals(signals: PainSignal[], industry: string): string | null {
  // Order matters: it is the tie-break priority.
  const categories: { service: string; triggers: string[] }[] = [
    {
      service: "AI Receptionist",
      triggers: [
        "phone_handling_manual",
        "phone_number_prominent",
        "manual_appointment_booking",
        "human_receptionist_mentioned",
        "limited_availability_hours",
        "no_online_scheduling_tool",
      ],
    },
    {
      service: "AI Customer Support",
      triggers: [
        "manual_contact_process",
        "manual_ticket_system",
        "faq_exists_no_chatbot",
        "email_only_support",
        "no_chatbot_detected",
      ],
    },
    {
      service: "AI Data Processing",
      triggers: ["manual_data_processing", "legacy_system_mentioned", "compliance_reporting_burden"],
    },
  ];

  const tallies = categories.map(({ service, triggers }) => ({
    service,
    hits: signals.reduce((count, s) => (triggers.includes(s.signal) ? count + 1 : count), 0),
  }));

  const best = tallies.reduce((top, tally) => Math.max(top, tally.hits), 0);
  if (best < 2) return null;

  const winner = tallies.find(tally => tally.hits === best);
  return winner ? winner.service : null;
}