/**
* Pain Signal Detector
*
* Core philosophy: Don't look for AI signals.
* Look for INEFFICIENCY signals.
*
* A phone number on homepage = manual call handling = pain point.
* A "Book by Phone" button = no online scheduling = pain point.
* No chatbot = manual customer interaction = pain point.
*
* These are UNIVERSAL signals — every industry has them.
* The LLM then maps these signals to our specific services.
*/
import { callLLM, MODELS } from "../../shared/llm/nvidia-client";
import { SYSTEM_PROMPTS, buildPainDetectionPrompt } from "../../shared/llm/prompts";
import { logger } from "../../shared/utils/logger";
/**
 * A single inefficiency indicator found for a company's website.
 */
export interface PainSignal {
/** Machine-readable signal name, e.g. "phone_handling_manual" or "no_chatbot_detected". */
signal: string;
/** Short human-readable note explaining why the signal was raised. */
evidence: string;
/** Priority of the signal; merged results are ordered high, then medium, then low. */
severity: "low" | "medium" | "high";
}
/**
 * Outcome of one pain-detection run for a company.
 */
export interface PainDetectionResult {
/** Detected signals: heuristic findings, plus LLM-reported ones when the LLM call yields parsed output. */
painSignals: PainSignal[];
/**
 * Best-matching service name. On the heuristic-only path this is null when no
 * service category reaches two signals. On the LLM path the model's answer is
 * stringified and defaults to "NONE" when absent, so it is never null there.
 */
serviceMatch: string | null; // matched service from service_profiles
/** Confidence in the match: 0.7 or 0.4 on the heuristic-only path, otherwise taken from the LLM response. */
matchConfidence: number;
/** Free-text explanation of how the result was reached. */
reasoning: string;
/** Which stage produced the result; the detector in this file emits "heuristic" or "combined". */
source: "heuristic" | "llm" | "combined";
}
// ─── Heuristic detection (instant, free, no LLM) ────────────
/**
 * Text patterns whose presence on a site hints at a manual process.
 *
 * Rows are written as `[pattern, signal, severity]` and expanded into rule
 * objects below. Order matters only in that the heuristic scan reports
 * signals in table order.
 */
const HEURISTIC_RULES: {
  pattern: RegExp;
  signal: string;
  severity: PainSignal["severity"];
}[] = (
  [
    // Phone/call signals → AI Receptionist opportunity
    [/(?:call us|call now|phone|dial|ring us)/i, "phone_handling_manual", "high"],
    [/\+?\d[\d\s\-().]{8,}/, "phone_number_prominent", "medium"],
    [
      /(?:book (?:an? )?appointment|schedule (?:a )?visit|make (?:an? )?appointment)/i,
      "manual_appointment_booking",
      "high",
    ],
    [/(?:office hours|opening hours|business hours|we're open)/i, "limited_availability_hours", "medium"],
    [/(?:receptionist|front desk|reception)/i, "human_receptionist_mentioned", "high"],

    // Support signals → AI Customer Support opportunity
    [/(?:contact us|get in touch|reach out|enquire|inquire)/i, "manual_contact_process", "medium"],
    [/(?:submit (?:a )?ticket|raise (?:a )?ticket)/i, "manual_ticket_system", "medium"],
    [/(?:FAQ|frequently asked|common questions)/i, "faq_exists_no_chatbot", "low"],
    [/(?:email us|send us an email|write to us)/i, "email_only_support", "medium"],

    // Data/process signals → AI Data Processing opportunity
    [/(?:spreadsheet|excel|csv|manual report)/i, "manual_data_processing", "high"],
    [/(?:legacy|outdated|traditional system)/i, "legacy_system_mentioned", "high"],
    [/(?:compliance|regulatory|audit)/i, "compliance_reporting_burden", "medium"],

    // Hiring signals → growth/overwork indicator
    [/(?:we're hiring|join our team|open positions|careers)/i, "actively_hiring", "low"],
    [/(?:our team|meet the team|staff|employees)/i, "team_page_exists", "low"],
  ] as const
).map(([pattern, signal, severity]) => ({ pattern, signal, severity }));
/**
 * Signals raised by what is NOT on the page: each entry's `check` returns
 * true when none of the listed vendor/tool names appear in the HTML, which is
 * taken as an indication that the corresponding automation is absent.
 *
 * Rows are `[vendor pattern, signal, severity]`; the negated test is attached
 * when the rows are expanded.
 */
const ABSENCE_SIGNALS: {
  check: (html: string) => boolean;
  signal: string;
  severity: PainSignal["severity"];
}[] = (
  [
    // Live-chat / chatbot widgets
    [
      /(intercom|drift|crisp|tidio|zendesk|freshchat|livechat|tawk|hubspot.*chat)/i,
      "no_chatbot_detected",
      "medium",
    ],
    // Online scheduling tools
    [
      /(calendly|acuity|booksy|mindbody|simplybook|square.*appointment)/i,
      "no_online_scheduling_tool",
      "high",
    ],
    // Workflow automation platforms
    [/(zapier|make\.com|automate|n8n|workato)/i, "no_automation_tools", "low"],
  ] as const
).map(([vendorPattern, signal, severity]) => ({
  check: (html: string) => !vendorPattern.test(html),
  signal,
  severity,
}));
/**
 * Detect pain signals from website text and HTML.
 *
 * Step 1: Heuristic detection (instant, free).
 * Step 2: LLM enhancement — the heuristic signal names and the first 500
 *         characters of the site text are sent to the model, which may add
 *         signals and map them to a service.
 *
 * If the LLM call throws or returns no parsed payload, the result is built
 * from the heuristic signals alone.
 *
 * @param companyName   Company name, passed to the prompt and to log output.
 * @param industry      Industry label, passed to the prompt and to the heuristic fallback.
 * @param employeeCount Head count if known, otherwise null.
 * @param websiteText   Visible text of the site (scanned by the pattern rules).
 * @param websiteHtml   Raw page source (scanned by the absence checks).
 * @param traceId       Correlation id forwarded to the LLM client.
 * @returns Signals plus service match; `source` is "combined" when the LLM
 *          produced parsed output and "heuristic" otherwise.
 */
export async function detectPainSignals(
  companyName: string,
  industry: string,
  employeeCount: number | null,
  websiteText: string,
  websiteHtml: string,
  traceId: string
): Promise<PainDetectionResult> {
  // ── Step 1: Heuristic scan ─────────────────────────────────
  const heuristicSignals = runHeuristicScan(websiteText, websiteHtml);

  // If we found enough signals, LLM just confirms and maps to service
  // If few signals, LLM reasons deeper about the industry context
  const pageElements = heuristicSignals.map((s) => s.signal);

  // ── Step 2: LLM deep reasoning ─────────────────────────────
  try {
    const llmResult = await callLLM({
      operation: "pain_detect",
      model: MODELS.FAST, // 8B for speed — pain detection is pattern-based
      systemPrompt: SYSTEM_PROMPTS.PAIN_DETECTOR,
      userPrompt: buildPainDetectionPrompt({
        company_name: companyName,
        industry,
        employee_count: employeeCount,
        website_text: websiteText.slice(0, 500),
        page_elements: pageElements,
      }),
      temperature: 0.2,
      maxTokens: 400,
      jsonMode: true,
      traceId,
    });

    if (llmResult.parsed) {
      // The payload is model-generated JSON: only accept pain_signals when it
      // really is an array, otherwise keep the heuristic signals as they are.
      const rawSignals: unknown = llmResult.parsed.pain_signals;
      const llmSignals = Array.isArray(rawSignals) ? (rawSignals as PainSignal[]) : [];

      // A non-numeric confidence (e.g. "high") would become NaN; report 0 instead.
      const confidence = Number(llmResult.parsed.match_confidence ?? 0);

      return {
        // Merge heuristic + LLM signals (dedup)
        painSignals: mergeSignals(heuristicSignals, llmSignals),
        serviceMatch: String(llmResult.parsed.service_match ?? "NONE"),
        matchConfidence: Number.isFinite(confidence) ? confidence : 0,
        reasoning: String(llmResult.parsed.reasoning ?? ""),
        source: "combined",
      };
    }
  } catch (err) {
    logger.warn({ companyName, err }, "LLM pain detection failed — using heuristic only");
  }

  // ── Fallback: heuristic-only result ────────────────────────
  return {
    painSignals: heuristicSignals,
    serviceMatch: inferServiceFromSignals(heuristicSignals, industry),
    matchConfidence: heuristicSignals.length >= 3 ? 0.7 : 0.4,
    reasoning: `Heuristic-only: ${heuristicSignals.length} pain signals detected`,
    source: "heuristic",
  };
}
/**
 * Run the free, synchronous part of pain detection.
 *
 * Presence rules (HEURISTIC_RULES) are tested against the site text; absence
 * checks (ABSENCE_SIGNALS) are tested against the page source. Each signal
 * name is reported at most once, in table order, presence rules first.
 *
 * @param text Visible website text.
 * @param html Raw page source. When empty or whitespace-only, absence checks
 *             are skipped: with no source to inspect, "not detected" would be
 *             reported for every tool and inflate the signal count.
 * @returns The detected signals (possibly empty).
 */
function runHeuristicScan(text: string, html: string): PainSignal[] {
  const signals: PainSignal[] = [];
  const seen = new Set<string>();

  // Pattern-based detection
  for (const rule of HEURISTIC_RULES) {
    if (seen.has(rule.signal) || !rule.pattern.test(text)) continue;
    seen.add(rule.signal);
    signals.push({
      signal: rule.signal,
      evidence: `Pattern matched in website text`,
      severity: rule.severity,
    });
  }

  // Absence-based detection (what's NOT on the site) — only meaningful when
  // there is page source to look at.
  const hasPageSource = typeof html === "string" && html.trim().length > 0;
  if (hasPageSource) {
    for (const absence of ABSENCE_SIGNALS) {
      if (seen.has(absence.signal) || !absence.check(html)) continue;
      seen.add(absence.signal);
      signals.push({
        signal: absence.signal,
        evidence: "Not detected in page source",
        severity: absence.severity,
      });
    }
  }

  return signals;
}
/**
 * Combine heuristic and LLM-reported signals into one list ordered by
 * severity (high → medium → low).
 *
 * Heuristic signals win on name collisions. LLM entries come from parsed
 * model output, so they are validated first: malformed entries are dropped,
 * severities are normalized, and repeated names are kept only once. Neither
 * input array is mutated.
 *
 * @param heuristic Signals from the rule-based scan (trusted shape).
 * @param llm       Signals reported by the model (shape is not guaranteed at runtime).
 * @returns A new array, stable-sorted by severity.
 */
function mergeSignals(heuristic: PainSignal[], llm: PainSignal[]): PainSignal[] {
  const severityRank: Record<PainSignal["severity"], number> = { high: 0, medium: 1, low: 2 };
  const merged: PainSignal[] = [...heuristic];
  const seen = new Set(heuristic.map((s) => s.signal));

  // Despite the static type, `llm` may be anything the model emitted.
  const candidates: unknown[] = Array.isArray(llm) ? llm : [];
  for (const candidate of candidates) {
    const signal = normalizeLlmSignal(candidate);
    if (signal === null || seen.has(signal.signal)) continue;
    seen.add(signal.signal);
    merged.push(signal);
  }

  // Every severity is now a known key, so the comparator never yields NaN.
  return merged.sort((a, b) => severityRank[a.severity] - severityRank[b.severity]);
}

/**
 * Coerce one model-reported entry into a well-formed PainSignal.
 *
 * Returns null when the entry is not an object or has no usable `signal`
 * name. Severity is matched case-insensitively; anything unrecognized is
 * ranked "low" so unverifiable data cannot outrank validated signals.
 */
function normalizeLlmSignal(candidate: unknown): PainSignal | null {
  if (typeof candidate !== "object" || candidate === null) return null;

  const { signal, evidence, severity } = candidate as Record<string, unknown>;
  if (typeof signal !== "string" || signal.trim() === "") return null;

  const level = typeof severity === "string" ? severity.trim().toLowerCase() : "";
  return {
    signal,
    evidence: typeof evidence === "string" ? evidence : "",
    severity: level === "high" || level === "medium" ? level : "low",
  };
}
/**
 * Pick a service from the detected signals without consulting the LLM
 * (used as the fallback when the LLM path produces nothing).
 *
 * Each signal is counted toward the service category that lists its name.
 * The category with the most hits wins; ties go to the category listed
 * first (Receptionist, then Customer Support, then Data Processing). Fewer
 * than two hits in the leading category yields null.
 *
 * @param signals  Detected pain signals.
 * @param industry Accepted for interface compatibility; not consulted here.
 * @returns The service name, or null when the evidence is too thin.
 */
function inferServiceFromSignals(signals: PainSignal[], industry: string): string | null {
  const categories: { service: string; members: ReadonlySet<string> }[] = [
    {
      service: "AI Receptionist",
      members: new Set([
        "phone_handling_manual",
        "phone_number_prominent",
        "manual_appointment_booking",
        "human_receptionist_mentioned",
        "limited_availability_hours",
        "no_online_scheduling_tool",
      ]),
    },
    {
      service: "AI Customer Support",
      members: new Set([
        "manual_contact_process",
        "manual_ticket_system",
        "faq_exists_no_chatbot",
        "email_only_support",
        "no_chatbot_detected",
      ]),
    },
    {
      service: "AI Data Processing",
      members: new Set(["manual_data_processing", "legacy_system_mentioned", "compliance_reporting_burden"]),
    },
  ];

  let bestService: string | null = null;
  let bestHits = 0;
  for (const { service, members } of categories) {
    let hits = 0;
    for (const { signal } of signals) {
      if (members.has(signal)) hits += 1;
    }
    // Strictly greater: an earlier category keeps the win on a tie.
    if (hits > bestHits) {
      bestHits = hits;
      bestService = service;
    }
  }

  return bestHits >= 2 ? bestService : null;
}
|