Spaces:
Running
Running
/**
 * Pain Signal Detector
 *
 * Core philosophy: Don't look for AI signals.
 * Look for INEFFICIENCY signals.
 *
 * A phone number on the homepage = manual call handling = pain point.
 * A "Book by Phone" button = no online scheduling = pain point.
 * No chatbot = manual customer interaction = pain point.
 *
 * These are UNIVERSAL signals — every industry has them.
 * The LLM then maps these signals to our specific services.
 */
| import { callLLM, MODELS } from "../../shared/llm/nvidia-client"; | |
| import { SYSTEM_PROMPTS, buildPainDetectionPrompt } from "../../shared/llm/prompts"; | |
| import { logger } from "../../shared/utils/logger"; | |
/**
 * A single inefficiency indicator found for a company's website.
 */
export interface PainSignal {
  /** Machine-readable signal name, e.g. "no_chatbot_detected". */
  signal: string;
  /** Short note describing what triggered the signal. */
  evidence: string;
  /** Relative weight of the signal; result lists are ordered high, then medium, then low. */
  severity: "high" | "medium" | "low";
}
/**
 * Outcome of a pain-detection run for one company.
 */
export interface PainDetectionResult {
  /** Every signal that was detected for the company. */
  painSignals: PainSignal[];
  /**
   * Matched service from service_profiles.
   * The heuristic fallback yields `null` when nothing matches; the LLM path
   * stores the model's `service_match` value as a string, defaulting to "NONE".
   */
  serviceMatch: string | null;
  /** Confidence in `serviceMatch` (the heuristic fallback uses 0.4 or 0.7). */
  matchConfidence: number;
  /** Free-text explanation of how the match was reached. */
  reasoning: string;
  /** Which stage produced the result. */
  source: "heuristic" | "llm" | "combined";
}
// ─── Heuristic detection (instant, free, no LLM) ────────────

/**
 * Regex rules evaluated against the website text. A rule that matches
 * contributes its `signal` with the given `severity`.
 *
 * Short keywords that commonly occur inside unrelated words are anchored with
 * `\b` so that, for example, "excellent" does not count as an Excel mention,
 * "cordial"/"dialogue" do not count as "dial", "smartphone" does not count as
 * "phone", and "auditorium" does not count as "audit".
 */
const HEURISTIC_RULES: {
  pattern: RegExp;
  signal: string;
  severity: PainSignal["severity"];
}[] = [
  // Phone/call signals → AI Receptionist opportunity
  {
    pattern: /(?:call us|call now|\b(?:tele)?phones?\b|\bdial(?:s|l?ed|l?ing)?\b|ring us)/i,
    signal: "phone_handling_manual",
    severity: "high",
  },
  { pattern: /\+?\d[\d\s\-().]{8,}/, signal: "phone_number_prominent", severity: "medium" },
  {
    pattern: /(?:book (?:an? )?appointment|schedule (?:a )?visit|make (?:an? )?appointment)/i,
    signal: "manual_appointment_booking",
    severity: "high",
  },
  {
    pattern: /(?:office hours|opening hours|business hours|we're open)/i,
    signal: "limited_availability_hours",
    severity: "medium",
  },
  {
    pattern: /(?:receptionist|front desk|reception)/i,
    signal: "human_receptionist_mentioned",
    severity: "high",
  },

  // Support signals → AI Customer Support opportunity
  {
    pattern: /(?:contact us|get in touch|reach out|enquire|inquire)/i,
    signal: "manual_contact_process",
    severity: "medium",
  },
  {
    pattern: /(?:submit (?:a )?ticket|raise (?:a )?ticket)/i,
    signal: "manual_ticket_system",
    severity: "medium",
  },
  {
    pattern: /(?:FAQ|frequently asked|common questions)/i,
    signal: "faq_exists_no_chatbot",
    severity: "low",
  },
  {
    pattern: /(?:email us|send us an email|write to us)/i,
    signal: "email_only_support",
    severity: "medium",
  },

  // Data/process signals → AI Data Processing opportunity
  {
    // `\bexcel\b` keeps "excellent"/"excellence" from matching.
    pattern: /(?:spreadsheet|\bexcel\b|\bcsv\b|manual report)/i,
    signal: "manual_data_processing",
    severity: "high",
  },
  {
    pattern: /(?:legacy|outdated|traditional system)/i,
    signal: "legacy_system_mentioned",
    severity: "high",
  },
  {
    // Accept audit/audits/audited/auditing/auditor(s) but not "auditorium" or "audition".
    pattern: /(?:compliance|regulatory|\baudit(?:s|ed|ing|ors?)?\b)/i,
    signal: "compliance_reporting_burden",
    severity: "medium",
  },

  // Hiring signals → growth/overwork indicator
  {
    pattern: /(?:we're hiring|join our team|open positions|careers)/i,
    signal: "actively_hiring",
    severity: "low",
  },
  {
    pattern: /(?:our team|meet the team|staff|employees)/i,
    signal: "team_page_exists",
    severity: "low",
  },
];
/**
 * Checks for the ABSENCE of automation tooling in the page source.
 *
 * Each entry pairs a vendor/tool pattern with the signal raised when that
 * pattern is NOT found in the HTML: `check(html)` returns true when none of
 * the listed names appear.
 */
const ABSENCE_SIGNALS: {
  check: (html: string) => boolean;
  signal: string;
  severity: PainSignal["severity"];
}[] = (
  [
    // Chat widgets
    [/(intercom|drift|crisp|tidio|zendesk|freshchat|livechat|tawk|hubspot.*chat)/i, "no_chatbot_detected", "medium"],
    // Online scheduling tools
    [/(calendly|acuity|booksy|mindbody|simplybook|square.*appointment)/i, "no_online_scheduling_tool", "high"],
    // Workflow automation platforms
    [/(zapier|make\.com|automate|n8n|workato)/i, "no_automation_tools", "low"],
  ] as const
).map(([toolPattern, signal, severity]) => ({
  check: (html: string): boolean => !toolPattern.test(html),
  signal,
  severity,
}));
/**
 * Keep only well-formed signal entries from an untrusted LLM payload.
 *
 * Entries must be objects with a non-empty string `signal`. A non-string
 * `evidence` becomes "", and `severity` is matched case-insensitively,
 * falling back to "low" when it is not one of high/medium/low.
 */
function sanitizeLlmSignals(raw: unknown): PainSignal[] {
  if (!Array.isArray(raw)) return [];

  const cleaned: PainSignal[] = [];
  for (const entry of raw) {
    if (typeof entry !== "object" || entry === null) continue;

    const { signal, evidence, severity } = entry as Record<string, unknown>;
    if (typeof signal !== "string" || signal.trim() === "") continue;

    const level = typeof severity === "string" ? severity.trim().toLowerCase() : "";
    cleaned.push({
      signal,
      evidence: typeof evidence === "string" ? evidence : "",
      severity: level === "high" || level === "medium" || level === "low" ? level : "low",
    });
  }
  return cleaned;
}

/**
 * Detect pain signals from website text and HTML.
 *
 * Step 1: Heuristic detection (instant, free)
 * Step 2: LLM enhancement (DL reasoning — maps signals to services)
 *
 * @param companyName - Company name; forwarded to the prompt and to log context.
 * @param industry - Industry label; forwarded to the prompt and to the heuristic fallback.
 * @param employeeCount - Head count, or null when unknown; forwarded to the prompt.
 * @param websiteText - Website text; scanned by the heuristics, and its first 500
 *   characters are sent to the LLM.
 * @param websiteHtml - Page source used for the absence checks.
 * @param traceId - Trace identifier passed through to the LLM call.
 * @returns A "combined" result when the LLM call yields parsed JSON; otherwise a
 *   "heuristic" result. LLM failures are logged and never thrown to the caller.
 */
export async function detectPainSignals(
  companyName: string,
  industry: string,
  employeeCount: number | null,
  websiteText: string,
  websiteHtml: string,
  traceId: string
): Promise<PainDetectionResult> {
  // ── Step 1: Heuristic scan ─────────────────────────────────
  const heuristicSignals = runHeuristicScan(websiteText, websiteHtml);

  // If we found enough signals, the LLM just confirms and maps to a service.
  // If few signals, the LLM reasons deeper about the industry context.
  const pageElements = heuristicSignals.map((s) => s.signal);

  // ── Step 2: LLM deep reasoning ─────────────────────────────
  try {
    const llmResult = await callLLM({
      operation: "pain_detect",
      model: MODELS.FAST, // 8B for speed — pain detection is pattern-based
      systemPrompt: SYSTEM_PROMPTS.PAIN_DETECTOR,
      userPrompt: buildPainDetectionPrompt({
        company_name: companyName,
        industry,
        employee_count: employeeCount,
        website_text: websiteText.slice(0, 500),
        page_elements: pageElements,
      }),
      temperature: 0.2,
      maxTokens: 400,
      jsonMode: true,
      traceId,
    });

    if (llmResult.parsed) {
      // The model output is untrusted JSON: validate it before merging so a
      // malformed `pain_signals` value cannot leak garbage into the result.
      const llmSignals = sanitizeLlmSignals(llmResult.parsed.pain_signals);

      // A non-numeric confidence (e.g. "high") would otherwise become NaN.
      const confidence = Number(llmResult.parsed.match_confidence ?? 0);

      return {
        painSignals: mergeSignals(heuristicSignals, llmSignals),
        // NOTE(review): this path reports "no match" as the string "NONE" while
        // the heuristic fallback below reports null — confirm what callers expect.
        serviceMatch: String(llmResult.parsed.service_match ?? "NONE"),
        matchConfidence: Number.isFinite(confidence) ? confidence : 0,
        reasoning: String(llmResult.parsed.reasoning ?? ""),
        source: "combined",
      };
    }

    logger.warn({ companyName }, "LLM pain detection returned no parsed JSON — using heuristic only");
  } catch (err) {
    logger.warn({ companyName, err }, "LLM pain detection failed — using heuristic only");
  }

  // ── Fallback: heuristic-only result ────────────────────────
  return {
    painSignals: heuristicSignals,
    serviceMatch: inferServiceFromSignals(heuristicSignals, industry),
    matchConfidence: heuristicSignals.length >= 3 ? 0.7 : 0.4,
    reasoning: `Heuristic-only: ${heuristicSignals.length} pain signals detected`,
    source: "heuristic",
  };
}
/**
 * Run the pattern rules and the absence checks over a page.
 *
 * @param text - Website text, tested against every HEURISTIC_RULES pattern.
 * @param html - Page source, passed to every ABSENCE_SIGNALS check.
 * @returns At most one PainSignal per signal name: pattern hits first (in rule
 *   order), followed by absence hits (in check order).
 */
function runHeuristicScan(text: string, html: string): PainSignal[] {
  const signals: PainSignal[] = [];
  const seen = new Set<string>();

  // Record a signal once, no matter how many rules map to the same name.
  const record = (signal: string, severity: PainSignal["severity"], evidence: string): void => {
    if (seen.has(signal)) return;
    seen.add(signal);
    signals.push({ signal, evidence, severity });
  };

  // Pattern-based detection
  for (const rule of HEURISTIC_RULES) {
    if (rule.pattern.test(text)) {
      record(rule.signal, rule.severity, `Pattern matched in website text`);
    }
  }

  // Absence-based detection (what's NOT on the site).
  // With no page source there is nothing to inspect, so "not detected" would
  // be a false claim; skip these checks instead of reporting every one.
  if (html.trim() !== "") {
    for (const absence of ABSENCE_SIGNALS) {
      if (absence.check(html)) {
        record(absence.signal, absence.severity, "Not detected in page source");
      }
    }
  }

  return signals;
}
/**
 * Merge heuristic and LLM signals into one list ordered by severity.
 *
 * Heuristic entries win on name collisions, and each signal name appears at
 * most once in the output, including names repeated within the LLM list.
 * Entries without a string `signal` are skipped. Neither input array is
 * mutated.
 *
 * @param heuristic - Signals from the heuristic scan; always kept.
 * @param llm - Signals proposed by the LLM; added only when their name is new.
 * @returns A new array sorted high → medium → low. Unrecognised severities sort
 *   last, and the sort keeps the original relative order within a severity.
 */
function mergeSignals(heuristic: PainSignal[], llm: PainSignal[]): PainSignal[] {
  const severityRank = new Map<string, number>([
    ["high", 0],
    ["medium", 1],
    ["low", 2],
  ]);
  // Rank unknown severities after "low" so the comparator never yields NaN.
  const rankOf = (entry: PainSignal): number => severityRank.get(entry.severity) ?? severityRank.size;

  const merged = [...heuristic];
  const seen = new Set(heuristic.map((s) => s.signal));

  for (const candidate of llm) {
    if (candidate == null || typeof candidate.signal !== "string") continue;
    if (seen.has(candidate.signal)) continue;
    // Track the name so a repeated LLM entry is not added twice.
    seen.add(candidate.signal);
    merged.push(candidate);
  }

  // Sort by severity: high → medium → low
  return merged.sort((a, b) => rankOf(a) - rankOf(b));
}
/**
 * Deterministic service inference from signals (fallback when LLM fails).
 *
 * Counts how many DISTINCT signal names fall into each service category and
 * returns the category with the most hits, provided it has at least two.
 * Ties resolve in the order Receptionist, Customer Support, Data Processing.
 *
 * @param signals - Detected signals; repeated names are counted once.
 * @param industry - Currently unused; kept so existing call sites stay valid.
 * @returns The service name, or null when no category has two or more hits.
 */
function inferServiceFromSignals(signals: PainSignal[], industry: string): string | null {
  const observed = new Set(signals.map((s) => s.signal));
  const countObserved = (names: readonly string[]): number =>
    names.filter((name) => observed.has(name)).length;

  // Receptionist signals
  const receptionistCount = countObserved([
    "phone_handling_manual",
    "phone_number_prominent",
    "manual_appointment_booking",
    "human_receptionist_mentioned",
    "limited_availability_hours",
    "no_online_scheduling_tool",
  ]);

  // Support signals
  const supportCount = countObserved([
    "manual_contact_process",
    "manual_ticket_system",
    "faq_exists_no_chatbot",
    "email_only_support",
    "no_chatbot_detected",
  ]);

  // Data signals
  const dataCount = countObserved([
    "manual_data_processing",
    "legacy_system_mentioned",
    "compliance_reporting_burden",
  ]);

  const strongest = Math.max(receptionistCount, supportCount, dataCount);
  if (strongest < 2) return null;
  if (receptionistCount === strongest) return "AI Receptionist";
  if (supportCount === strongest) return "AI Customer Support";
  // `strongest` is one of the three counts, so only the data category remains.
  return "AI Data Processing";
}