File size: 8,837 Bytes
bd28470
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
/**
 * Pain Signal Detector
 * 
 * Core philosophy: Don't look for AI signals.
 * Look for INEFFICIENCY signals.
 * 
 * A phone number on homepage = manual call handling = pain point.
 * A "Book by Phone" button = no online scheduling = pain point.
 * No chatbot = manual customer interaction = pain point.
 * 
 * These are UNIVERSAL signals — every industry has them.
 * The LLM then maps these signals to our specific services.
 */

import { callLLM, MODELS } from "../../shared/llm/nvidia-client";
import { SYSTEM_PROMPTS, buildPainDetectionPrompt } from "../../shared/llm/prompts";
import { logger } from "../../shared/utils/logger";

/**
 * One inefficiency indicator attached to a company's website.
 *
 * Produced either by the regex/absence heuristics in this module or by the
 * LLM response that `detectPainSignals` merges in.
 */
export interface PainSignal {
  /** Machine-readable identifier, e.g. "phone_handling_manual" or "no_chatbot_detected". */
  signal: string;
  /** Short human-readable note on why the signal was raised. */
  evidence: string;
  /** Relative weight of the signal; `mergeSignals` orders results high, then medium, then low. */
  severity: "low" | "medium" | "high";
}

/**
 * Outcome of `detectPainSignals`: the detected signals plus the service they
 * were mapped to.
 */
export interface PainDetectionResult {
  /** Every signal that was detected (heuristic, LLM, or both merged). */
  painSignals: PainSignal[];
  /**
   * Matched service from service_profiles.
   * NOTE(review): the heuristic fallback yields `null` when nothing matches,
   * whereas the LLM path stringifies the model's answer and defaults to the
   * literal "NONE" — callers presumably need to handle both; confirm.
   */
  serviceMatch: string | null;
  /** Confidence in `serviceMatch`; the heuristic fallback uses 0.7 or 0.4. */
  matchConfidence: number;
  /** Free-text explanation of how the match was reached. */
  reasoning: string;
  /** Which stage produced the result. This module only emits "heuristic" and "combined". */
  source: "heuristic" | "llm" | "combined";
}

// ─── Heuristic detection (instant, free, no LLM) ────────────

/**
 * Text patterns whose presence on a website hints at a manual process.
 *
 * Each rule pairs a regular expression with the signal name it raises and the
 * severity attached to that signal. None of the patterns use the global flag,
 * so calling `.test()` on them repeatedly is stateless.
 */
const HEURISTIC_RULES: Array<{
  pattern: RegExp;
  signal: string;
  severity: PainSignal["severity"];
}> = [
  // ── Phone/call signals → AI Receptionist opportunity ──
  {
    pattern: /(?:call us|call now|phone|dial|ring us)/i,
    signal: "phone_handling_manual",
    severity: "high",
  },
  {
    // A digit (optionally preceded by "+") followed by at least eight more
    // digits, spaces, dashes, dots or parentheses.
    pattern: /\+?\d[\d\s\-().]{8,}/,
    signal: "phone_number_prominent",
    severity: "medium",
  },
  {
    pattern: /(?:book (?:an? )?appointment|schedule (?:a )?visit|make (?:an? )?appointment)/i,
    signal: "manual_appointment_booking",
    severity: "high",
  },
  {
    pattern: /(?:office hours|opening hours|business hours|we're open)/i,
    signal: "limited_availability_hours",
    severity: "medium",
  },
  {
    pattern: /(?:receptionist|front desk|reception)/i,
    signal: "human_receptionist_mentioned",
    severity: "high",
  },

  // ── Support signals → AI Customer Support opportunity ──
  {
    pattern: /(?:contact us|get in touch|reach out|enquire|inquire)/i,
    signal: "manual_contact_process",
    severity: "medium",
  },
  {
    pattern: /(?:submit (?:a )?ticket|raise (?:a )?ticket)/i,
    signal: "manual_ticket_system",
    severity: "medium",
  },
  {
    pattern: /(?:FAQ|frequently asked|common questions)/i,
    signal: "faq_exists_no_chatbot",
    severity: "low",
  },
  {
    pattern: /(?:email us|send us an email|write to us)/i,
    signal: "email_only_support",
    severity: "medium",
  },

  // ── Data/process signals → AI Data Processing opportunity ──
  {
    pattern: /(?:spreadsheet|excel|csv|manual report)/i,
    signal: "manual_data_processing",
    severity: "high",
  },
  {
    pattern: /(?:legacy|outdated|traditional system)/i,
    signal: "legacy_system_mentioned",
    severity: "high",
  },
  {
    pattern: /(?:compliance|regulatory|audit)/i,
    signal: "compliance_reporting_burden",
    severity: "medium",
  },

  // ── Hiring signals → growth/overwork indicator ──
  {
    pattern: /(?:we're hiring|join our team|open positions|careers)/i,
    signal: "actively_hiring",
    severity: "low",
  },
  {
    pattern: /(?:our team|meet the team|staff|employees)/i,
    signal: "team_page_exists",
    severity: "low",
  },
];

/**
 * Checks for the ABSENCE of automation tooling in the page source.
 *
 * Each `check` returns `true` when none of the listed vendor names appear
 * anywhere in the supplied HTML (case-insensitive), i.e. when the signal
 * should be raised.
 */
const ABSENCE_SIGNALS: Array<{
  check: (html: string) => boolean;
  signal: string;
  severity: PainSignal["severity"];
}> = [
  {
    // Live-chat / chatbot widget vendors.
    check: (html) => {
      const chatVendorFound =
        /(intercom|drift|crisp|tidio|zendesk|freshchat|livechat|tawk|hubspot.*chat)/i.test(html);
      return !chatVendorFound;
    },
    signal: "no_chatbot_detected",
    severity: "medium",
  },
  {
    // Online booking / scheduling providers.
    check: (html) => {
      const schedulerFound =
        /(calendly|acuity|booksy|mindbody|simplybook|square.*appointment)/i.test(html);
      return !schedulerFound;
    },
    signal: "no_online_scheduling_tool",
    severity: "high",
  },
  {
    // Workflow-automation platforms.
    check: (html) => {
      const automationFound = /(zapier|make\.com|automate|n8n|workato)/i.test(html);
      return !automationFound;
    },
    signal: "no_automation_tools",
    severity: "low",
  },
];

/**
 * Detect pain signals for a company from its website text and HTML.
 *
 * Step 1: heuristic scan via `runHeuristicScan` (no LLM call).
 * Step 2: LLM enhancement — the first 500 characters of the website text and
 *         the names of the heuristic signals are sent to the model, and the
 *         signals it returns are merged with the heuristic ones.
 *
 * If the LLM call throws, or returns no parsed payload, the function falls
 * back to a heuristic-only result instead of failing.
 *
 * NOTE(review): on the LLM path `serviceMatch` defaults to the string "NONE",
 * while the heuristic fallback yields `null` for "no match" — confirm callers
 * handle both before unifying them.
 *
 * @param companyName - Company name; forwarded to the prompt and to the warning log.
 * @param industry - Industry label; forwarded to the prompt and to the fallback inference.
 * @param employeeCount - Head count, or `null` when unknown; forwarded to the prompt.
 * @param websiteText - Visible website text scanned by the regex rules.
 * @param websiteHtml - Raw page source scanned by the absence checks.
 * @param traceId - Trace identifier passed through to `callLLM`.
 * @returns The detected signals plus the matched service, confidence and reasoning.
 */
export async function detectPainSignals(
  companyName: string,
  industry: string,
  employeeCount: number | null,
  websiteText: string,
  websiteHtml: string,
  traceId: string
): Promise<PainDetectionResult> {
  // ── Step 1: Heuristic scan ─────────────────────────────────
  const heuristicSignals = runHeuristicScan(websiteText, websiteHtml);

  // The LLM receives the heuristic signal names as context: with many signals
  // it mostly confirms and maps them to a service, with few it has to reason
  // from the industry context.
  const pageElements = heuristicSignals.map((s) => s.signal);

  // ── Step 2: LLM deep reasoning ────────────────────────────
  try {
    const llmResult = await callLLM({
      operation: "pain_detect",
      model: MODELS.FAST, // 8B for speed — pain detection is pattern-based
      systemPrompt: SYSTEM_PROMPTS.PAIN_DETECTOR,
      userPrompt: buildPainDetectionPrompt({
        company_name: companyName,
        industry,
        employee_count: employeeCount,
        website_text: websiteText.slice(0, 500),
        page_elements: pageElements,
      }),
      temperature: 0.2,
      maxTokens: 400,
      jsonMode: true,
      traceId,
    });

    if (llmResult.parsed) {
      // Model output is untrusted JSON: only accept `pain_signals` when it is
      // actually an array, otherwise merge nothing from the LLM.
      const rawSignals: unknown = llmResult.parsed.pain_signals;
      const llmSignals = Array.isArray(rawSignals) ? (rawSignals as PainSignal[]) : [];
      const merged = mergeSignals(heuristicSignals, llmSignals);

      // A non-numeric confidence (e.g. "high") would coerce to NaN; report 0
      // rather than leaking NaN to callers.
      const confidence = Number(llmResult.parsed.match_confidence ?? 0);

      return {
        painSignals: merged,
        serviceMatch: String(llmResult.parsed.service_match ?? "NONE"),
        matchConfidence: Number.isFinite(confidence) ? confidence : 0,
        reasoning: String(llmResult.parsed.reasoning ?? ""),
        source: "combined",
      };
    }
  } catch (err) {
    logger.warn({ companyName, err }, "LLM pain detection failed — using heuristic only");
  }

  // ── Fallback: heuristic-only result ────────────────────────
  return {
    painSignals: heuristicSignals,
    serviceMatch: inferServiceFromSignals(heuristicSignals, industry),
    matchConfidence: heuristicSignals.length >= 3 ? 0.7 : 0.4,
    reasoning: `Heuristic-only: ${heuristicSignals.length} pain signals detected`,
    source: "heuristic",
  };
}

/**
 * Run every heuristic against the page and collect the signals that fire.
 *
 * Text rules (`HEURISTIC_RULES`) are evaluated first against `text`, then the
 * absence checks (`ABSENCE_SIGNALS`) against `html`. Each signal name is
 * reported at most once; the first occurrence wins and the output keeps
 * evaluation order.
 *
 * @param text - Visible website text matched against the regex rules.
 * @param html - Raw page source handed to the absence checks.
 * @returns The signals raised, in the order they were detected.
 */
function runHeuristicScan(text: string, html: string): PainSignal[] {
  // Keyed by signal name; a Map keeps insertion order and gives us dedup for free.
  const detected = new Map<string, PainSignal>();

  const record = (
    signal: string,
    evidence: string,
    severity: PainSignal["severity"]
  ): void => {
    if (detected.has(signal)) {
      return;
    }
    detected.set(signal, { signal, evidence, severity });
  };

  // Presence-based detection: a phrase on the page hints at a manual process.
  HEURISTIC_RULES.forEach(({ pattern, signal, severity }) => {
    if (pattern.test(text)) {
      record(signal, "Pattern matched in website text", severity);
    }
  });

  // Absence-based detection: tooling that is NOT referenced in the page source.
  ABSENCE_SIGNALS.forEach((entry) => {
    if (entry.check(html)) {
      record(entry.signal, "Not detected in page source", entry.severity);
    }
  });

  return [...detected.values()];
}

/**
 * Merge heuristic and LLM-provided signals into one list, deduplicated by
 * signal name and ordered by severity (high → medium → low).
 *
 * Heuristic signals take precedence: an LLM signal whose name is already
 * present is dropped, including repeats within the LLM list itself.
 *
 * The LLM list comes from parsed model output and is treated as untrusted:
 * - a non-array `llm` value contributes nothing;
 * - entries that are not objects, or lack a non-empty string `signal`, are skipped;
 * - `severity` is matched case-insensitively and falls back to "low" when it
 *   is not one of the three known levels (an unknown level would otherwise
 *   make the sort comparator return NaN);
 * - a non-string `evidence` becomes the empty string.
 *
 * Neither input array is mutated.
 *
 * @param heuristic - Signals produced by the local heuristic scan.
 * @param llm - Signals reported by the LLM (possibly malformed).
 * @returns A new array containing the merged, severity-sorted signals.
 */
function mergeSignals(heuristic: PainSignal[], llm: PainSignal[]): PainSignal[] {
  const severityOrder: Record<PainSignal["severity"], number> = { high: 0, medium: 1, low: 2 };

  const toSeverity = (value: unknown): PainSignal["severity"] => {
    const level = typeof value === "string" ? value.toLowerCase() : "";
    return level === "high" || level === "medium" || level === "low" ? level : "low";
  };

  const merged: PainSignal[] = [...heuristic];
  const seen = new Set(heuristic.map((s) => s.signal));

  const candidates: unknown[] = Array.isArray(llm) ? llm : [];
  for (const candidate of candidates) {
    if (typeof candidate !== "object" || candidate === null) {
      continue;
    }
    const { signal, evidence, severity } = candidate as Record<string, unknown>;
    if (typeof signal !== "string" || signal === "" || seen.has(signal)) {
      continue;
    }
    // Track names as we go so repeats inside the LLM list are dropped too.
    seen.add(signal);
    merged.push({
      signal,
      evidence: typeof evidence === "string" ? evidence : "",
      severity: toSeverity(severity),
    });
  }

  // `merged` is already a private copy, so sorting in place is safe. The sort
  // is stable: entries of equal severity keep heuristic-before-LLM order.
  return merged.sort((a, b) => severityOrder[a.severity] - severityOrder[b.severity]);
}

/**
 * Deterministic fallback that maps detected signals to a service when the
 * LLM is unavailable.
 *
 * Each service owns a fixed set of signal names. The service with the most
 * matching signals wins, provided it has at least two; ties are resolved in
 * declaration order (Receptionist, then Customer Support, then Data
 * Processing). Repeated signal names in the input are counted each time.
 *
 * @param signals - Signals to classify.
 * @param industry - Accepted for interface compatibility; not consulted here.
 * @returns The winning service name, or `null` when no service reaches two hits.
 */
function inferServiceFromSignals(signals: PainSignal[], industry: string): string | null {
  const serviceCatalogue: ReadonlyArray<readonly [string, ReadonlySet<string>]> = [
    [
      "AI Receptionist",
      new Set([
        "phone_handling_manual",
        "phone_number_prominent",
        "manual_appointment_booking",
        "human_receptionist_mentioned",
        "limited_availability_hours",
        "no_online_scheduling_tool",
      ]),
    ],
    [
      "AI Customer Support",
      new Set([
        "manual_contact_process",
        "manual_ticket_system",
        "faq_exists_no_chatbot",
        "email_only_support",
        "no_chatbot_detected",
      ]),
    ],
    [
      "AI Data Processing",
      new Set([
        "manual_data_processing",
        "legacy_system_mentioned",
        "compliance_reporting_burden",
      ]),
    ],
  ];

  let bestService: string | null = null;
  let bestHits = 0;

  for (const [service, members] of serviceCatalogue) {
    let hits = 0;
    for (const { signal } of signals) {
      if (members.has(signal)) {
        hits += 1;
      }
    }
    // Strictly greater: on a tie the earlier service in the catalogue stays.
    if (hits > bestHits) {
      bestHits = hits;
      bestService = service;
    }
  }

  // A single matching signal is too weak to commit to a service.
  return bestHits < 2 ? null : bestService;
}