/**
 * Production-grade prompt library.
 *
 * Design principles (Google/Anthropic standard):
 * 1. Chain-of-thought: Force reasoning before conclusion
 * 2. Few-shot examples: 2-3 examples for each prompt
 * 3. Structured output: Exact JSON schema specified
 * 4. Grounding instruction: "Only state what evidence supports"
 * 5. Anti-hallucination: "Write UNKNOWN if data not provided"
 * 6. Token-efficient: No verbose instructions, no repetition
 */

// ─── INTERNAL HELPERS ────────────────────────────────────────

/**
 * Returns at most `maxChars` leading characters of `text`, or `fallback`
 * when the text is missing, empty, or whitespace-only, so a prompt never
 * contains a label with nothing after it.
 *
 * NOTE(review): the text is interpolated into the prompt verbatim. If it is
 * scraped third-party content, consider delimiting it to limit prompt
 * injection — confirm against the caller.
 */
function excerptOrFallback(
  text: string | null | undefined,
  maxChars: number,
  fallback: string,
): string {
  const cut = (text || "").slice(0, maxChars);
  return cut.trim() ? cut : fallback;
}

/**
 * Joins `items` with `separator`; returns `fallback` when the joined result
 * is empty (empty array, or only empty strings).
 */
function joinOrFallback(
  items: readonly string[],
  separator: string,
  fallback: string,
): string {
  return items.join(separator) || fallback;
}

// ─── SYSTEM PROMPTS ──────────────────────────────────────────

/** System prompts, one per analysis role. Keys are part of the public API. */
export const SYSTEM_PROMPTS = {
  PROFILER: `You are a business analyst for an AI automation agency.
Your job: analyze a company and identify WHERE our AI services can help them.

CRITICAL RULES:
- Only state facts supported by the provided evidence
- Write "UNKNOWN" for anything not in the data — NEVER guess
- Your analysis determines whether a real salesperson contacts this company
- Wrong analysis = wasted human time = unacceptable
- Think step by step before concluding`,

  SCORER: `You are a lead qualification engine.
Your job: extract SIGNALS from company data.
You do NOT compute the final score. The system computes scores deterministically from your signal extraction.

CRITICAL RULES:
- Extract only what the evidence supports
- For each signal, cite which piece of evidence supports it
- If evidence is weak or missing, say so honestly
- Output ONLY the structured JSON requested`,

  EMAIL_CLASSIFIER: `You are a B2B email quality analyst.
Your job: determine if a specific email address reaches a decision-maker.
Consider company size, industry, and the email prefix meaning in context.

CRITICAL RULES:
- Small company (<20 people): admin@, operations@, office@ likely reaches owner
- Large company (200+): same prefixes likely reach departments, not individuals
- NEVER assume — reason from the evidence provided
- When uncertain, err on the side of KEEPING the email (mark confidence low)`,

  PAIN_DETECTOR: `You are an operations efficiency analyst.
Your job: identify operational pain points in a company that AI automation can solve.
You are NOT looking for companies that already use AI.
You ARE looking for companies with manual, repetitive, or inefficient processes.

CRITICAL RULES:
- A phone number on homepage = manual call handling (pain point)
- "Book by phone" = no online scheduling (pain point)
- No chatbot visible = manual customer interaction (pain point)
- Small staff + many services = overworked team (pain point)
- These are REAL signals, not guesses`,
} as const;

// ─── PROFILING PROMPT ────────────────────────────────────────

/**
 * Builds the company-profiling prompt: a fact sheet for the company, a
 * five-step reasoning scaffold, the required JSON output schema, and two
 * worked examples.
 *
 * Missing scalar fields are rendered as "UNKNOWN"/"NONE"-style placeholders
 * rather than left blank. The website text is truncated to 600 characters.
 *
 * NOTE(review): `job_postings` is part of the input type but is not rendered
 * into the prompt; only `ai_job_count` is. Confirm whether posting text
 * should be included.
 *
 * @param companyData - Collected facts about the company being profiled.
 * @returns The prompt text.
 */
export function buildProfilePrompt(companyData: {
  name: string;
  industry: string;
  employee_count: number | null;
  description: string;
  website_text: string;
  tech_stack: string[];
  job_postings: string[];
  ai_job_count: number;
  linkedin_description: string;
  pain_signals: string[];
  service_match: string | null;
}): string {
  const websiteExcerpt = excerptOrFallback(companyData.website_text, 600, "NONE PROVIDED");
  const techStack = joinOrFallback(companyData.tech_stack, ", ", "NONE DETECTED");
  const painSignals = joinOrFallback(companyData.pain_signals, ", ", "NONE");

  return `ANALYZE THIS COMPANY:

Name: ${companyData.name}
Industry: ${companyData.industry || "UNKNOWN"}
Employees: ${companyData.employee_count ?? "UNKNOWN"}
Description: ${companyData.description || "NONE PROVIDED"}
Website excerpt (first 600 chars): ${websiteExcerpt}
LinkedIn description: ${companyData.linkedin_description || "NONE"}
Tech stack detected: ${techStack}
Job postings mentioning AI/automation: ${companyData.ai_job_count}
Pain signals detected: ${painSignals}
Service match suggestion: ${companyData.service_match || "NONE"}

STEP-BY-STEP ANALYSIS:
Step 1: What does this company actually DO? (2 sentences, facts only)
Step 2: What are their likely daily operational challenges? (based on industry + size)
Step 3: What specific AI automation would save them time/money? (be specific)
Step 4: Who in this organization would approve buying this service?
Step 5: What outreach angle would resonate with this specific person?

After reasoning through steps 1-5, output this JSON:
{
  "profile_summary": "2-3 factual sentences about what this company does",
  "pain_points": ["specific pain 1", "specific pain 2"],
  "ai_use_case": "The single most compelling AI use case for them",
  "ai_readiness": "low|medium|high",
  "decision_maker_reasoning": "Who likely makes purchasing decisions and why",
  "outreach_angle": "One specific sentence — the hook for first contact",
  "confidence": 0.0,
  "evidence_used": ["list which data points you relied on"],
  "evidence_missing": ["list what data you wished you had"]
}

EXAMPLE 1 (dental clinic, 6 employees):
{
  "profile_summary": "ABC Dental is a 6-person dental practice in Houston offering general and cosmetic dentistry. They display their phone number prominently and use a basic contact form for appointments.",
  "pain_points": ["Manual phone-based appointment scheduling during business hours only", "No after-hours patient communication capability"],
  "ai_use_case": "AI receptionist to handle appointment booking, reminders, and after-hours calls",
  "ai_readiness": "low",
  "decision_maker_reasoning": "Practice owner (Dr. Smith, DDS) makes all purchasing decisions. Small practice = owner controls budget directly.",
  "outreach_angle": "Stop losing patients to voicemail — our AI receptionist books appointments 24/7, even when your front desk is closed",
  "confidence": 0.82,
  "evidence_used": ["phone number on homepage", "contact form only", "6 staff listed", "no chatbot detected"],
  "evidence_missing": ["annual revenue", "number of daily calls", "current scheduling software"]
}

EXAMPLE 2 (manufacturing company, 150 employees):
{
  "profile_summary": "XYZ Manufacturing is a UK-based manufacturer of industrial valves with 150 employees. They use SAP for ERP and are hiring a Data Analyst, suggesting manual reporting pain.",
  "pain_points": ["Manual data extraction from legacy SAP system", "Production reporting requires manual spreadsheet compilation"],
  "ai_use_case": "Automated reporting pipeline that extracts SAP data and generates dashboards without manual intervention",
  "ai_readiness": "medium",
  "decision_maker_reasoning": "Operations Director (found on LinkedIn) manages the data team and would champion this internally. CTO signs off on tech purchases.",
  "outreach_angle": "Your Data Analyst job posting tells us you're drowning in manual SAP reports — we automate that entirely",
  "confidence": 0.88,
  "evidence_used": ["SAP detected in tech stack", "Data Analyst job posting", "150 employees", "manufacturing industry"],
  "evidence_missing": ["specific SAP modules used", "current reporting frequency"]
}`;
}

// ─── SIGNAL EXTRACTION PROMPT (for scoring) ──────────────────

/**
 * Builds the signal-extraction prompt used for lead scoring. The prompt
 * lists the company's known attributes and asks for a fixed JSON structure
 * of signals; it explicitly tells the model not to compute a score.
 *
 * @param companyData - Company attributes and boolean/count indicators.
 * @returns The prompt text.
 */
export function buildSignalExtractionPrompt(companyData: {
  name: string;
  industry: string;
  employee_count: number | null;
  tech_stack: string[];
  ai_job_count: number;
  pain_signals: string[];
  service_match: string | null;
  has_verified_email: boolean;
  has_linkedin: boolean;
  has_social: boolean;
  growth_signals_count: number;
  website_active: boolean;
}): string {
  const techStack = joinOrFallback(companyData.tech_stack, ", ", "NONE");
  const painSignals = joinOrFallback(companyData.pain_signals, ", ", "NONE");

  return `EXTRACT SIGNALS for lead scoring. Do not compute a score — just identify signals.

Company: ${companyData.name}
Industry: ${companyData.industry || "UNKNOWN"}
Employees: ${companyData.employee_count ?? "UNKNOWN"}
Tech stack: ${techStack}
AI/automation job postings: ${companyData.ai_job_count}
Pain signals detected: ${painSignals}
Service match: ${companyData.service_match || "NONE"}
Has verified email: ${companyData.has_verified_email}
Has personal LinkedIn: ${companyData.has_linkedin}
Has social profiles: ${companyData.has_social}
Growth signals count: ${companyData.growth_signals_count}
Website recently active: ${companyData.website_active}

Output JSON:
{
  "company_fit_signals": {
    "industry_match": true|false,
    "size_appropriate": true|false,
    "evidence": "why"
  },
  "ai_readiness_signals": {
    "level": "none|low|medium|high",
    "tech_stack_relevant": true|false,
    "ai_jobs_present": true|false,
    "evidence": "why"
  },
  "service_match_signals": {
    "matched": true|false,
    "service_name": "which service fits",
    "pain_count": 0,
    "evidence": "which pain signals"
  },
  "contact_quality_signals": {
    "email_verified": true|false,
    "linkedin_found": true|false,
    "decision_maker_identified": true|false
  },
  "timing_signals": {
    "actively_growing": true|false,
    "recently_active": true|false,
    "evidence": "what suggests good timing"
  },
  "confidence": 0.0
}`;
}

// ─── EMAIL CLASSIFICATION PROMPT ─────────────────────────────

/**
 * Builds the prompt that asks whether an email address likely reaches
 * someone with purchasing authority, given company size and industry.
 *
 * The website snippet is truncated to 300 characters. The part of the
 * address before the first "@" is quoted back as the prefix to reason about
 * (an address without "@" is used whole).
 *
 * @param data - The email address plus company context.
 * @returns The prompt text.
 */
export function buildEmailClassifyPrompt(data: {
  email: string;
  company_name: string;
  company_size: number | null;
  industry: string;
  website_snippet: string;
}): string {
  // Resolve once so both mentions of the industry use the same fallback;
  // an empty industry must not leave a gap in the "Consider" line.
  const industry = data.industry || "UNKNOWN";
  const websiteExcerpt = excerptOrFallback(data.website_snippet, 300, "NONE PROVIDED");
  const emailPrefix = data.email.split("@")[0];

  return `CLASSIFY this email address for B2B outreach viability.

Email: ${data.email}
Company: ${data.company_name}
Size: ${data.company_size ?? "UNKNOWN"} employees
Industry: ${industry}
Website excerpt: ${websiteExcerpt}

Does "${data.email}" likely reach a person with purchasing authority?

Consider:
- Email prefix meaning in context of this company size
- "${emailPrefix}@" at a ${data.company_size ?? "unknown"}-person ${industry} company
- Small companies: admin/operations/office = often the owner
- Large companies: admin/operations = departments, not individuals

Output JSON:
{
  "keep": true|false,
  "confidence": 0.0,
  "likely_reaches": "who this email probably reaches",
  "reason": "one line why keep or reject"
}`;
}

// ─── PAIN SIGNAL DETECTION PROMPT ────────────────────────────

/**
 * Builds the prompt that asks for manual-process ("pain") signals on a
 * company's website and the best-matching AI service.
 *
 * The website text is truncated to 500 characters. Page elements are listed
 * one per line; an empty list is rendered as "NONE DETECTED".
 *
 * @param data - Company context, website text, and detected page elements.
 * @returns The prompt text.
 */
export function buildPainDetectionPrompt(data: {
  company_name: string;
  industry: string;
  employee_count: number | null;
  website_text: string;
  page_elements: string[]; // ['phone_number', 'contact_form', 'no_chatbot', etc.]
}): string {
  const websiteExcerpt = excerptOrFallback(data.website_text, 500, "NONE PROVIDED");
  const pageElements = joinOrFallback(data.page_elements, "\n", "NONE DETECTED");

  return `DETECT operational inefficiency signals for this company.

Company: ${data.company_name}
Industry: ${data.industry || "UNKNOWN"}
Size: ${data.employee_count ?? "UNKNOWN"} employees
Website text (excerpt): ${websiteExcerpt}

Page elements detected:
${pageElements}

IMPORTANT: You are NOT looking for AI signals. You are looking for MANUAL PROCESS signals.
A phone number on a homepage IS a signal (manual call handling).
A "Book by Phone" button IS a signal (no online scheduling).
No live chat IS a signal (no automated customer interaction).

Output JSON:
{
  "pain_signals": [
    {"signal": "what you detected", "evidence": "where on page", "severity": "low|medium|high"}
  ],
  "service_match": "which AI service best fits: AI Receptionist|AI Customer Support|AI Data Processing|AI Sales Automation|AI Workflow Automation|NONE",
  "match_confidence": 0.0,
  "reasoning": "one paragraph explaining your analysis"
}`;
}