File size: 10,076 Bytes
bd28470
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
"""
Profiler — Production-grade company profiling using NVIDIA NIM.

Key differences from v1:
1. Chain-of-thought reasoning forced (Step 1-5 before JSON)
2. Few-shot examples (2 real-world examples in prompt)
3. Grounding instruction ("UNKNOWN" for missing data)
4. Evidence tracking (what data supported each claim)
5. Deterministic fallback (zero hallucination when LLM fails)
"""

import logging
from nvidia_client import call_with_consistency, MODELS
from hallucination_guard import validate_profile_grounded

logger = logging.getLogger(__name__)


# ─── System prompt ────────────────────────────────────────────

# System-role instructions passed as `system_prompt` on every profiling call.
# The wording stresses grounding: the model is told to answer "UNKNOWN"
# rather than guess, because the output drives real sales outreach.
SYSTEM_PROMPT = """You are a business analyst for an AI automation agency.
Your job: analyze a company and identify WHERE our AI services can help them.

CRITICAL RULES:
- Only state facts supported by the provided evidence
- Write "UNKNOWN" for anything not in the data — NEVER guess
- Your analysis determines whether a real salesperson contacts this company
- Wrong analysis = wasted human time = unacceptable
- Think step by step before concluding"""


# ─── User prompt builder ─────────────────────────────────────

def build_profile_prompt(data: dict) -> str:
    """Build the user prompt asking the LLM to profile a single company.

    The prompt lists the known facts about the company, asks for a five-step
    analysis, specifies the JSON schema to output, and ends with two worked
    examples.

    Args:
        data: Company record. Keys read: ``name``, ``industry``,
            ``employee_count``, ``description``, ``website_text``,
            ``linkedin_description``, ``tech_stack``, ``ai_job_count``,
            ``pain_signals`` and ``service_match``. Every key is optional;
            a missing key and a key holding ``None`` are treated the same.

    Returns:
        The fully rendered prompt text.
    """

    def _value(key, default):
        # A key that is present but None/"" must not render as "None"/blank;
        # the prompt tells the model that missing data is marked explicitly.
        found = data.get(key)
        return default if found is None or found == "" else found

    def _joined(key, default):
        # Tolerate None, a bare string, or non-string items instead of
        # letting str.join raise TypeError.
        items = data.get(key) or []
        if isinstance(items, str):
            items = [items]
        return ", ".join(str(item) for item in items) or default

    name = _value("name", "UNKNOWN")
    industry = _value("industry", "UNKNOWN")
    employees = _value("employee_count", "UNKNOWN")
    ai_job_count = _value("ai_job_count", 0)
    # Free-text fields are truncated to keep the prompt bounded.
    description = (data.get("description") or "NONE PROVIDED")[:400]
    website_excerpt = (data.get("website_text") or "")[:600]
    linkedin = data.get("linkedin_description") or "NONE"
    tech_stack = _joined("tech_stack", "NONE DETECTED")
    pain_signals = _joined("pain_signals", "NONE")
    service_match = data.get("service_match") or "NONE"

    # Doubled braces ({{ }}) are literal braces in the rendered JSON samples.
    return f"""ANALYZE THIS COMPANY:

Name: {name}
Industry: {industry}
Employees: {employees}
Description: {description}

Website excerpt:
{website_excerpt}

LinkedIn description:
{linkedin}

Tech stack detected: {tech_stack}
Job postings mentioning AI/automation: {ai_job_count}
Pain signals detected: {pain_signals}
Service match suggestion: {service_match}

STEP-BY-STEP ANALYSIS:

Step 1: What does this company actually DO? (2 sentences, facts only)
Step 2: What are their likely daily operational challenges? (based on industry + size)
Step 3: What specific AI automation would save them time/money? (be specific)
Step 4: Who in this organization would approve buying this service?
Step 5: What outreach angle would resonate with this specific person?

After reasoning through steps 1-5, output this JSON:
{{
  "profile_summary": "2-3 factual sentences about what this company does",
  "pain_points": ["specific pain 1", "specific pain 2"],
  "ai_use_case": "The single most compelling AI use case for them",
  "ai_readiness": "low|medium|high",
  "decision_maker_reasoning": "Who likely makes purchasing decisions and why",
  "outreach_angle": "One specific sentence — the hook for first contact",
  "confidence": 0.0,
  "evidence_used": ["list which data points you relied on"],
  "evidence_missing": ["list what data you wished you had"]
}}

EXAMPLE 1 (dental clinic, 6 employees):
{{
  "profile_summary": "ABC Dental is a 6-person dental practice in Houston offering general and cosmetic dentistry. They display their phone number prominently and use a basic contact form for appointments.",
  "pain_points": ["Manual phone-based appointment scheduling during business hours only", "No after-hours patient communication capability"],
  "ai_use_case": "AI receptionist to handle appointment booking, reminders, and after-hours calls",
  "ai_readiness": "low",
  "decision_maker_reasoning": "Practice owner (Dr. Smith, DDS) makes all purchasing decisions. Small practice = owner controls budget directly.",
  "outreach_angle": "Stop losing patients to voicemail — our AI receptionist books appointments 24/7",
  "confidence": 0.82,
  "evidence_used": ["phone number on homepage", "contact form only", "6 staff listed", "no chatbot detected"],
  "evidence_missing": ["annual revenue", "number of daily calls"]
}}

EXAMPLE 2 (manufacturing company, 150 employees):
{{
  "profile_summary": "XYZ Manufacturing is a UK-based manufacturer of industrial valves with 150 employees. They use SAP for ERP and are hiring a Data Analyst.",
  "pain_points": ["Manual data extraction from legacy SAP system", "Production reporting requires manual spreadsheet compilation"],
  "ai_use_case": "Automated reporting pipeline that extracts SAP data and generates dashboards",
  "ai_readiness": "medium",
  "decision_maker_reasoning": "Operations Director manages the data team and would champion this internally. CTO signs off on tech purchases.",
  "outreach_angle": "Your Data Analyst job posting tells us you're drowning in manual SAP reports — we automate that entirely",
  "confidence": 0.88,
  "evidence_used": ["SAP detected in tech stack", "Data Analyst job posting", "150 employees"],
  "evidence_missing": ["specific SAP modules used", "current reporting frequency"]
}}"""


# ─── Main profiling function ─────────────────────────────────

async def generate_profile(company_data: dict, trace_id: str = "") -> dict:
    """Generate a company profile via the LLM, falling back deterministically.

    Builds the prompt, sends it through ``call_with_consistency`` and, on
    success, annotates the parsed profile with model/consistency/token
    metadata and the outcome of ``validate_profile_grounded``. Any
    corrections reported by the grounding check overwrite the matching
    profile fields.

    Args:
        company_data: Company record; also passed to the prompt builder,
            the grounding validator and the fallback. ``id`` and ``name``
            are read here directly.
        trace_id: Forwarded unchanged to ``call_with_consistency``.

    Returns:
        The annotated LLM profile, or the result of
        ``_deterministic_fallback`` when the call reports ``fallback_used``
        or returns nothing under ``parsed``.

    Raises:
        KeyError: If a successful result lacks ``model`` or
            ``tokens["total"]``, if the grounding result lacks
            ``grounding_score``, or if a correction has no ``actual``.
    """
    prompt = build_profile_prompt(company_data)

    # NOTE(review): the original comment says this samples at two
    # temperatures and compares the answers; that logic lives in
    # nvidia_client and is not visible here — confirm.
    result = await call_with_consistency(
        operation="profile",
        system_prompt=SYSTEM_PROMPT,
        user_prompt=prompt,
        trace_id=trace_id,
        company_id=company_data.get("id"),
    )

    # All models failed → deterministic fallback
    if result.get("fallback_used") or not result.get("parsed"):
        # Lazy %-formatting: the message is only built if the record is emitted.
        logger.warning(
            "All LLM models failed for %s — using fallback",
            company_data.get("name"),
        )
        return _deterministic_fallback(company_data)

    profile = result["parsed"]
    profile["llm_model"] = result["model"]
    profile["is_fallback"] = False
    profile["is_consistent"] = result.get("is_consistent", True)
    profile["consistency_score"] = result.get("consistency_score", 1.0)
    profile["tokens_used"] = result["tokens"]["total"]

    # Grounding validation
    grounding_result = validate_profile_grounded(profile, company_data)
    profile["grounding_score"] = grounding_result["grounding_score"]
    # Normalise a missing/None value to {} so the key always holds a dict,
    # matching the shape produced by the deterministic fallback.
    corrections = grounding_result.get("corrections") or {}
    profile["corrections"] = corrections

    # Apply corrections: only fields the profile already has are overwritten.
    for key, correction in corrections.items():
        if key in profile:
            profile[key] = correction["actual"]

    return profile


# ─── Deterministic fallback ──────────────────────────────────

def _deterministic_fallback(data: dict) -> dict:
    """Build a profile from the available facts only, without any LLM call.

    The result has the same keys as an LLM-generated profile so callers can
    treat both uniformly; ``is_fallback`` is ``True`` and
    ``llm_model`` is ``"deterministic_fallback"``.

    Args:
        data: Company record. Keys read: ``industry``, ``employee_count``,
            ``name``, ``pain_signals``, ``service_match``, ``ai_job_count``
            and ``tech_stack``. A missing key and a key holding ``None``
            (or another empty value) both resolve to the default.

    Returns:
        The fallback profile dict.
    """
    # `or` rather than a .get() default: a key that is present but None must
    # not leak "None" into the generated sentences or break the comparisons.
    industry = data.get("industry") or "business"
    size = data.get("employee_count") or "unknown"
    name = data.get("name") or "this company"
    pain_signals = list(data.get("pain_signals") or [])
    service_match = data.get("service_match")

    # Map service to pain points
    pain_points = _get_pain_points(service_match, industry, pain_signals)

    # AI readiness from evidence: two or more AI-related postings is "high";
    # any detected tech stack or a single posting is "medium".
    ai_jobs = data.get("ai_job_count") or 0
    tech_stack = data.get("tech_stack") or []
    if ai_jobs >= 2:
        ai_readiness = "high"
    elif tech_stack or ai_jobs >= 1:
        ai_readiness = "medium"
    else:
        ai_readiness = "low"

    return {
        "profile_summary": f"{name} is a {industry} company with approximately {size} employees.",
        "pain_points": pain_points,
        "ai_use_case": _get_use_case(service_match, industry),
        "ai_readiness": ai_readiness,
        "decision_maker_reasoning": f"At a {size}-employee {industry} company, purchasing decisions are likely made by the owner or managing director.",
        "outreach_angle": _get_outreach_angle(service_match, name),
        "confidence": 0.5,
        "evidence_used": [f"employee_count: {size}", f"industry: {industry}"] + pain_signals[:3],
        "evidence_missing": ["revenue", "growth rate", "current tools"],
        "llm_model": "deterministic_fallback",
        "is_fallback": True,
        "is_consistent": True,
        "consistency_score": 1.0,
        "grounding_score": 1.0,
        "tokens_used": 0,
        "corrections": {},
    }


def _get_pain_points(service, industry, detected_signals):
    """Pick two pain points for the fallback profile.

    When at least two signals were detected, the first two are returned.
    Otherwise the canned pair for ``service`` is used, or a generic pair
    when the service is not recognised. ``industry`` is accepted but not
    consulted.
    """
    has_enough_signals = bool(detected_signals) and len(detected_signals) >= 2
    if has_enough_signals:
        return detected_signals[:2]

    generic_pains = ["Manual operational processes", "Unoptimized workflow efficiency"]
    pains_by_service = {
        "AI Receptionist": [
            "Manual phone handling during business hours only",
            "Missed calls and appointments outside working hours",
        ],
        "AI Customer Support": [
            "Manual ticket handling and slow response times",
            "No automated FAQ or chatbot for common questions",
        ],
        "AI Data Processing": [
            "Manual data entry and reporting overhead",
            "Legacy system inefficiencies",
        ],
        "AI Sales Automation": [
            "Manual outbound sales process",
            "Unqualified leads consuming sales team time",
        ],
        "AI Workflow Automation": [
            "Manual approval workflows",
            "Multiple disconnected tools and platforms",
        ],
    }
    if service in pains_by_service:
        return pains_by_service[service]
    return generic_pains


def _get_use_case(service, industry):
    """Return a one-line AI use case for the fallback profile.

    Uses the matched service when there is one; otherwise falls back to a
    generic workflow-automation phrasing for the industry.
    """
    if not service:
        return f"AI workflow automation for {industry} processes"
    return f"{service} for {industry} operations"


def _get_outreach_angle(service, name):
    """Return the first-contact hook sentence for the fallback profile.

    Args:
        service: Matched service name, or ``None``. Services without a
            dedicated angle get the generic sentence.
        name: Company name interpolated into the sentences that mention it.

    Returns:
        The outreach sentence.
    """
    angles = {
        "AI Receptionist": f"Stop losing customers to voicemail — our AI handles calls 24/7 for {name}",
        "AI Customer Support": f"Reduce support costs by 60% with AI-powered customer service for {name}",
        # These two have no placeholders, so they are plain strings.
        "AI Data Processing": "Eliminate manual reporting — our AI automates your data pipeline",
        "AI Sales Automation": "Double your sales pipeline efficiency with AI-powered outreach",
    }
    return angles.get(service, f"Reduce operational overhead with targeted AI automation for {name}")