"""
Profiler — Production-grade company profiling using NVIDIA NIM.
Key differences from v1:
1. Chain-of-thought reasoning forced (Step 1-5 before JSON)
2. Few-shot examples (2 real-world examples in prompt)
3. Grounding instruction ("UNKNOWN" for missing data)
4. Evidence tracking (what data supported each claim)
5. Deterministic fallback (zero hallucination when LLM fails)
"""
import logging
from nvidia_client import call_with_consistency
from hallucination_guard import validate_profile_grounded
logger = logging.getLogger(__name__)
# ─── System prompt ────────────────────────────────────────────
# System-role text for the profiling request: it sets the analyst persona and
# the grounding rules (facts only, "UNKNOWN" for anything missing).
# The dash in the "UNKNOWN" rule is a real em-dash; the previous text carried a
# mis-decoded character there.
SYSTEM_PROMPT = """You are a business analyst for an AI automation agency.
Your job: analyze a company and identify WHERE our AI services can help them.
CRITICAL RULES:
- Only state facts supported by the provided evidence
- Write "UNKNOWN" for anything not in the data — NEVER guess
- Your analysis determines whether a real salesperson contacts this company
- Wrong analysis = wasted human time = unacceptable
- Think step by step before concluding"""
# ─── User prompt builder ─────────────────────────────────────
def build_profile_prompt(data: dict) -> str:
    """Build the user prompt that asks the model to profile one company.

    The prompt lists the known company facts, a five-step reasoning scaffold,
    the JSON schema to emit, and two worked examples.

    Args:
        data: Company facts. Keys read: ``name``, ``industry``,
            ``employee_count``, ``description`` (cut to 400 chars),
            ``website_text`` (cut to 600 chars), ``linkedin_description``,
            ``tech_stack``, ``ai_job_count``, ``pain_signals`` and
            ``service_match``. Any key may be absent or ``None``.

    Returns:
        The fully rendered prompt string.
    """

    def _scalar(key: str, default):
        # dict.get's default only applies to *missing* keys; a key stored as
        # None (or "") must fall back too, otherwise "None" leaks into the prompt.
        value = data.get(key)
        return default if value is None or value == "" else value

    def _listed(key: str, empty_label: str) -> str:
        # Tolerate None, a bare string, and non-string items so str.join
        # cannot raise TypeError on loosely typed scraped data.
        items = data.get(key) or []
        if isinstance(items, str):
            items = [items]
        return ", ".join(str(item) for item in items) or empty_label

    name = _scalar("name", "UNKNOWN")
    industry = _scalar("industry", "UNKNOWN")
    employees = _scalar("employee_count", "UNKNOWN")
    description = (data.get("description") or "NONE PROVIDED")[:400]
    website_excerpt = (data.get("website_text") or "")[:600]
    linkedin = data.get("linkedin_description") or "NONE"
    tech_stack = _listed("tech_stack", "NONE DETECTED")
    ai_job_count = _scalar("ai_job_count", 0)
    pain_signals = _listed("pain_signals", "NONE")
    service_match = data.get("service_match") or "NONE"

    # Doubled braces below are literal JSON braces inside the f-string.
    return f"""ANALYZE THIS COMPANY:
Name: {name}
Industry: {industry}
Employees: {employees}
Description: {description}
Website excerpt:
{website_excerpt}
LinkedIn description:
{linkedin}
Tech stack detected: {tech_stack}
Job postings mentioning AI/automation: {ai_job_count}
Pain signals detected: {pain_signals}
Service match suggestion: {service_match}
STEP-BY-STEP ANALYSIS:
Step 1: What does this company actually DO? (2 sentences, facts only)
Step 2: What are their likely daily operational challenges? (based on industry + size)
Step 3: What specific AI automation would save them time/money? (be specific)
Step 4: Who in this organization would approve buying this service?
Step 5: What outreach angle would resonate with this specific person?
After reasoning through steps 1-5, output this JSON:
{{
"profile_summary": "2-3 factual sentences about what this company does",
"pain_points": ["specific pain 1", "specific pain 2"],
"ai_use_case": "The single most compelling AI use case for them",
"ai_readiness": "low|medium|high",
"decision_maker_reasoning": "Who likely makes purchasing decisions and why",
"outreach_angle": "One specific sentence — the hook for first contact",
"confidence": 0.0,
"evidence_used": ["list which data points you relied on"],
"evidence_missing": ["list what data you wished you had"]
}}
EXAMPLE 1 (dental clinic, 6 employees):
{{
"profile_summary": "ABC Dental is a 6-person dental practice in Houston offering general and cosmetic dentistry. They display their phone number prominently and use a basic contact form for appointments.",
"pain_points": ["Manual phone-based appointment scheduling during business hours only", "No after-hours patient communication capability"],
"ai_use_case": "AI receptionist to handle appointment booking, reminders, and after-hours calls",
"ai_readiness": "low",
"decision_maker_reasoning": "Practice owner (Dr. Smith, DDS) makes all purchasing decisions. Small practice = owner controls budget directly.",
"outreach_angle": "Stop losing patients to voicemail — our AI receptionist books appointments 24/7",
"confidence": 0.82,
"evidence_used": ["phone number on homepage", "contact form only", "6 staff listed", "no chatbot detected"],
"evidence_missing": ["annual revenue", "number of daily calls"]
}}
EXAMPLE 2 (manufacturing company, 150 employees):
{{
"profile_summary": "XYZ Manufacturing is a UK-based manufacturer of industrial valves with 150 employees. They use SAP for ERP and are hiring a Data Analyst.",
"pain_points": ["Manual data extraction from legacy SAP system", "Production reporting requires manual spreadsheet compilation"],
"ai_use_case": "Automated reporting pipeline that extracts SAP data and generates dashboards",
"ai_readiness": "medium",
"decision_maker_reasoning": "Operations Director manages the data team and would champion this internally. CTO signs off on tech purchases.",
"outreach_angle": "Your Data Analyst job posting tells us you're drowning in manual SAP reports — we automate that entirely",
"confidence": 0.88,
"evidence_used": ["SAP detected in tech stack", "Data Analyst job posting", "150 employees"],
"evidence_missing": ["specific SAP modules used", "current reporting frequency"]
}}"""
# ─── Main profiling function ─────────────────────────────────
async def generate_profile(company_data: dict, trace_id: str = "") -> dict:
    """Generate an LLM profile for a company, with a deterministic fallback.

    Builds the prompt, awaits ``call_with_consistency``, annotates the parsed
    profile with model / consistency / token metadata, then runs
    ``validate_profile_grounded`` and overwrites any field it corrected.

    Args:
        company_data: Company facts. ``id`` and ``name`` are read here; the
            whole dict is also passed to the prompt builder, the grounding
            validator and the fallback.
        trace_id: Identifier forwarded unchanged to the LLM client.

    Returns:
        The parsed profile dict with metadata keys added, or the output of
        ``_deterministic_fallback`` when the client reports a fallback or
        yields no usable (non-empty dict) parsed profile.
    """
    prompt = build_profile_prompt(company_data)

    # Call with consistency check (2 temperatures, compare)
    result = await call_with_consistency(
        operation="profile",
        system_prompt=SYSTEM_PROMPT,
        user_prompt=prompt,
        trace_id=trace_id,
        company_id=company_data.get("id"),
    )

    # All models failed -> deterministic fallback. A parsed payload that is
    # not a dict (e.g. the model emitted a JSON list) cannot carry the
    # metadata keys assigned below, so it is treated as a failure as well
    # instead of raising TypeError on item assignment.
    parsed = result.get("parsed")
    if result.get("fallback_used") or not parsed or not isinstance(parsed, dict):
        logger.warning(
            "All LLM models failed for %s — using fallback",
            company_data.get("name"),
        )
        return _deterministic_fallback(company_data)

    profile = parsed
    profile["llm_model"] = result["model"]
    profile["is_fallback"] = False
    profile["is_consistent"] = result.get("is_consistent", True)
    profile["consistency_score"] = result.get("consistency_score", 1.0)
    profile["tokens_used"] = result["tokens"]["total"]

    # Grounding validation
    grounding_result = validate_profile_grounded(profile, company_data)
    # `or {}` also covers a "corrections" key that is present but None.
    corrections = grounding_result.get("corrections") or {}
    profile["grounding_score"] = grounding_result["grounding_score"]
    profile["corrections"] = corrections

    # Apply corrections: only fields already present in the profile are
    # overwritten, each with the validator's "actual" value.
    for key, correction in corrections.items():
        if key in profile:
            profile[key] = correction["actual"]

    return profile
# ─── Deterministic fallback ──────────────────────────────────
def _deterministic_fallback(data: dict) -> dict:
    """Build a profile from the supplied facts only, without calling an LLM.

    This is the fail-safe path, so it must not raise on sparse input: keys
    that are missing *or* stored as ``None`` are replaced by neutral defaults.

    Args:
        data: Company facts. Keys read: ``industry``, ``employee_count``,
            ``name``, ``pain_signals``, ``service_match``, ``ai_job_count``
            and ``tech_stack``.

    Returns:
        A profile dict with the same keys the LLM path produces, marked with
        ``is_fallback=True``, ``llm_model="deterministic_fallback"``,
        ``confidence=0.5`` and ``tokens_used=0``.
    """
    # dict.get's default is ignored when the key exists with a None value,
    # so fall back explicitly to keep "None" out of the generated sentences.
    industry = data.get("industry") or "business"
    name = data.get("name") or "this company"
    size = data.get("employee_count")
    if size is None or size == "":
        size = "unknown"
    # Copy into a list so slicing and list concatenation below always work.
    pain_signals = list(data.get("pain_signals") or [])
    service_match = data.get("service_match")

    # Map service to pain points
    pain_points = _get_pain_points(service_match, industry, pain_signals)

    # AI readiness from evidence
    try:
        ai_jobs = int(data.get("ai_job_count") or 0)
    except (TypeError, ValueError):
        # A count that cannot be read as a number is treated as no evidence.
        ai_jobs = 0
    tech_stack = data.get("tech_stack") or []
    if ai_jobs >= 2:
        ai_readiness = "high"
    elif tech_stack or ai_jobs >= 1:
        ai_readiness = "medium"
    else:
        ai_readiness = "low"

    return {
        "profile_summary": f"{name} is a {industry} company with approximately {size} employees.",
        "pain_points": pain_points,
        "ai_use_case": _get_use_case(service_match, industry),
        "ai_readiness": ai_readiness,
        "decision_maker_reasoning": f"At a {size}-employee {industry} company, purchasing decisions are likely made by the owner or managing director.",
        "outreach_angle": _get_outreach_angle(service_match, name),
        "confidence": 0.5,
        "evidence_used": [f"employee_count: {size}", f"industry: {industry}"] + pain_signals[:3],
        "evidence_missing": ["revenue", "growth rate", "current tools"],
        "llm_model": "deterministic_fallback",
        "is_fallback": True,
        "is_consistent": True,
        "consistency_score": 1.0,
        "grounding_score": 1.0,
        "tokens_used": 0,
        "corrections": {},
    }
def _get_pain_points(service, industry, detected_signals):
if detected_signals and len(detected_signals) >= 2:
return detected_signals[:2]
service_pains = {
"AI Receptionist": ["Manual phone handling during business hours only", "Missed calls and appointments outside working hours"],
"AI Customer Support": ["Manual ticket handling and slow response times", "No automated FAQ or chatbot for common questions"],
"AI Data Processing": ["Manual data entry and reporting overhead", "Legacy system inefficiencies"],
"AI Sales Automation": ["Manual outbound sales process", "Unqualified leads consuming sales team time"],
"AI Workflow Automation": ["Manual approval workflows", "Multiple disconnected tools and platforms"],
}
return service_pains.get(service, ["Manual operational processes", "Unoptimized workflow efficiency"])
def _get_use_case(service, industry):
if service:
return f"{service} for {industry} operations"
return f"AI workflow automation for {industry} processes"
def _get_outreach_angle(service, name):
angles = {
"AI Receptionist": f"Stop losing customers to voicemail β our AI handles calls 24/7 for {name}",
"AI Customer Support": f"Reduce support costs by 60% with AI-powered customer service for {name}",
"AI Data Processing": f"Eliminate manual reporting β our AI automates your data pipeline",
"AI Sales Automation": f"Double your sales pipeline efficiency with AI-powered outreach",
}
return angles.get(service, f"Reduce operational overhead with targeted AI automation for {name}")
|