Spaces:
Running
Running
iDevBuddy
feat: Add Slack Events integration, Dockerfiles, and Hugging Face deployment config
5f138d4 | """ | |
| Profiler — Production-grade company profiling using NVIDIA NIM. | |
| Key differences from v1: | |
| 1. Chain-of-thought reasoning forced (Step 1-5 before JSON) | |
| 2. Few-shot examples (2 real-world examples in prompt) | |
| 3. Grounding instruction ("UNKNOWN" for missing data) | |
| 4. Evidence tracking (what data supported each claim) | |
| 5. Deterministic fallback (zero hallucination when LLM fails) | |
| """ | |
| import logging | |
| from nvidia_client import call_with_consistency | |
| from hallucination_guard import validate_profile_grounded | |
| logger = logging.getLogger(__name__) | |
# ─── System prompt ───────────────────────────────────────────
# Passed as ``system_prompt`` to call_with_consistency by generate_profile.
# The dash in the "UNKNOWN" rule is a real em dash (U+2014); a mis-decoded
# byte sequence here would be sent to the model verbatim.
SYSTEM_PROMPT = """You are a business analyst for an AI automation agency.
Your job: analyze a company and identify WHERE our AI services can help them.
CRITICAL RULES:
- Only state facts supported by the provided evidence
- Write "UNKNOWN" for anything not in the data — NEVER guess
- Your analysis determines whether a real salesperson contacts this company
- Wrong analysis = wasted human time = unacceptable
- Think step by step before concluding"""
# ─── User prompt builder ─────────────────────────────────────
def build_profile_prompt(data: dict) -> str:
    """Render the user prompt asking the LLM to profile one company.

    The prompt lists the known company facts, five reasoning steps, the JSON
    schema expected in the answer, and two worked examples.

    Args:
        data: Company record. Keys read: ``name``, ``industry``,
            ``employee_count``, ``description``, ``website_text``,
            ``linkedin_description``, ``tech_stack``, ``ai_job_count``,
            ``pain_signals`` and ``service_match``. Every key is optional;
            a key that is absent, ``None`` or empty is rendered with an
            explicit placeholder (``UNKNOWN`` / ``NONE`` / ``0``) instead of
            raising or leaking the literal text "None" into the prompt.

    Returns:
        The fully formatted prompt string.
    """

    def _scalar(key, placeholder="UNKNOWN"):
        # Treat None and "" as missing, but keep falsy-yet-real values like 0.
        value = data.get(key)
        return placeholder if value is None or value == "" else value

    def _joined(key, placeholder):
        # `data.get(key, [])` would still hand None to join() when the key is
        # present with a null value, so normalise before joining.
        items = data.get(key) or []
        if isinstance(items, str):
            items = [items]
        return ", ".join(str(item) for item in items) or placeholder

    return f"""ANALYZE THIS COMPANY:
Name: {_scalar('name')}
Industry: {_scalar('industry')}
Employees: {_scalar('employee_count')}
Description: {(data.get('description') or 'NONE PROVIDED')[:400]}
Website excerpt:
{(data.get('website_text') or '')[:600]}
LinkedIn description:
{data.get('linkedin_description') or 'NONE'}
Tech stack detected: {_joined('tech_stack', 'NONE DETECTED')}
Job postings mentioning AI/automation: {_scalar('ai_job_count', 0)}
Pain signals detected: {_joined('pain_signals', 'NONE')}
Service match suggestion: {data.get('service_match') or 'NONE'}
STEP-BY-STEP ANALYSIS:
Step 1: What does this company actually DO? (2 sentences, facts only)
Step 2: What are their likely daily operational challenges? (based on industry + size)
Step 3: What specific AI automation would save them time/money? (be specific)
Step 4: Who in this organization would approve buying this service?
Step 5: What outreach angle would resonate with this specific person?
After reasoning through steps 1-5, output this JSON:
{{
"profile_summary": "2-3 factual sentences about what this company does",
"pain_points": ["specific pain 1", "specific pain 2"],
"ai_use_case": "The single most compelling AI use case for them",
"ai_readiness": "low|medium|high",
"decision_maker_reasoning": "Who likely makes purchasing decisions and why",
"outreach_angle": "One specific sentence — the hook for first contact",
"confidence": 0.0,
"evidence_used": ["list which data points you relied on"],
"evidence_missing": ["list what data you wished you had"]
}}
EXAMPLE 1 (dental clinic, 6 employees):
{{
"profile_summary": "ABC Dental is a 6-person dental practice in Houston offering general and cosmetic dentistry. They display their phone number prominently and use a basic contact form for appointments.",
"pain_points": ["Manual phone-based appointment scheduling during business hours only", "No after-hours patient communication capability"],
"ai_use_case": "AI receptionist to handle appointment booking, reminders, and after-hours calls",
"ai_readiness": "low",
"decision_maker_reasoning": "Practice owner (Dr. Smith, DDS) makes all purchasing decisions. Small practice = owner controls budget directly.",
"outreach_angle": "Stop losing patients to voicemail — our AI receptionist books appointments 24/7",
"confidence": 0.82,
"evidence_used": ["phone number on homepage", "contact form only", "6 staff listed", "no chatbot detected"],
"evidence_missing": ["annual revenue", "number of daily calls"]
}}
EXAMPLE 2 (manufacturing company, 150 employees):
{{
"profile_summary": "XYZ Manufacturing is a UK-based manufacturer of industrial valves with 150 employees. They use SAP for ERP and are hiring a Data Analyst.",
"pain_points": ["Manual data extraction from legacy SAP system", "Production reporting requires manual spreadsheet compilation"],
"ai_use_case": "Automated reporting pipeline that extracts SAP data and generates dashboards",
"ai_readiness": "medium",
"decision_maker_reasoning": "Operations Director manages the data team and would champion this internally. CTO signs off on tech purchases.",
"outreach_angle": "Your Data Analyst job posting tells us you're drowning in manual SAP reports — we automate that entirely",
"confidence": 0.88,
"evidence_used": ["SAP detected in tech stack", "Data Analyst job posting", "150 employees"],
"evidence_missing": ["specific SAP modules used", "current reporting frequency"]
}}"""
# ─── Main profiling function ─────────────────────────────────
async def generate_profile(company_data: dict, trace_id: str = "") -> dict:
    """Generate an LLM profile for a company, falling back deterministically.

    Builds the prompt, sends it through ``call_with_consistency``, annotates
    the parsed answer with model/consistency/token metadata, then runs
    ``validate_profile_grounded`` and overwrites any field it corrected.

    Args:
        company_data: Company record; ``id`` and ``name`` are read here, the
            rest is consumed by the prompt builder and the grounding check.
        trace_id: Opaque identifier forwarded to the LLM client.

    Returns:
        The annotated profile dict, or the output of
        ``_deterministic_fallback`` when the client reports a fallback,
        returns nothing parseable, or returns something that is not a JSON
        object.
    """
    prompt = build_profile_prompt(company_data)

    # NOTE(review): the original comment says this helper samples at two
    # temperatures and compares the answers — confirm in nvidia_client.
    result = await call_with_consistency(
        operation="profile",
        system_prompt=SYSTEM_PROMPT,
        user_prompt=prompt,
        trace_id=trace_id,
        company_id=company_data.get("id"),
    )

    parsed = result.get("parsed")

    # All models failed -> deterministic fallback.
    if result.get("fallback_used") or not parsed:
        logger.warning(
            "All LLM models failed for %s — using fallback",
            company_data.get("name"),
        )
        return _deterministic_fallback(company_data)

    # A model may emit valid JSON that is not an object (e.g. a list); the
    # item assignments below would raise on it, so treat it as a failure.
    if not isinstance(parsed, dict):
        logger.warning(
            "LLM profile for %s was not a JSON object — using fallback",
            company_data.get("name"),
        )
        return _deterministic_fallback(company_data)

    # Work on a copy so the client's result object is left untouched.
    profile = dict(parsed)
    profile["llm_model"] = result.get("model", "unknown")
    profile["is_fallback"] = False
    profile["is_consistent"] = result.get("is_consistent", True)
    profile["consistency_score"] = result.get("consistency_score", 1.0)
    # Missing token accounting should not discard an otherwise good profile.
    profile["tokens_used"] = (result.get("tokens") or {}).get("total", 0)

    # Grounding validation
    grounding_result = validate_profile_grounded(profile, company_data)
    corrections = grounding_result.get("corrections") or {}
    profile["grounding_score"] = grounding_result["grounding_score"]
    profile["corrections"] = corrections

    # Apply corrections: only fields the profile already has are overwritten.
    for key, correction in corrections.items():
        if key in profile:
            profile[key] = correction["actual"]

    return profile
# ─── Deterministic fallback ──────────────────────────────────
def _deterministic_fallback(data: dict) -> dict:
    """Build a template profile from the company record alone (no LLM call).

    Args:
        data: Company record. Keys read: ``industry``, ``employee_count``,
            ``name``, ``pain_signals``, ``service_match``, ``ai_job_count``
            and ``tech_stack``. Absent, ``None`` or empty values fall back to
            neutral placeholders rather than raising or printing "None".

    Returns:
        A dict with the same keys as an LLM-generated profile, with
        ``is_fallback`` set to ``True``, ``llm_model`` set to
        ``"deterministic_fallback"`` and ``tokens_used`` set to ``0``.
        ``evidence_used`` lists only facts actually present in ``data``;
        a missing industry or employee count is reported under
        ``evidence_missing`` instead.
    """
    raw_industry = data.get("industry")
    raw_size = data.get("employee_count")
    has_industry = bool(raw_industry)
    has_size = raw_size is not None and raw_size != ""

    industry = raw_industry if has_industry else "business"
    size = raw_size if has_size else "unknown"
    name = data.get("name") or "this company"
    pain_signals = data.get("pain_signals") or []
    service_match = data.get("service_match")

    # Map service to pain points
    pain_points = _get_pain_points(service_match, industry, pain_signals)

    # AI readiness from evidence
    ai_jobs = data.get("ai_job_count") or 0
    tech_stack = data.get("tech_stack") or []
    if ai_jobs >= 2:
        ai_readiness = "high"
    elif tech_stack or ai_jobs >= 1:
        ai_readiness = "medium"
    else:
        ai_readiness = "low"

    # Only cite values that were really supplied; placeholders are not evidence.
    evidence_used = []
    evidence_missing = ["revenue", "growth rate", "current tools"]
    if has_size:
        evidence_used.append(f"employee_count: {size}")
    else:
        evidence_missing.append("employee_count")
    if has_industry:
        evidence_used.append(f"industry: {industry}")
    else:
        evidence_missing.append("industry")
    evidence_used.extend(pain_signals[:3])

    return {
        "profile_summary": f"{name} is a {industry} company with approximately {size} employees.",
        "pain_points": pain_points,
        "ai_use_case": _get_use_case(service_match, industry),
        "ai_readiness": ai_readiness,
        "decision_maker_reasoning": f"At a {size}-employee {industry} company, purchasing decisions are likely made by the owner or managing director.",
        "outreach_angle": _get_outreach_angle(service_match, name),
        "confidence": 0.5,
        "evidence_used": evidence_used,
        "evidence_missing": evidence_missing,
        "llm_model": "deterministic_fallback",
        "is_fallback": True,
        "is_consistent": True,
        "consistency_score": 1.0,
        "grounding_score": 1.0,
        "tokens_used": 0,
        "corrections": {},
    }
| def _get_pain_points(service, industry, detected_signals): | |
| if detected_signals and len(detected_signals) >= 2: | |
| return detected_signals[:2] | |
| service_pains = { | |
| "AI Receptionist": ["Manual phone handling during business hours only", "Missed calls and appointments outside working hours"], | |
| "AI Customer Support": ["Manual ticket handling and slow response times", "No automated FAQ or chatbot for common questions"], | |
| "AI Data Processing": ["Manual data entry and reporting overhead", "Legacy system inefficiencies"], | |
| "AI Sales Automation": ["Manual outbound sales process", "Unqualified leads consuming sales team time"], | |
| "AI Workflow Automation": ["Manual approval workflows", "Multiple disconnected tools and platforms"], | |
| } | |
| return service_pains.get(service, ["Manual operational processes", "Unoptimized workflow efficiency"]) | |
| def _get_use_case(service, industry): | |
| if service: | |
| return f"{service} for {industry} operations" | |
| return f"AI workflow automation for {industry} processes" | |
| def _get_outreach_angle(service, name): | |
| angles = { | |
| "AI Receptionist": f"Stop losing customers to voicemail β our AI handles calls 24/7 for {name}", | |
| "AI Customer Support": f"Reduce support costs by 60% with AI-powered customer service for {name}", | |
| "AI Data Processing": f"Eliminate manual reporting β our AI automates your data pipeline", | |
| "AI Sales Automation": f"Double your sales pipeline efficiency with AI-powered outreach", | |
| } | |
| return angles.get(service, f"Reduce operational overhead with targeted AI automation for {name}") | |