Spaces:
Running
Running
iDevBuddy
feat: Add Slack Events integration, Dockerfiles, and Hugging Face deployment config
5f138d4 | """ | |
| Scorer v2 — Signal Extraction + Deterministic Scoring | |
| KEY DESIGN CHANGE: | |
| Old: LLM computes score directly → hallucination risk | |
| New: LLM extracts SIGNALS → Code computes score → zero hallucination | |
| LLM is good at: "Does this company have legacy SAP?" (yes/no) | |
| LLM is bad at: "Give this company 73 out of 100" (arbitrary) | |
| So: LLM extracts signals, code does math. | |
| """ | |
| import logging | |
| from nvidia_client import call_llm | |
| logger = logging.getLogger(__name__) | |
# ─── Signal extraction prompt ────────────────────────────────
# System message passed as `system_prompt` to call_llm by _extract_signals.
# It restricts the model to reporting evidence-backed signals as JSON; the
# numeric score is computed in code, never by the model.
SYSTEM_PROMPT = """You are a lead qualification engine.
Your job: extract SIGNALS from company data. You do NOT compute the final score.
The system computes scores deterministically from your signal extraction.
CRITICAL RULES:
- Extract only what the evidence supports
- For each signal, cite which piece of evidence supports it
- If evidence is weak or missing, say so honestly
- Output ONLY the structured JSON requested"""
def build_signal_prompt(data: dict, profile: dict, contacts: list) -> str:
    """Build the user prompt asking the LLM to extract scoring signals.

    The prompt lists the known company facts, then shows the JSON shape the
    model must fill in. Contact-quality booleans are pre-filled from
    ``contacts`` so the model does not have to guess them.

    Missing keys and explicit ``None`` values are treated the same way:
    scalar facts render as ``UNKNOWN``, list facts as ``NONE`` (or a count of
    0), and a ``None`` ``profile``/``contacts`` is treated as empty. This keeps
    prompt construction from raising on sparsely populated records.

    Args:
        data: Company record; reads ``name``, ``industry``, ``employee_count``,
            ``tech_stack``, ``ai_job_count``, ``pain_signals``,
            ``service_match`` and ``growth_signals``.
        profile: Company profile; only ``ai_readiness`` is read.
        contacts: Contact dicts; reads ``email_verified``,
            ``linkedin_personal_url`` and ``social_profiles``.

    Returns:
        The fully rendered prompt text.
    """
    data = data or {}
    profile = profile or {}
    contacts = contacts or []

    has_verified_email = any(c.get("email_verified") for c in contacts)
    has_linkedin = any(c.get("linkedin_personal_url") for c in contacts)
    has_social = any(c.get("social_profiles") for c in contacts)

    def shown(value):
        # dict.get(key, default) only applies the default when the key is
        # absent, so an explicit null must be mapped here as well.
        return 'UNKNOWN' if value is None else value

    # `or []` guards against keys that are present but null; str() lets
    # non-string entries be listed instead of breaking the join.
    tech_stack = ', '.join(str(t) for t in (data.get('tech_stack') or [])) or 'NONE'
    pain_signals = ', '.join(str(p) for p in (data.get('pain_signals') or [])) or 'NONE'
    ai_job_count = data.get('ai_job_count') or 0
    growth_count = len(data.get('growth_signals') or [])

    return f"""EXTRACT SIGNALS for lead scoring. Do not compute a score.
Company: {shown(data.get('name'))}
Industry: {shown(data.get('industry'))}
Employees: {shown(data.get('employee_count'))}
Tech stack: {tech_stack}
AI job postings: {ai_job_count}
Pain signals: {pain_signals}
Service match: {data.get('service_match') or 'NONE'}
AI readiness (from profile): {shown(profile.get('ai_readiness'))}
Has verified email: {has_verified_email}
Has personal LinkedIn: {has_linkedin}
Has social profiles: {has_social}
Growth signals count: {growth_count}
Output JSON:
{{
"company_fit_signals": {{
"industry_match": true,
"size_appropriate": true,
"evidence": "why"
}},
"ai_readiness_signals": {{
"level": "none|low|medium|high",
"tech_stack_relevant": false,
"ai_jobs_present": false,
"evidence": "why"
}},
"service_match_signals": {{
"matched": true,
"service_name": "which service",
"pain_count": 0,
"evidence": "which pain signals"
}},
"contact_quality_signals": {{
"email_verified": {str(has_verified_email).lower()},
"linkedin_found": {str(has_linkedin).lower()},
"decision_maker_identified": true
}},
"timing_signals": {{
"actively_growing": false,
"recently_active": true,
"evidence": "what suggests timing"
}},
"confidence": 0.0
}}"""
# ─── Main scoring function ───────────────────────────────────
async def compute_score(
    company_data: dict,
    profile: dict,
    contacts: list,
    trace_id: str = ""
) -> dict:
    """
    Score a lead in two stages.

    Stage 1: signals are extracted (LLM first, rule-based fallback).
    Stage 2: the signals are converted to points by plain arithmetic,
             so the same signals always produce the same score.

    Returns the score dict produced by _compute_deterministic_score.
    """
    # ── Stage 1: qualitative signal extraction ─────────────────
    extracted = await _extract_signals(company_data, profile, contacts, trace_id)
    # ── Stage 2: deterministic, reproducible scoring ───────────
    return _compute_deterministic_score(extracted, company_data, profile, contacts)
async def _extract_signals(data, profile, contacts, trace_id) -> dict:
    """Ask the LLM to identify signals — NOT to score.

    The call is best-effort: if building the prompt or calling the LLM
    raises, or the response carries no usable parsed JSON object, the
    rule-based extractor is used instead so scoring can always proceed.

    Args:
        data: Company record (also supplies ``id`` for the LLM call).
        profile: Company profile passed through to the prompt builder.
        contacts: Contact dicts passed through to the prompt builder.
        trace_id: Correlation id forwarded to ``call_llm``.

    Returns:
        A dict of signal groups, either parsed from the LLM response or
        produced by ``_extract_signals_deterministic``.
    """
    parsed = None
    try:
        prompt = build_signal_prompt(data, profile, contacts)
        result = await call_llm(
            operation="score",
            system_prompt=SYSTEM_PROMPT,
            user_prompt=prompt,
            model_index=2,  # 8B model — signal extraction is simple
            temperature=0.1,
            max_tokens=400,
            json_mode=True,
            trace_id=trace_id,
            company_id=data.get("id"),
        )
        parsed = result.get("parsed")
    except Exception as e:
        # Deliberate catch-all: any failure here must degrade to the
        # rule-based path rather than abort scoring.
        logger.warning("Signal extraction failed: %s", e)
    else:
        # Only a non-empty JSON object is usable downstream; a list, string
        # or number would break the scorer's per-group .get() lookups.
        if isinstance(parsed, dict) and parsed:
            return parsed
        logger.warning(
            "Signal extraction returned no usable JSON object (got %s); "
            "using deterministic fallback",
            type(parsed).__name__,
        )
    # Fallback: extract signals from raw data
    return _extract_signals_deterministic(data, profile, contacts)
| def _extract_signals_deterministic(data, profile, contacts) -> dict: | |
| """Rule-based signal extraction when LLM fails.""" | |
| has_email = any(c.get("email_verified") for c in contacts) | |
| has_linkedin = any(c.get("linkedin_personal_url") for c in contacts) | |
| return { | |
| "company_fit_signals": { | |
| "industry_match": bool(data.get("industry")), | |
| "size_appropriate": (data.get("employee_count") or 0) >= 3, | |
| "evidence": "deterministic", | |
| }, | |
| "ai_readiness_signals": { | |
| "level": profile.get("ai_readiness", "low"), | |
| "tech_stack_relevant": len(data.get("tech_stack", [])) > 0, | |
| "ai_jobs_present": data.get("ai_job_count", 0) > 0, | |
| "evidence": "deterministic", | |
| }, | |
| "service_match_signals": { | |
| "matched": bool(data.get("service_match")), | |
| "service_name": data.get("service_match", "NONE"), | |
| "pain_count": len(data.get("pain_signals", [])), | |
| "evidence": "deterministic", | |
| }, | |
| "contact_quality_signals": { | |
| "email_verified": has_email, | |
| "linkedin_found": has_linkedin, | |
| "decision_maker_identified": len(contacts) > 0, | |
| }, | |
| "timing_signals": { | |
| "actively_growing": data.get("ai_job_count", 0) > 0, | |
| "recently_active": True, | |
| "evidence": "deterministic", | |
| }, | |
| "confidence": 0.5, | |
| } | |
# ─── Deterministic score computation ─────────────────────────
# This is where the ACTUAL score is calculated.
# No LLM involved — pure math from signals.
def _signal_group(signals, key: str) -> dict:
    """Return ``signals[key]`` when it is a dict, otherwise an empty dict."""
    group = signals.get(key) if isinstance(signals, dict) else None
    return group if isinstance(group, dict) else {}


def _signal_flag(value) -> bool:
    """Interpret a signal as a boolean, reading strings like "false" as False."""
    if isinstance(value, str):
        return value.strip().lower() in ("true", "yes", "1")
    return bool(value)


def _non_negative_int(value) -> int:
    """Coerce a count to an int >= 0; unusable values (None, text, NaN) give 0."""
    try:
        return max(0, int(float(value)))
    except (TypeError, ValueError, OverflowError):
        return 0


def _compute_deterministic_score(signals: dict, data: dict, profile: dict, contacts: list) -> dict:
    """
    Turn extracted signals into a 0-100 lead score using fixed point rules.

    Weights:
    company_fit: 25 pts
    ai_readiness: 20 pts
    service_match: 20 pts (NEW — replaces old AI readiness weight)
    decision_maker: 20 pts
    timing: 15 pts

    ``signals`` may come from an LLM, so it is read defensively: a missing or
    non-dict group scores as empty, string booleans such as "false" are not
    treated as true, ``level`` is matched case-insensitively, and counts are
    coerced to non-negative integers.

    Args:
        signals: Signal groups keyed ``company_fit_signals``,
            ``ai_readiness_signals``, ``service_match_signals``,
            ``contact_quality_signals`` and ``timing_signals``.
        data: Company record; reads ``employee_count`` and ``growth_signals``.
        profile: Not read by this function; kept for interface compatibility.
        contacts: Contact dicts; ``email`` is read for the unverified-email
            partial credit.

    Returns:
        Dict with the five component scores, ``total_score``, ``tier``, a
        human-readable ``score_breakdown`` and bookkeeping fields.
    """
    data = data or {}
    contacts = contacts or []

    # ── Company Fit (25 pts) ──────────────────────────────────
    fit = _signal_group(signals, "company_fit_signals")
    company_fit = 0
    if _signal_flag(fit.get("industry_match")):
        company_fit += 10
    if _signal_flag(fit.get("size_appropriate")):
        company_fit += 10
    # Size bonus comes from the raw record, not from the extracted signal.
    emp = _non_negative_int(data.get("employee_count"))
    if emp >= 200:
        company_fit += 5
    elif emp >= 50:
        company_fit += 3
    elif emp >= 10:
        company_fit += 1

    # ── AI Readiness (20 pts) ─────────────────────────────────
    ai_sig = _signal_group(signals, "ai_readiness_signals")
    # A missing level counts as "low"; any unrecognised value earns nothing.
    level = str(ai_sig.get("level", "low")).strip().lower()
    ai_readiness = {"high": 12, "medium": 8, "low": 3}.get(level, 0)
    if _signal_flag(ai_sig.get("tech_stack_relevant")):
        ai_readiness += 4
    if _signal_flag(ai_sig.get("ai_jobs_present")):
        ai_readiness += 4
    ai_readiness = min(20, ai_readiness)

    # ── Service Match (20 pts) — KEY DIFFERENTIATOR ───────────
    svc = _signal_group(signals, "service_match_signals")
    service_match = 0
    if _signal_flag(svc.get("matched")):
        service_match += 10
    pain_count = _non_negative_int(svc.get("pain_count", 0))
    service_match += min(10, pain_count * 3)  # up to 10 pts for pain signals
    service_match = min(20, service_match)

    # ── Decision Maker Access (20 pts) ────────────────────────
    contact = _signal_group(signals, "contact_quality_signals")
    dm = 0
    if _signal_flag(contact.get("email_verified")):
        dm += 12
    elif any(c.get("email") for c in contacts):
        dm += 6  # half credit: an address exists but is unverified
    if _signal_flag(contact.get("linkedin_found")):
        dm += 5
    if _signal_flag(contact.get("decision_maker_identified")):
        dm += 3
    dm = min(20, dm)

    # ── Timing (15 pts) ───────────────────────────────────────
    timing = _signal_group(signals, "timing_signals")
    timing_score = 5  # base: company exists and has website
    if _signal_flag(timing.get("actively_growing")):
        timing_score += 5
    if _signal_flag(timing.get("recently_active")):
        timing_score += 3
    if len(data.get("growth_signals") or []) >= 2:
        timing_score += 2
    timing_score = min(15, timing_score)

    # ── Total ─────────────────────────────────────────────────
    total = company_fit + ai_readiness + service_match + dm + timing_score
    tier = _score_to_tier(total)
    group_count = len(signals) if isinstance(signals, dict) else 0
    return {
        "company_fit": company_fit,
        "ai_readiness_score": ai_readiness,
        "service_match_score": service_match,
        "decision_maker_access": dm,
        "timing_score": timing_score,
        "total_score": total,
        "tier": tier,
        "score_breakdown": {
            "company_fit": f"{company_fit}/25",
            "ai_readiness": f"{ai_readiness}/20",
            "service_match": f"{service_match}/20",
            "decision_maker": f"{dm}/20",
            "timing": f"{timing_score}/15",
        },
        "score_reasoning": f"Deterministic score from {group_count} signal groups",
        "llm_model": "deterministic_scorer",
        "is_fallback": False,
    }
| def _score_to_tier(score: int) -> str: | |
| if score >= 85: return "hot" | |
| if score >= 70: return "warm" | |
| if score >= 50: return "nurture" | |
| return "archive" | |