-- ============================================================ -- Migration 002 — Phase 1 Enhancements -- Territory management, service profiles, social profiles, -- discovery run tracking, pipeline checkpoints, LLM traces -- ============================================================ -- ─── SERVICE PROFILES ──────────────────────────────────────── -- What services WE offer → what industries → what pain signals to look for CREATE TABLE service_profiles ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), service_name TEXT NOT NULL UNIQUE, -- 'AI Receptionist' description TEXT, target_industries TEXT[] NOT NULL, -- ['dental','medical','legal','salon'] min_employees INTEGER DEFAULT 3, max_employees INTEGER DEFAULT 500, pain_signals TEXT[] NOT NULL, -- website signals to detect score_boost INTEGER NOT NULL DEFAULT 15, -- points added when matched outreach_keywords TEXT[], -- words to use in outreach is_active BOOLEAN DEFAULT true, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); -- Seed service profiles INSERT INTO service_profiles (service_name, description, target_industries, min_employees, pain_signals, score_boost, outreach_keywords) VALUES ('AI Receptionist', 'Automated phone answering, appointment booking, 24/7 availability', ARRAY['dental','medical','veterinary','legal','salon','spa','real_estate','accounting','chiropractic'], 3, ARRAY['phone number prominent','book appointment','call us','receptionist','front desk','office hours','schedule a visit'], 20, ARRAY['missed calls','after hours','appointment booking','front desk costs']), ('AI Customer Support', 'Chatbot, ticket automation, FAQ automation', ARRAY['ecommerce','saas','retail','hospitality','travel','insurance','telecom'], 10, ARRAY['contact form','support email','FAQ page','help center','no chatbot','submit a ticket'], 15, ARRAY['support costs','response time','ticket volume','customer satisfaction']), ('AI Data Processing', 'Document processing, report automation, ERP modernization', ARRAY['manufacturing','logistics','finance','healthcare','supply_chain','construction','energy'], 50, ARRAY['legacy ERP','SAP','manual reporting','spreadsheet','data entry','compliance reporting'], 25, ARRAY['manual processes','reporting overhead','data accuracy','compliance automation']), ('AI Sales Automation', 'Outreach automation, CRM enrichment, lead scoring', ARRAY['b2b_saas','consulting','recruitment','insurance','financial_services','marketing_agency'], 10, ARRAY['sales team','CRM','outbound','SDR','BDR','sales development','pipeline'], 20, ARRAY['pipeline velocity','lead qualification','outbound efficiency','sales productivity']), ('AI Workflow Automation', 'General process automation, integration, workflow optimization', ARRAY['technology','professional_services','education','media','nonprofit_large','government_contractor'], 20, ARRAY['manual process','approval workflow','internal tools','legacy system','multiple platforms'], 15, ARRAY['operational efficiency','process bottlenecks','tool consolidation','workflow speed']); -- ─── TERRITORY GRID ────────────────────────────────────────── -- Every city × industry = one territory unit CREATE TABLE territory_grid ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), country TEXT NOT NULL, country_code TEXT NOT NULL, city TEXT NOT NULL, tier INTEGER NOT NULL DEFAULT 1, -- 1=major city, 2=mid, 3=small timezone TEXT, -- 'America/New_York' is_active BOOLEAN DEFAULT true, UNIQUE(country_code, city) ); -- Seed US Tier 1 cities INSERT INTO territory_grid (country, country_code, city, tier, timezone) VALUES ('United States', 'US', 'New York', 1, 'America/New_York'), ('United States', 'US', 'Los Angeles', 1, 'America/Los_Angeles'), ('United States', 'US', 'Chicago', 1, 'America/Chicago'), ('United States', 'US', 'Houston', 1, 'America/Chicago'), ('United States', 'US', 'Phoenix', 1, 'America/Phoenix'), ('United States', 'US', 'Philadelphia', 1, 'America/New_York'), ('United States', 'US', 'San Antonio', 1, 'America/Chicago'), ('United States', 'US', 'San Diego', 1, 'America/Los_Angeles'), ('United States', 'US', 'Dallas', 1, 'America/Chicago'), ('United States', 'US', 'Austin', 1, 'America/Chicago'), ('United States', 'US', 'San Francisco', 1, 'America/Los_Angeles'), ('United States', 'US', 'Seattle', 1, 'America/Los_Angeles'), ('United States', 'US', 'Denver', 1, 'America/Denver'), ('United States', 'US', 'Boston', 1, 'America/New_York'), ('United States', 'US', 'Miami', 1, 'America/New_York'); -- UK cities INSERT INTO territory_grid (country, country_code, city, tier, timezone) VALUES ('United Kingdom', 'GB', 'London', 1, 'Europe/London'), ('United Kingdom', 'GB', 'Manchester', 1, 'Europe/London'), ('United Kingdom', 'GB', 'Birmingham', 1, 'Europe/London'), ('United Kingdom', 'GB', 'Leeds', 2, 'Europe/London'), ('United Kingdom', 'GB', 'Edinburgh', 2, 'Europe/London'), ('United Kingdom', 'GB', 'Bristol', 2, 'Europe/London'), ('United Kingdom', 'GB', 'Glasgow', 2, 'Europe/London'); -- Australia cities INSERT INTO territory_grid (country, country_code, city, tier, timezone) VALUES ('Australia', 'AU', 'Sydney', 1, 'Australia/Sydney'), ('Australia', 'AU', 'Melbourne', 1, 'Australia/Melbourne'), ('Australia', 'AU', 'Brisbane', 1, 'Australia/Brisbane'), ('Australia', 'AU', 'Perth', 2, 'Australia/Perth'), ('Australia', 'AU', 'Adelaide', 2, 'Australia/Adelaide'); -- Gulf cities INSERT INTO territory_grid (country, country_code, city, tier, timezone) VALUES ('United Arab Emirates', 'AE', 'Dubai', 1, 'Asia/Dubai'), ('United Arab Emirates', 'AE', 'Abu Dhabi', 1, 'Asia/Dubai'), ('Saudi Arabia', 'SA', 'Riyadh', 1, 'Asia/Riyadh'), ('Saudi Arabia', 'SA', 'Jeddah', 2, 'Asia/Riyadh'), ('Qatar', 'QA', 'Doha', 1, 'Asia/Qatar'); -- ─── DISCOVERY RUNS ────────────────────────────────────────── -- Track every search execution — prevents duplicate searches CREATE TABLE discovery_runs ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), run_type TEXT NOT NULL, -- 'auto' | 'manual' territory_id UUID REFERENCES territory_grid(id), country_code TEXT NOT NULL, city TEXT NOT NULL, industry TEXT NOT NULL, search_queries TEXT[], -- actual Google queries used companies_found INTEGER DEFAULT 0, companies_passed_gate1 INTEGER DEFAULT 0, companies_passed_gate2 INTEGER DEFAULT 0, leads_qualified INTEGER DEFAULT 0, quota_target INTEGER DEFAULT 10, status TEXT DEFAULT 'running', -- 'running','completed','failed','partial' triggered_by TEXT DEFAULT 'system', -- 'system' | 'slack:username' ran_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), completed_at TIMESTAMPTZ ); CREATE INDEX idx_discovery_runs_territory ON discovery_runs(city, industry, ran_at DESC); -- ─── TERRITORY PROGRESS ───────────────────────────────────── -- Tracks which city+industry combos have been covered and when CREATE TABLE territory_progress ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), territory_id UUID REFERENCES territory_grid(id), industry TEXT NOT NULL, last_run_at TIMESTAMPTZ NOT NULL, next_eligible_at TIMESTAMPTZ NOT NULL, -- last_run + 30 days total_leads INTEGER DEFAULT 0, UNIQUE(territory_id, industry) ); -- ─── PIPELINE CHECKPOINTS ─────────────────────────────────── -- Allows pipeline to resume from failure point (idempotency) CREATE TABLE pipeline_checkpoints ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), run_id UUID REFERENCES discovery_runs(id), company_domain TEXT NOT NULL, stage TEXT NOT NULL, -- 'scraped','filtered','contacts_found','profiled','scored' stage_data JSONB DEFAULT '{}', -- intermediate data for resume completed BOOLEAN DEFAULT false, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), UNIQUE(run_id, company_domain) ); -- ─── LLM CALL TRACES ──────────────────────────────────────── -- Every LLM call logged for cost tracking and debugging CREATE TABLE llm_traces ( id UUID PRIMARY KEY DEFAULT gen_random_uuid(), trace_id TEXT NOT NULL, -- pipeline run trace operation TEXT NOT NULL, -- 'profile','score','email_classify','pain_detect' model TEXT NOT NULL, -- 'meta/llama-3.3-70b-instruct' provider TEXT NOT NULL DEFAULT 'nvidia', prompt_tokens INTEGER, completion_tokens INTEGER, total_tokens INTEGER, latency_ms INTEGER, success BOOLEAN NOT NULL, fallback_used BOOLEAN DEFAULT false, grounding_score NUMERIC(3,2), -- 0.00-1.00 how well grounded company_id UUID REFERENCES companies(id), input_hash TEXT, -- hash of prompt (no PII stored) output_hash TEXT, -- hash of output created_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); CREATE INDEX idx_llm_traces_trace ON llm_traces(trace_id); CREATE INDEX idx_llm_traces_company ON llm_traces(company_id); -- ─── SYSTEM CONFIG ─────────────────────────────────────────── -- Runtime configuration that Slack commands can modify CREATE TABLE system_config ( key TEXT PRIMARY KEY, value JSONB NOT NULL, updated_by TEXT DEFAULT 'system', updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() ); INSERT INTO system_config (key, value) VALUES ('daily_quota', '{"default": 10, "today_override": null}'), ('schedule', '{"start_hour_utc": 4, "enabled": true}'), ('auto_mode', '{"enabled": true, "paused": false, "paused_by": null}'), ('current_territory', '{"country_code": "US", "city_index": 0, "industry_index": 0}'); -- ─── ADD SOCIAL PROFILES TO CONTACTS ───────────────────────── ALTER TABLE contacts ADD COLUMN IF NOT EXISTS linkedin_personal_url TEXT; ALTER TABLE contacts ADD COLUMN IF NOT EXISTS social_profiles JSONB DEFAULT '{}'; ALTER TABLE contacts ADD COLUMN IF NOT EXISTS email_verification_layers JSONB DEFAULT '{}'; ALTER TABLE contacts ADD COLUMN IF NOT EXISTS email_tier TEXT; -- 'personal','authority','context_verified','rejected' ALTER TABLE contacts ADD COLUMN IF NOT EXISTS authority_confirmed BOOLEAN DEFAULT false; -- ─── ADD CITY TO COMPANIES ─────────────────────────────────── ALTER TABLE companies ADD COLUMN IF NOT EXISTS city TEXT; ALTER TABLE companies ADD COLUMN IF NOT EXISTS service_match TEXT; -- matched service name ALTER TABLE companies ADD COLUMN IF NOT EXISTS service_match_score INTEGER DEFAULT 0; ALTER TABLE companies ADD COLUMN IF NOT EXISTS pain_signals TEXT[] DEFAULT '{}'; ALTER TABLE companies ADD COLUMN IF NOT EXISTS trace_id TEXT; -- pipeline trace