import { distance } from "fastest-levenshtein";
import { getSupabaseClient } from "../../shared/supabase/client";
import { logger } from "../../shared/utils/logger";

/** Minimum similarity (1 - editDistance / longerLength) at which two names count as the same company. */
const NAME_SIMILARITY_THRESHOLD = 0.88;

/** Number of most recently discovered companies the fuzzy name pass compares against. */
const RECENT_COMPANIES_LIMIT = 500;

/** Legal-form suffixes removed before names are compared (applied to already lower-cased text). */
const LEGAL_SUFFIX_PATTERN =
  /\b(inc|ltd|llc|corp|co|limited|plc|gmbh|pty|pvt|srl|bv|ag|sa)\b\.?/g;

/**
 * Checks if a company already exists in Supabase.
 * Uses an exact match on the normalized domain first, then falls back to a
 * fuzzy name match against the most recently discovered companies.
 *
 * Query failures are logged and treated as "no match", so this function does
 * not throw on a Supabase error.
 *
 * @param domain - Domain of the candidate company; normalized before lookup.
 * @param name - Display name of the candidate company.
 * @returns `{ isDupe: true, existingId }` when a match is found, otherwise
 *   `{ isDupe: false }`.
 */
export async function isDuplicate(
  domain: string,
  name: string
): Promise<{ isDupe: boolean; existingId?: string }> {
  const db = getSupabaseClient();

  // ── 1. Exact domain match (fastest) ─────────────────────────
  // limit(1) matters: maybeSingle() reports an error (and null data) when more
  // than one row matches, which would make an already-duplicated domain look new.
  const { data: byDomain, error: domainError } = await db
    .from("companies")
    .select("id, domain, name")
    .eq("domain", normalizeDomain(domain))
    .limit(1)
    .maybeSingle();

  if (domainError) {
    logger.warn(
      { domain, error: domainError.message },
      "Duplicate check: domain lookup failed"
    );
  }

  if (byDomain) {
    logger.debug({ domain, existingId: byDomain.id }, "Duplicate: exact domain match");
    return { isDupe: true, existingId: byDomain.id };
  }

  // ── 2. Fuzzy name match against recent records ────────────────
  const normalizedInput = normalizeName(name);

  // A name that normalizes to nothing (e.g. only a legal suffix) can never
  // reach the threshold, so skip the query and avoid a 0/0 similarity.
  if (normalizedInput.length === 0) return { isDupe: false };

  const { data: recent, error: recentError } = await db
    .from("companies")
    .select("id, name")
    .order("discovered_at", { ascending: false })
    .limit(RECENT_COMPANIES_LIMIT);

  if (recentError) {
    logger.warn(
      { name, error: recentError.message },
      "Duplicate check: recent companies lookup failed"
    );
  }

  if (!recent) return { isDupe: false };

  for (const existing of recent) {
    // Rows without a usable name cannot be compared.
    if (typeof existing.name !== "string") continue;

    const normalizedExisting = normalizeName(existing.name);
    // maxLen is at least normalizedInput.length, which is > 0 here.
    const maxLen = Math.max(normalizedInput.length, normalizedExisting.length);
    const similarity = 1 - distance(normalizedInput, normalizedExisting) / maxLen;

    if (similarity >= NAME_SIMILARITY_THRESHOLD) {
      logger.debug(
        { input: name, existing: existing.name, similarity: similarity.toFixed(2) },
        "Duplicate: fuzzy name match"
      );
      return { isDupe: true, existingId: existing.id };
    }
  }

  return { isDupe: false };
}

/**
 * Checks suppression list before any processing.
 *
 * The domain is normalized the same way as in {@link isDuplicate}, so casing,
 * a leading `www.`, or a trailing slash cannot bypass a suppression entry.
 * NOTE(review): assumes `suppression_list.domain` is stored in that normalized
 * form — confirm against the code that writes the table.
 *
 * A failed query is logged and reported as "not suppressed".
 *
 * @param domain - Domain to look up.
 * @returns `true` when a matching suppression row exists.
 */
export async function isSuppressed(domain: string): Promise<boolean> {
  const db = getSupabaseClient();

  // limit(1): without it maybeSingle() errors when the list holds the same
  // domain more than once, and the domain would be reported as not suppressed.
  const { data, error } = await db
    .from("suppression_list")
    .select("id")
    .eq("domain", normalizeDomain(domain))
    .limit(1)
    .maybeSingle();

  if (error) {
    logger.warn({ domain, error: error.message }, "Suppression check: lookup failed");
  }

  return Boolean(data);
}

// ─── Helpers ─────────────────────────────────────────────────

/**
 * Canonical form of a domain used for lookups: trimmed, lower-cased, without a
 * leading `www.` and without trailing slashes.
 */
function normalizeDomain(domain: string): string {
  // Trim first so surrounding whitespace cannot defeat the anchored patterns.
  return domain
    .trim()
    .toLowerCase()
    .replace(/^www\./, "")
    .replace(/\/+$/, "");
}

/**
 * Canonical form of a company name used for fuzzy comparison: lower-cased,
 * legal-form suffixes and punctuation removed, whitespace runs collapsed.
 */
function normalizeName(name: string): string {
  return name
    .toLowerCase()
    .replace(LEGAL_SUFFIX_PATTERN, "")
    .replace(/[^a-z0-9\s]/g, "")
    // Removing a suffix mid-name leaves a double space; collapse it so
    // "Acme Inc Labs" and "Acme Labs" normalize to the same string.
    .replace(/\s+/g, " ")
    .trim();
}