Spaces:
Running
Running
| import { distance } from "fastest-levenshtein"; | |
| import { getSupabaseClient } from "../../shared/supabase/client"; | |
| import { logger } from "../../shared/utils/logger"; | |
/**
 * Checks whether a company already exists in the `companies` table.
 *
 * Strategy:
 *  1. Exact match on the normalized domain.
 *  2. Fallback: fuzzy (Levenshtein-based) name match against the most
 *     recently discovered companies.
 *
 * Errors reported by Supabase are logged and treated as "no match", so the
 * check stays best-effort.
 *
 * @param domain - Domain of the candidate company; normalized before lookup.
 * @param name - Name of the candidate company; normalized before comparison.
 * @returns `{ isDupe: true, existingId }` when a match is found, otherwise
 *   `{ isDupe: false }`.
 */
export async function isDuplicate(
  domain: string,
  name: string
): Promise<{ isDupe: boolean; existingId?: string }> {
  // Minimum normalized similarity (0..1) for two names to count as the same.
  const similarityThreshold = 0.88;
  // How many of the newest companies the fuzzy pass compares against.
  const recentLimit = 500;

  const db = getSupabaseClient();

  // ── 1. Exact domain match (fastest) ─────────────────────────
  // `limit(1)` matters: without it `maybeSingle()` reports an error (and null
  // data) when several rows share the domain, which would hide the duplicate.
  const { data: byDomain, error: domainError } = await db
    .from("companies")
    .select("id, domain, name")
    .eq("domain", normalizeDomain(domain))
    .limit(1)
    .maybeSingle();

  if (domainError) {
    logger.warn(
      { domain, error: domainError.message },
      "Duplicate check: domain lookup failed"
    );
  }

  if (byDomain) {
    logger.debug({ domain, existingId: byDomain.id }, "Duplicate: exact domain match");
    return { isDupe: true, existingId: byDomain.id };
  }

  // ── 2. Fuzzy name match against recent records ──────────────
  const normalizedInput = normalizeName(name);

  // A name that normalizes to "" (e.g. only a legal suffix or punctuation)
  // can never reach the threshold, and would produce 0/0 against another
  // empty name, so skip the query entirely.
  if (normalizedInput.length === 0) return { isDupe: false };

  const { data: recent, error: recentError } = await db
    .from("companies")
    .select("id, name")
    .order("discovered_at", { ascending: false })
    .limit(recentLimit);

  if (recentError) {
    logger.warn(
      { name, error: recentError.message },
      "Duplicate check: recent companies lookup failed"
    );
  }

  if (!recent) return { isDupe: false };

  for (const existing of recent) {
    // Rows without a usable name cannot be compared.
    if (typeof existing.name !== "string") continue;

    const normalizedExisting = normalizeName(existing.name);
    if (normalizedExisting.length === 0) continue;

    // Similarity = 1 - (edit distance / longer length); both lengths are > 0 here.
    const dist = distance(normalizedInput, normalizedExisting);
    const maxLen = Math.max(normalizedInput.length, normalizedExisting.length);
    const similarity = 1 - dist / maxLen;

    if (similarity >= similarityThreshold) {
      logger.debug(
        { input: name, existing: existing.name, similarity: similarity.toFixed(2) },
        "Duplicate: fuzzy name match"
      );
      return { isDupe: true, existingId: existing.id };
    }
  }

  return { isDupe: false };
}
/**
 * Checks whether a domain appears in the `suppression_list` table.
 * Intended to be called before any processing of the domain.
 *
 * The lookup matches both the domain exactly as given and its normalized
 * form (lower-cased, without a leading `www.` or trailing slash), so
 * variants such as `WWW.Example.com` are still recognised.
 *
 * @param domain - Domain to look up.
 * @returns `true` when a matching suppression row exists, otherwise `false`.
 */
export async function isSuppressed(domain: string): Promise<boolean> {
  const db = getSupabaseClient();

  // Keep the raw value so rows stored un-normalized still match; the Set
  // removes the duplicate when the input is already in normalized form.
  const candidates = Array.from(new Set([domain, normalizeDomain(domain)]));

  // `limit(1)` matters: without it `maybeSingle()` reports an error (and null
  // data) when several rows match, which would make a suppressed domain look
  // unsuppressed.
  const { data, error } = await db
    .from("suppression_list")
    .select("id")
    .in("domain", candidates)
    .limit(1)
    .maybeSingle();

  if (error) {
    // NOTE(review): a failed lookup still yields `false` (fail-open), as it
    // did before; confirm callers are fine with that for a suppression check.
    logger.warn({ domain, error: error.message }, "Suppression check: lookup failed");
  }

  return !!data;
}
// ─── Helpers ─────────────────────────────────────────────────
/**
 * Canonicalizes a domain for equality comparison: strips surrounding
 * whitespace, lower-cases, and removes a leading `www.` and one trailing `/`.
 *
 * @param domain - Raw domain string.
 * @returns The normalized domain.
 */
function normalizeDomain(domain: string): string {
  // Trim first: the `^www\.` and `\/$` anchors below only match when no
  // whitespace surrounds the value.
  return domain
    .trim()
    .toLowerCase()
    .replace(/^www\./, "")
    .replace(/\/$/, "");
}
/**
 * Reduces a company name to a comparison key for fuzzy matching.
 *
 * Steps: fold accented letters to their base letter, lower-case, drop common
 * legal-entity suffixes (inc, ltd, llc, ...), remove every character that is
 * not `a-z`, `0-9` or whitespace, then collapse whitespace runs to a single
 * space and trim.
 *
 * @param name - Raw company name.
 * @returns The normalized name; may be an empty string.
 */
function normalizeName(name: string): string {
  return name
    // Decompose accented letters (e.g. "é" -> "e" + combining mark) and drop
    // the marks, so the ASCII filter below keeps the base letter.
    .normalize("NFKD")
    .replace(/[\u0300-\u036f]/g, "")
    .toLowerCase()
    .replace(/\b(inc|ltd|llc|corp|co|limited|plc|gmbh|pty|pvt|srl|bv|ag|sa)\b\.?/g, "")
    .replace(/[^a-z0-9\s]/g, "")
    // Removing a suffix from the middle of a name leaves a double space,
    // which would otherwise count as an extra edit in the distance.
    .replace(/\s+/g, " ")
    .trim();
}