Spaces:
Running
Running
File size: 5,086 Bytes
bd28470 5f138d4 bd28470 5f138d4 bd28470 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 | import { task } from "@trigger.dev/sdk/v3";
import { z } from "zod";
import { getSupabaseClient } from "../../shared/supabase/client";
import { logger } from "../../shared/utils/logger";
import { loadIcpConfig, applyHardFilters, applySignalFilters } from "../lib/icp-filter";
import { isDuplicate, isSuppressed } from "../lib/deduplicator";
import { scrapeCompanyWebsite } from "../lib/web-scraper";
import { scrapeLinkedInCompany } from "../lib/linkedin-scraper";
import { normalizeCompany } from "../lib/normalizer";
import { enrichContacts } from "../lib/contact-enricher";
import { searchCompanies } from "../providers/serper";
import { getRegionConfig } from "../lib/rotation";
// ─── Input schema ─────────────────────────────────────────────
/** Region codes a manual discovery run may target. */
const MANUAL_DISCOVERY_REGIONS = ["US", "UK", "AU", "UAE", "SA", "SG"] as const;

/**
 * Runtime schema for the payload of a manual discovery run.
 *
 * The task re-parses its payload with this schema, so the defaults declared
 * here are applied even when the caller omits the optional fields.
 */
const ManualDiscoveryInput = z.object({
  /** Region to search in; must be one of the supported codes. */
  region: z.enum(MANUAL_DISCOVERY_REGIONS),

  /** Single industry to search; when omitted the task takes industries from the region config. */
  industry: z.string().optional(),

  /** Search keywords; when omitted or empty the task falls back to the ICP keywords. */
  customKeywords: z.array(z.string()).optional(),

  /** Cap (1–50, default 20) on how many search results are processed per industry search. */
  maxCompanies: z.number().min(1).max(50).default(20),

  /** Who started the run; recorded as the audit-log actor. */
  triggeredBy: z.string().default("manual"), // slack username or "api"
});

/** Parsed payload type inferred from the schema. */
export type ManualDiscoveryInput = z.infer<typeof ManualDiscoveryInput>;
// ─── Manual Discovery Task ────────────────────────────────────
/**
 * Trigger.dev task that runs company discovery on demand for one region.
 *
 * For each selected industry it searches for companies, pushes up to
 * `maxCompanies` results through `processManualCompany`, then writes a
 * summary row to `audit_log`.
 *
 * @returns The region plus the number of companies discovered (anything not
 *          skipped) and the number that qualified.
 */
export const manualDiscoveryTask = task({
  id: "manual-discovery",
  maxDuration: 1800, // 30 min max
  run: async (payload: ManualDiscoveryInput) => {
    // Re-parse so schema defaults are applied and an invalid payload throws up front.
    const input = ManualDiscoveryInput.parse(payload);
    logger.info({ input }, "🎯 Manual discovery started");

    const icp = await loadIcpConfig();
    const regionConfig = getRegionConfig(input.region);

    // Caller-supplied keywords win; an empty array falls back to the ICP keywords.
    const keywords = input.customKeywords?.length
      ? input.customKeywords
      : icp.keywords;
    const industries = input.industry
      ? [input.industry]
      : regionConfig.industries.slice(0, 3); // limit to 3 for manual runs

    let totalDiscovered = 0;
    let totalQualified = 0;

    for (const industry of industries) {
      const results = await searchCompanies(input.region, industry, keywords);
      // NOTE(review): the cap applies per industry, so a run without an explicit
      // industry can process up to 3 × maxCompanies results — confirm this is intended.
      const capped = results.slice(0, input.maxCompanies);

      for (const result of capped) {
        try {
          const status = await processManualCompany(result, input.region, icp, industry);
          if (status !== "skip") totalDiscovered++;
          if (status === "qualified") totalQualified++;
        } catch (err: unknown) {
          // One failing company must not abort the run and lose the counters
          // and the audit record; log it and move on to the next result.
          logger.error(
            { err, domain: result.domain, industry },
            "Manual discovery: company processing failed"
          );
        }
      }
    }

    // ── Audit log ─────────────────────────────────────────────
    const db = getSupabaseClient();
    const { error: auditError } = await db.from("audit_log").insert({
      action: "manual_discovery_completed",
      entity_type: "discovery_run",
      entity_id: null,
      actor: input.triggeredBy,
      details: {
        region: input.region,
        industry: input.industry ?? "all",
        totalDiscovered,
        totalQualified,
      },
    });
    if (auditError) {
      // The Supabase client reports failures in the result instead of throwing.
      logger.error({ err: auditError }, "Manual discovery: failed to write audit log");
    }

    logger.info({ totalDiscovered, totalQualified }, "✅ Manual discovery completed");
    return { region: input.region, totalDiscovered, totalQualified };
  },
});
// ─── Processing pipeline (same logic as auto, extracted) ─────
/**
 * Runs a single search result through the manual-discovery pipeline:
 * suppression and duplicate checks, website scrape, hard and signal filters,
 * optional LinkedIn scrape, insert into `companies`, contact enrichment and,
 * when contacts were saved, a profiling-task trigger.
 *
 * @param result   Search hit to process; `domain` and `title` drive the checks.
 * @param region   Region code passed to the hard filters, the normalizer and profiling.
 * @param icp      ICP configuration as returned by `loadIcpConfig`.
 * @param industry Industry stored on the company row and passed to enrichment.
 * @returns `"skip"` when the domain is suppressed or a duplicate, fails the hard
 *          filters, or could not be saved; `"new"` when the company was saved but
 *          failed the signal filters (status `nurture`) or enrichment saved no
 *          contacts; `"qualified"` when contacts were saved and profiling was triggered.
 */
async function processManualCompany(
  result: { domain: string; title: string; link: string; snippet: string },
  region: string,
  icp: Awaited<ReturnType<typeof loadIcpConfig>>,
  industry: string
): Promise<"skip" | "new" | "qualified"> {
  const { domain } = result;
  const db = getSupabaseClient();

  if (await isSuppressed(domain)) return "skip";
  const { isDupe } = await isDuplicate(domain, result.title);
  if (isDupe) return "skip";

  const website = await scrapeCompanyWebsite(domain);
  const gate1 = applyHardFilters(website, icp, region);
  if (!gate1.passed) return "skip";
  // Evaluated now, acted on after the insert: a signal-filter miss is still saved.
  const gate2 = applySignalFilters(website, icp);

  // LinkedIn data is best-effort: a failed scrape is logged and treated as "no data".
  let linkedin = null;
  if (website.linkedinUrl) {
    linkedin = await scrapeLinkedInCompany(website.linkedinUrl).catch((err: unknown) => {
      logger.warn({ err, domain }, "Manual discovery: LinkedIn scrape failed");
      return null;
    });
  }

  // NOTE(review): `as any` hides a mismatch between the search result and the
  // normalizer's expected input — confirm the real parameter type and drop the cast.
  const normalized = normalizeCompany(result as any, website, linkedin, region, "manual");

  const { data: saved, error } = await db
    .from("companies")
    .insert({ ...normalized, industry })
    .select("id")
    .single();
  if (error || !saved) {
    logger.warn({ err: error, domain }, "Manual discovery: failed to save company");
    return "skip";
  }
  const companyId = saved.id;

  // Writes the pipeline status; a failed update is logged but does not change the outcome.
  const setStatus = async (status: "nurture" | "profiled"): Promise<void> => {
    const { error: statusError } = await db
      .from("companies")
      .update({ status })
      .eq("id", companyId);
    if (statusError) {
      logger.warn(
        { err: statusError, companyId, status },
        "Manual discovery: failed to update company status"
      );
    }
  };

  if (!gate2.passed) {
    await setStatus("nurture");
    return "new";
  }

  const contactsSaved = await enrichContacts(
    companyId,
    domain,
    normalized.name,
    normalized.employee_count,
    industry,
    website.text.slice(0, 300),
    website.html,
    normalized.linkedin_url,
    "manual-" + companyId
  );
  await setStatus("profiled");

  if (contactsSaved.length > 0) {
    // Loaded lazily at the point of use rather than at module load.
    const { profilingTask } = await import("../../profiling/trigger-tasks/profiling-router");
    await profilingTask.trigger({
      company_id: companyId,
      domain,
      name: normalized.name,
      region,
      source: "manual",
    });
    return "qualified";
  }
  return "new";
}
|