File size: 5,086 Bytes
bd28470
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f138d4
 
 
 
 
 
 
 
 
 
 
bd28470
 
 
5f138d4
bd28470
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import { task } from "@trigger.dev/sdk/v3";
import { z } from "zod";
import { getSupabaseClient } from "../../shared/supabase/client";
import { logger } from "../../shared/utils/logger";
import { loadIcpConfig, applyHardFilters, applySignalFilters } from "../lib/icp-filter";
import { isDuplicate, isSuppressed } from "../lib/deduplicator";
import { scrapeCompanyWebsite } from "../lib/web-scraper";
import { scrapeLinkedInCompany } from "../lib/linkedin-scraper";
import { normalizeCompany } from "../lib/normalizer";
import { enrichContacts } from "../lib/contact-enricher";
import { searchCompanies } from "../providers/serper";
import { getRegionConfig } from "../lib/rotation";

// ─── Input schema ─────────────────────────────────────────────

/** Region codes accepted for a manual discovery run. */
const MANUAL_DISCOVERY_REGIONS = ["US", "UK", "AU", "UAE", "SA", "SG"] as const;

/**
 * Runtime schema for the payload of a manual discovery run.
 *
 * - `region`         required; one of the region codes listed above.
 * - `industry`       optional string.
 * - `customKeywords` optional list of strings.
 * - `maxCompanies`   number in the range 1..50 (inclusive); defaults to 20.
 * - `triggeredBy`    string; defaults to "manual".
 */
const ManualDiscoveryInput = z.object({
  region: z.enum(MANUAL_DISCOVERY_REGIONS),
  industry: z.string().optional(),
  customKeywords: z.array(z.string()).optional(),
  maxCompanies: z.number().min(1).max(50).default(20),
  // slack username or "api"
  triggeredBy: z.string().default("manual"),
});

/** Parsed (post-validation) shape of a manual discovery payload, defaults applied. */
export type ManualDiscoveryInput = z.infer<typeof ManualDiscoveryInput>;

// ─── Manual Discovery Task ────────────────────────────────────

/**
 * Trigger.dev task that performs an on-demand ("manual") company discovery
 * run for a single region.
 *
 * Flow:
 *  1. Validate the payload against `ManualDiscoveryInput` (defaults applied).
 *  2. Load the ICP config and the region config.
 *  3. For each industry, search companies and push every hit (capped at
 *     `maxCompanies` per industry) through `processManualCompany`.
 *  4. Insert one `audit_log` row summarising the run.
 *
 * Notes:
 *  - `customKeywords`, when non-empty, replace the ICP keywords.
 *  - Without an explicit `industry`, only the first three industries of the
 *    region config are searched.
 *  - A company whose processing throws is logged and counted in
 *    `totalFailed`; the run continues with the next company.
 *  - A failed audit-log insert is logged but does not fail the run.
 *
 * @returns The region and the run counters: `totalDiscovered` (every company
 *   whose status was not "skip"), `totalQualified` (status "qualified") and
 *   `totalFailed` (processing threw).
 */
export const manualDiscoveryTask = task({
  id: "manual-discovery",
  maxDuration: 1800, // 30 min max

  // Typed with the schema's *input* type so callers may omit the defaulted
  // fields (`maxCompanies`, `triggeredBy`); `parse` below fills them in.
  run: async (payload: z.input<typeof ManualDiscoveryInput>) => {
    const input = ManualDiscoveryInput.parse(payload);
    logger.info({ input }, "🎯 Manual discovery started");

    const icp = await loadIcpConfig();
    const regionConfig = getRegionConfig(input.region);

    const keywords = input.customKeywords?.length
      ? input.customKeywords
      : icp.keywords;

    const industries = input.industry
      ? [input.industry]
      : regionConfig.industries.slice(0, 3); // limit to 3 for manual runs

    let totalDiscovered = 0;
    let totalQualified = 0;
    let totalFailed = 0;

    for (const industry of industries) {
      const results = await searchCompanies(input.region, industry, keywords);
      // The cap applies to each industry's result list, not to the whole run.
      const capped = results.slice(0, input.maxCompanies);

      for (const result of capped) {
        try {
          const status = await processManualCompany(result, input.region, icp, industry);
          if (status !== "skip") totalDiscovered++;
          if (status === "qualified") totalQualified++;
        } catch (err: unknown) {
          // Isolate per-company failures so one bad site cannot abort the
          // whole run (and with it the audit-log entry).
          totalFailed++;
          logger.error(
            { err, domain: result.domain, industry },
            "Manual discovery: company processing failed"
          );
        }
      }
    }

    // ── Audit log ─────────────────────────────────────────────
    const db = getSupabaseClient();
    // supabase-js reports failures through the returned `error`, not by
    // throwing, so it has to be inspected explicitly.
    const { error: auditError } = await db.from("audit_log").insert({
      action: "manual_discovery_completed",
      entity_type: "discovery_run",
      entity_id: null,
      actor: input.triggeredBy,
      details: {
        region: input.region,
        industry: input.industry ?? "all",
        // The industries that were actually searched in this run.
        industries,
        totalDiscovered,
        totalQualified,
        totalFailed,
      },
    });
    if (auditError) {
      logger.error({ err: auditError }, "Manual discovery: audit log insert failed");
    }

    logger.info(
      { totalDiscovered, totalQualified, totalFailed },
      "✅ Manual discovery completed"
    );
    return { region: input.region, totalDiscovered, totalQualified, totalFailed };
  },
});

// ─── Processing pipeline (same logic as auto, extracted) ─────

/**
 * Runs a single search hit through the manual discovery pipeline.
 *
 * Steps, in order: suppression check, duplicate check, website scrape, hard
 * ICP filters, signal filters, optional LinkedIn scrape, normalisation,
 * insert into `companies`, contact enrichment, and (when contacts were
 * saved) triggering the profiling task.
 *
 * @param result   Search hit to process.
 * @param region   Region code the run targets.
 * @param icp      ICP config as returned by `loadIcpConfig`.
 * @param industry Industry the hit was found under; stored on the company row.
 * @returns
 *  - `"skip"`: the domain is suppressed or a duplicate, the hard filters
 *    failed, or the company row could not be inserted.
 *  - `"new"`: the company was saved but either failed the signal filters
 *    (status set to "nurture") or yielded no saved contacts.
 *  - `"qualified"`: contacts were saved and the profiling task was triggered.
 */
async function processManualCompany(
  result: { domain: string; title: string; link: string; snippet: string },
  region: string,
  icp: Awaited<ReturnType<typeof loadIcpConfig>>,
  industry: string
): Promise<"skip" | "new" | "qualified"> {
  const { domain } = result;
  const db = getSupabaseClient();

  if (await isSuppressed(domain)) return "skip";
  const { isDupe } = await isDuplicate(domain, result.title);
  if (isDupe) return "skip";

  const website = await scrapeCompanyWebsite(domain);
  const gate1 = applyHardFilters(website, icp, region);
  if (!gate1.passed) return "skip";

  const gate2 = applySignalFilters(website, icp);

  // LinkedIn data is best-effort: a failed scrape is logged and the pipeline
  // continues without it.
  let linkedin = null;
  if (website.linkedinUrl) {
    try {
      linkedin = await scrapeLinkedInCompany(website.linkedinUrl);
    } catch (err: unknown) {
      logger.warn({ err, domain }, "LinkedIn scrape failed; continuing without LinkedIn data");
    }
  }

  // NOTE(review): `as any` hides a mismatch between the search-hit shape and
  // whatever `normalizeCompany` expects — confirm and replace with a real type.
  const normalized = normalizeCompany(result as any, website, linkedin, region, "manual");
  const { data: saved, error } = await db
    .from("companies")
    .insert({ ...normalized, industry })
    .select("id")
    .single();

  if (error || !saved) {
    // Log the failure so a database error is distinguishable from a company
    // that was legitimately filtered out.
    logger.error({ err: error, domain }, "Failed to save discovered company");
    return "skip";
  }

  if (!gate2.passed) {
    const { error: nurtureError } = await db
      .from("companies")
      .update({ status: "nurture" })
      .eq("id", saved.id);
    if (nurtureError) {
      logger.error(
        { err: nurtureError, companyId: saved.id },
        "Failed to set company status to nurture"
      );
    }
    return "new";
  }

  const contactsSaved = await enrichContacts(
    saved.id,
    domain,
    normalized.name,
    normalized.employee_count,
    industry,
    website.text.slice(0, 300),
    website.html,
    normalized.linkedin_url,
    "manual-" + saved.id
  );

  // The status is moved to "profiled" regardless of how many contacts were saved.
  const { error: profiledError } = await db
    .from("companies")
    .update({ status: "profiled" })
    .eq("id", saved.id);
  if (profiledError) {
    logger.error(
      { err: profiledError, companyId: saved.id },
      "Failed to set company status to profiled"
    );
  }

  if (contactsSaved.length > 0) {
    // Loaded lazily, at the point of use, rather than via a top-level import.
    const { profilingTask } = await import("../../profiling/trigger-tasks/profiling-router");
    await profilingTask.trigger({
      company_id: saved.id,
      domain,
      name: normalized.name,
      region,
      source: "manual",
    });
    return "qualified";
  }

  return "new";
}