Spaces:
Running
Running
| import axios from "axios"; | |
| import { getEnv } from "../../shared/config/env"; | |
| import { withRetry, isCircuitOpen, recordFailure, recordSuccess } from "../../shared/utils/retry"; | |
| import { serperLimiter } from "../../shared/utils/rate-limiter"; | |
| import { logger } from "../../shared/utils/logger"; | |
/**
 * Provider key for this integration. The same value is handed to the circuit
 * breaker, the rate limiter, the retry helper and the log context below.
 */
const PROVIDER = "serper";
/** One organic search hit returned by Serper, plus the domain derived from its link. */
export interface SerperResult {
  /** Result title as reported by Serper. */
  title: string;
  /** Result URL as reported by Serper. */
  link: string;
  /** Text excerpt shown for the result. */
  snippet: string;
  /** Domain extracted from `link`; used as the deduplication key. */
  domain: string;
}
/**
 * Searches Google via the Serper.dev API for companies matching the ICP in a region.
 *
 * Builds a fixed set of targeted queries from `region`, `industry` and `keywords`,
 * runs them one after another and merges the hits. A query that still fails after
 * `withRetry` is logged and skipped, so the returned list may be partial.
 *
 * @param region - Region code or region name used when building the queries.
 * @param industry - Industry term inserted into the queries.
 * @param keywords - Extra search terms passed to the query builder.
 * @param page - Serper result page requested for every query (defaults to 1).
 * @returns Hits from all successful queries, keeping only the first hit per
 *   domain; an empty array when the circuit breaker is already open on entry.
 */
export async function searchCompanies(
  region: string,
  industry: string,
  keywords: string[],
  page = 1
): Promise<SerperResult[]> {
  if (isCircuitOpen(PROVIDER)) {
    logger.warn({ provider: PROVIDER }, "Circuit open — skipping Serper call");
    return [];
  }

  // NOTE(review): a single limiter token is consumed per search even though
  // several HTTP requests follow — confirm this matches the intended quota.
  await serperLimiter.consume(PROVIDER);

  const queries = buildQueries(region, industry, keywords);
  const results: SerperResult[] = [];

  for (const [index, query] of queries.entries()) {
    // Failures recorded in earlier iterations may have opened the breaker;
    // stop instead of sending the remaining queries to a failing provider.
    if (isCircuitOpen(PROVIDER)) {
      logger.warn(
        { provider: PROVIDER, skipped: queries.length - index },
        "Circuit opened during search — skipping remaining Serper queries"
      );
      break;
    }

    try {
      const data = await withRetry(() => callSerper(query, page), { provider: PROVIDER });
      results.push(...data);
      recordSuccess(PROVIDER);
    } catch (err) {
      // Best effort: one failed query must not discard hits from the others.
      recordFailure(PROVIDER);
      logger.error({ query, err }, "Serper search failed");
    }
  }

  // Deduplicate by domain, keeping the first occurrence.
  const seen = new Set<string>();
  return results.filter((r) => {
    if (seen.has(r.domain)) return false;
    seen.add(r.domain);
    return true;
  });
}
/**
 * Sends one search request to the Serper.dev `/search` endpoint and converts
 * the `organic` entries of the response into `SerperResult` objects.
 *
 * The response body is untyped external JSON, so each entry is checked before
 * use: entries without a string `link` are skipped (they cannot yield a
 * domain), and a missing `title` or `snippet` becomes an empty string.
 *
 * @param query - Search query string sent as `q`.
 * @param page - Result page to request.
 * @returns The converted organic results; empty when the response has none.
 * @throws Error when `organic` is present but is not an array, and whatever
 *   axios throws on network, timeout or HTTP errors.
 */
async function callSerper(query: string, page: number): Promise<SerperResult[]> {
  const { SERPER_API_KEY } = getEnv();
  const response = await axios.post(
    "https://google.serper.dev/search",
    { q: query, num: 10, page },
    {
      headers: {
        "X-API-KEY": SERPER_API_KEY,
        "Content-Type": "application/json",
      },
      timeout: 10_000,
    }
  );

  const organic: unknown = response.data?.organic ?? [];
  if (!Array.isArray(organic)) {
    // Keep this a failure so the caller's retry and failure tracking still see it.
    throw new Error("Unexpected Serper response: `organic` is not an array");
  }

  const results: SerperResult[] = [];
  for (const item of organic as unknown[]) {
    if (typeof item !== "object" || item === null) continue;
    const { title, link, snippet } = item as Record<string, unknown>;
    if (typeof link !== "string" || link === "") continue;
    results.push({
      title: typeof title === "string" ? title : "",
      link,
      snippet: typeof snippet === "string" ? snippet : "",
      domain: extractDomain(link),
    });
  }
  return results;
}
/**
 * Builds the search queries for one industry/region combination.
 *
 * Three queries are always produced from `industry` and the region label. A
 * fourth, keyword-driven query is added only when at least one of the first
 * two keywords is non-blank, so an empty keyword list never puts the literal
 * text "undefined" into a query.
 *
 * @param region - Region code; mapped through `REGION_LABELS`, used as-is when unmapped.
 * @param industry - Industry term inserted into the queries.
 * @param keywords - Extra search terms; only the first two are used.
 * @returns The queries in the order they should be run.
 */
function buildQueries(region: string, industry: string, keywords: string[]): string[] {
  const regionLabel = REGION_LABELS[region] ?? region;

  // Precision queries — each targets a specific pain+industry+region combo
  const queries = [
    `"${industry}" company "${regionLabel}" "50 employees" OR "100 employees" OR "200 employees" automation`,
    `${industry} business ${regionLabel} site:linkedin.com/company`,
    `"${industry}" "${regionLabel}" "digital transformation" OR "AI" OR "automation" company`,
  ];

  const keywordTerms = keywords
    .slice(0, 2)
    .map((keyword) => keyword.trim())
    .filter((keyword) => keyword.length > 0)
    .join(" ");
  if (keywordTerms !== "") {
    queries.push(`${keywordTerms} company ${regionLabel} -job -careers`);
  }

  return queries;
}
/**
 * Returns the hostname of `url` without a leading "www.".
 *
 * Absolute URLs are parsed directly. Input with no scheme that starts with a
 * dotted host (e.g. "www.acme.com/about") is retried with an "https://"
 * prefix, so it yields "acme.com" instead of the whole string. Anything that
 * still cannot be parsed is returned unchanged.
 *
 * @param url - Absolute URL or scheme-less host/path string.
 * @returns The normalized hostname, or `url` itself when no host can be extracted.
 */
function extractDomain(url: string): string {
  const hostOf = (candidate: string): string | undefined => {
    try {
      return new URL(candidate).hostname.replace(/^www\./, "");
    } catch {
      return undefined;
    }
  };

  const direct = hostOf(url);
  if (direct !== undefined) return direct;

  // Only retry when the text begins with something host-like ("label.label");
  // a blind prefix would turn inputs such as "/about" into a bogus hostname.
  const bare = url.trim();
  if (/^[^\s/:?#@]+\.[^\s/:?#@]+/.test(bare)) {
    return hostOf(`https://${bare}`) ?? url;
  }
  return url;
}
/**
 * Region code → place name written into the search queries.
 * Codes without an entry are used verbatim by `buildQueries`.
 */
const REGION_LABELS: Record<string, string> = {
  US: "United States",
  UK: "United Kingdom",
  AU: "Australia",
  // NOTE(review): maps to a city rather than a country name — confirm this is intentional.
  UAE: "Dubai",
  SA: "Saudi Arabia",
  SG: "Singapore",
};