/**
 * Apify Client Wrapper
 * Simplified interface for running Apify actors and handling results
 */

import { ApifyClient } from 'apify-client';

// Shared client instance. The token is read from the environment once, when
// this module is first loaded.
const apifyClient = new ApifyClient({
  token: process.env.APIFY_API_KEY,
});

/**
 * Normalized outcome of an actor run.
 *
 * When `success` is `false`, `error` carries a human-readable reason and the
 * content fields are absent. When `success` is `true`, the content fields are
 * derived from the first dataset item and `metadata` holds that raw item.
 */
export interface ScrapingResult {
  success: boolean;
  content?: string;
  title?: string;
  description?: string;
  images?: string[];
  metadata?: Record<string, unknown>;
  error?: string;
}

/**
 * Run Apify actor and get results
 *
 * Failures are reported through the returned object rather than thrown: a
 * missing API key, an empty dataset, and any error raised while running the
 * actor or reading its dataset all produce `{ success: false, error }`.
 *
 * @param actorId - Identifier of the actor to run (see `ACTOR_IDS`).
 * @param input - Input object passed to the actor as-is.
 * @returns A result built from the FIRST item of the run's default dataset;
 *   any further items are ignored.
 */
export async function runApifyActor(
  actorId: string,
  input: Record<string, unknown>
): Promise<ScrapingResult> {
  try {
    if (!process.env.APIFY_API_KEY) {
      return {
        success: false,
        error: 'Apify API key not configured',
      };
    }

    // Start actor run
    const run = await apifyClient.actor(actorId).call(input);

    // Get dataset results
    const dataset = await apifyClient.dataset(run.defaultDatasetId).listItems();

    if (!dataset.items || dataset.items.length === 0) {
      return {
        success: false,
        error: 'No data returned from actor',
      };
    }

    // Field names differ between actors, so the helpers below probe a list of
    // commonly used keys instead of relying on a fixed schema.
    const item = dataset.items[0] as Record<string, unknown>;

    return {
      success: true,
      content: extractText(item),
      title: firstNonEmptyString(item, ['title', 'name']),
      description: firstNonEmptyString(item, ['description']),
      images: extractImages(item),
      metadata: item,
    };
  } catch (error) {
    console.error('[apify] actor failed:', error);
    return {
      success: false,
      error: error instanceof Error ? error.message : 'Actor execution failed',
    };
  }
}

/**
 * Return the value of the first listed field that is a non-empty string.
 *
 * Values are checked at runtime instead of being cast, so a non-string value
 * (number, object, ...) can never end up in a `string`-typed result field.
 *
 * @returns The first matching string, or `undefined` when none of the fields
 *   holds a non-empty string.
 */
function firstNonEmptyString(
  item: Record<string, unknown>,
  fields: readonly string[]
): string | undefined {
  for (const field of fields) {
    const value = item[field];
    if (typeof value === 'string' && value !== '') {
      return value;
    }
  }
  return undefined;
}

/**
 * Extract text content from actor result
 *
 * Returns the first of `text`, `content`, `description`, `caption` that is a
 * string (an empty string counts). If none is a string, joins whichever of
 * `title`, `subtitle`, `body` are strings, separated by a blank line; the
 * result is `''` when none of those are present either.
 */
function extractText(item: Record<string, unknown>): string {
  // Try common text fields
  if (typeof item.text === 'string') return item.text;
  if (typeof item.content === 'string') return item.content;
  if (typeof item.description === 'string') return item.description;
  if (typeof item.caption === 'string') return item.caption;

  // Try combining multiple fields
  const parts: string[] = [];
  if (typeof item.title === 'string') parts.push(item.title);
  if (typeof item.subtitle === 'string') parts.push(item.subtitle);
  if (typeof item.body === 'string') parts.push(item.body);

  return parts.join('\n\n');
}

/**
 * Extract image URLs from actor result
 *
 * Each probed field may hold a single string, or an array whose elements are
 * strings or objects with a string `url` property. Anything else is skipped.
 *
 * @returns The collected URLs with duplicates removed, in first-seen order.
 */
function extractImages(item: Record<string, unknown>): string[] {
  const images: string[] = [];

  // Check common image fields
  const imageFields = ['image', 'images', 'src', 'imageUrl', 'photo', 'photos'];

  for (const field of imageFields) {
    const value = item[field];
    if (typeof value === 'string') {
      images.push(value);
    } else if (Array.isArray(value)) {
      for (const img of value) {
        if (typeof img === 'string') {
          images.push(img);
        } else if (typeof img === 'object' && img !== null) {
          const url = (img as Record<string, unknown>).url;
          if (typeof url === 'string') {
            images.push(url);
          }
        }
      }
    }
  }

  return Array.from(new Set(images)); // Remove duplicates
}

/**
 * Popular actor IDs (ready-to-use)
 *
 * Declared `as const` so the table is read-only and each entry keeps its
 * literal type.
 */
export const ACTOR_IDS = {
  instagram: 'apify/instagram-scraper',
  twitter: 'apidojo/tweet-scraper',
  tiktok: 'clockworks/tiktok-scraper',
  threads: 'apify/instagram-scraper', // Threads can be scraped via Instagram's API
  web: 'apify/website-content-crawler',
} as const;