Spaces:
Running
Running
/**
 * Apify Client Wrapper
 * Simplified interface for running Apify actors and handling results
 */
| import { ApifyClient } from 'apify-client'; | |
// Module-wide Apify client. The token is read from the APIFY_API_KEY
// environment variable once, when this module is first loaded.
const apifyClient = new ApifyClient({
  token: process.env.APIFY_API_KEY,
});
/**
 * Normalised outcome of an actor run.
 *
 * `success` is always present; every other field is optional and only set
 * when the corresponding data (or error) is available.
 */
export interface ScrapingResult {
  /** `true` when an item was retrieved and mapped, `false` otherwise. */
  success: boolean;
  /** Main text extracted from the item. */
  content?: string;
  /** Item title. */
  title?: string;
  /** Item description. */
  description?: string;
  /** Image URLs found on the item. */
  images?: string[];
  /** Additional untyped data attached to the result. */
  metadata?: Record<string, unknown>;
  /** Human-readable failure reason. */
  error?: string;
}
/**
 * Run an Apify actor and map the first item of its default dataset to a
 * {@link ScrapingResult}.
 *
 * This function does not reject: a missing API key, an empty dataset and any
 * error thrown while talking to Apify are all reported as
 * `{ success: false, error }`.
 *
 * @param actorId - Actor identifier handed to `apifyClient.actor()`.
 * @param input - Input object forwarded unchanged to the actor run.
 * @returns The normalised first dataset item, or a failure result.
 */
export async function runApifyActor(
  actorId: string,
  input: Record<string, unknown>
): Promise<ScrapingResult> {
  // Dataset items are untyped; narrow at runtime instead of asserting, so a
  // non-string `title`/`description` can never leak into the typed result.
  const asString = (value: unknown): string | undefined =>
    typeof value === 'string' ? value : undefined;

  try {
    if (!process.env.APIFY_API_KEY) {
      return {
        success: false,
        error: 'Apify API key not configured',
      };
    }

    // Start the actor run and obtain its run record.
    const run = await apifyClient.actor(actorId).call(input);

    // Only the first item is consumed below, so request a single item rather
    // than downloading the entire dataset.
    const dataset = await apifyClient
      .dataset(run.defaultDatasetId)
      .listItems({ limit: 1 });

    if (!dataset.items || dataset.items.length === 0) {
      return {
        success: false,
        error: 'No data returned from actor',
      };
    }

    const item = dataset.items[0] as Record<string, unknown>;

    return {
      success: true,
      content: extractText(item),
      // `||` keeps the existing fallthrough: an empty title falls back to
      // `name`, and an empty result becomes `undefined`.
      title: asString(item.title) || asString(item.name) || undefined,
      description: asString(item.description) || undefined,
      images: extractImages(item),
      metadata: item,
    };
  } catch (error) {
    console.error('[apify] actor failed:', error);
    return {
      success: false,
      error: error instanceof Error ? error.message : 'Actor execution failed',
    };
  }
}
/**
 * Extract text content from an actor result item.
 *
 * Looks at `text`, `content`, `description` and `caption` in that order and
 * returns the first one that holds a non-blank string. If none does, joins
 * the non-blank `title`, `subtitle` and `body` strings with a blank line.
 *
 * @param item - Raw dataset item.
 * @returns The extracted text, or `''` when no usable field exists.
 */
function extractText(item: Record<string, unknown>): string {
  // A blank string counts as missing so that an empty `text` field cannot
  // hide a populated `content`/`description`/`caption`.
  const hasText = (value: unknown): value is string =>
    typeof value === 'string' && value.trim() !== '';

  // Try common text fields, most specific first.
  for (const field of ['text', 'content', 'description', 'caption']) {
    const value = item[field];
    if (hasText(value)) return value;
  }

  // Fall back to combining multiple fields, skipping blank ones so the
  // result has no leading, trailing or doubled separators.
  return [item.title, item.subtitle, item.body].filter(hasText).join('\n\n');
}
/**
 * Extract image URLs from an actor result item.
 *
 * Inspects the fields `image`, `images`, `src`, `imageUrl`, `photo` and
 * `photos`. Each field may hold a URL string, an object with a string `url`
 * property, or an array of either. Empty strings and values of any other
 * shape are ignored.
 *
 * @param item - Raw dataset item.
 * @returns Unique URLs in first-seen order; empty when nothing is found.
 */
function extractImages(item: Record<string, unknown>): string[] {
  // Check common image fields
  const imageFields = ['image', 'images', 'src', 'imageUrl', 'photo', 'photos'];

  // Resolve one candidate to a URL: a plain string, or an object's `url`.
  const toUrl = (candidate: unknown): string | undefined => {
    if (typeof candidate === 'string') return candidate;
    if (typeof candidate === 'object' && candidate !== null) {
      const url = (candidate as Record<string, unknown>).url;
      if (typeof url === 'string') return url;
    }
    return undefined;
  };

  // A Set removes duplicates while keeping insertion order.
  const unique = new Set<string>();

  for (const field of imageFields) {
    const value = item[field];
    // Treat a single value as a one-element list so a lone `{ url }` object
    // is handled exactly like one found inside an array.
    const candidates: unknown[] = Array.isArray(value) ? value : [value];
    for (const candidate of candidates) {
      const url = toUrl(candidate);
      if (url) unique.add(url); // also drops empty strings
    }
  }

  return Array.from(unique);
}
/**
 * Actor identifiers keyed by the platform they are used for.
 * Values are strings suitable for the `actorId` argument of `runApifyActor`.
 */
export const ACTOR_IDS = {
  /** Instagram scraper actor. */
  instagram: 'apify/instagram-scraper',
  /** Tweet scraper actor. */
  twitter: 'apidojo/tweet-scraper',
  /** TikTok scraper actor. */
  tiktok: 'clockworks/tiktok-scraper',
  /** Threads deliberately reuses the same actor as `instagram`. */
  threads: 'apify/instagram-scraper',
  /** General website content crawler actor. */
  web: 'apify/website-content-crawler',
};