carouselforge / src /lib /scraper /apify.ts
CarouselForge Developer
fix: resolve TypeScript and test configuration issues for Phase 13
9a43362
/**
* Apify Client Wrapper
* Simplified interface for running Apify actors and handling results
*/
import { ApifyClient } from 'apify-client';
// Module-wide Apify SDK client. The token is taken from the APIFY_API_KEY
// environment variable at the moment this module is evaluated.
const apifyClient = new ApifyClient({ token: process.env.APIFY_API_KEY });
/**
* Normalized outcome of an actor run, as returned by `runApifyActor`.
* Failure results carry only `success: false` and `error`; the remaining
* fields are populated on success.
*/
export interface ScrapingResult {
/** `true` when a dataset item was retrieved and mapped, `false` otherwise. */
success: boolean;
/** Text pulled from the dataset item by `extractText`. */
content?: string;
/** Taken from the item's `title` field, falling back to `name`. */
title?: string;
/** Taken from the item's `description` field. */
description?: string;
/** De-duplicated image URLs collected by `extractImages`. */
images?: string[];
/** The raw dataset item the other fields were derived from. */
metadata?: Record<string, unknown>;
/** Human-readable failure reason; set when `success` is `false`. */
error?: string;
}
/**
 * Return the first candidate that is a non-empty string, or `undefined`
 * when none qualifies. Non-string values are skipped rather than coerced,
 * so untyped actor output can never leak into a `string` field.
 */
function firstNonEmptyString(...candidates: unknown[]): string | undefined {
  for (const candidate of candidates) {
    if (typeof candidate === 'string' && candidate !== '') {
      return candidate;
    }
  }
  return undefined;
}

/**
 * Run an Apify actor and map the first item of its default dataset to a
 * `ScrapingResult`.
 *
 * This function never throws: a missing API key, an empty dataset, or any
 * error raised by the client is reported as `{ success: false, error }`.
 *
 * @param actorId - Apify actor identifier, e.g. one of `ACTOR_IDS`.
 * @param input - Actor input, forwarded to the actor unchanged.
 * @returns The mapped first dataset item on success, otherwise a failure
 *   result carrying a human-readable `error` message.
 */
export async function runApifyActor(
  actorId: string,
  input: Record<string, unknown>
): Promise<ScrapingResult> {
  try {
    if (!process.env.APIFY_API_KEY) {
      return {
        success: false,
        error: 'Apify API key not configured',
      };
    }

    // Start the actor and await the run record returned by `call`.
    const run = await apifyClient.actor(actorId).call(input);

    // Read the items the run wrote to its default dataset.
    const dataset = await apifyClient.dataset(run.defaultDatasetId).listItems();
    if (!dataset.items || dataset.items.length === 0) {
      return {
        success: false,
        error: 'No data returned from actor',
      };
    }

    // Only the first item is mapped; any further items are not inspected.
    const item = dataset.items[0] as Record<string, unknown>;

    return {
      success: true,
      content: extractText(item),
      // Actor output is untyped: narrow at runtime instead of asserting, so
      // a numeric or object-valued `title`/`description` is dropped rather
      // than returned under a `string` type.
      title: firstNonEmptyString(item.title, item.name),
      description: firstNonEmptyString(item.description),
      images: extractImages(item),
      metadata: item,
    };
  } catch (error) {
    console.error('[apify] actor failed:', error);
    return {
      success: false,
      error: error instanceof Error ? error.message : 'Actor execution failed',
    };
  }
}
/**
 * Extract text content from an actor result item.
 *
 * The first of `text`, `content`, `description`, `caption` that holds a
 * non-blank string is returned unchanged. If none does, the non-blank
 * values of `title`, `subtitle` and `body` are joined with a blank line.
 * Blank (empty or whitespace-only) strings are skipped so that a
 * placeholder such as `text: ''` cannot hide real content in a later field.
 *
 * @param item - Raw dataset item with arbitrary, untyped fields.
 * @returns The extracted text, or `''` when no usable field exists.
 */
function extractText(item: Record<string, unknown>): string {
  const hasText = (value: unknown): value is string =>
    typeof value === 'string' && value.trim() !== '';

  // Prefer a single dedicated text field, in priority order.
  for (const field of ['text', 'content', 'description', 'caption']) {
    const value = item[field];
    if (hasText(value)) return value;
  }

  // Otherwise stitch together whichever headline/body parts are present.
  return [item.title, item.subtitle, item.body].filter(hasText).join('\n\n');
}
/**
 * Extract image URLs from an actor result item.
 *
 * The fields `image`, `images`, `src`, `imageUrl`, `photo` and `photos` are
 * inspected in that order. Each field may hold a URL string, an object with
 * a string `url` property, or an array of either. Empty strings and any
 * other value types are ignored.
 *
 * @param item - Raw dataset item with arbitrary, untyped fields.
 * @returns Unique URLs in first-seen order; empty when none are found.
 */
function extractImages(item: Record<string, unknown>): string[] {
  // Resolve one candidate to a URL, or `undefined` when it is not usable.
  const toUrl = (candidate: unknown): string | undefined => {
    if (typeof candidate === 'string') {
      return candidate !== '' ? candidate : undefined;
    }
    if (typeof candidate === 'object' && candidate !== null) {
      const url = (candidate as Record<string, unknown>).url;
      return typeof url === 'string' && url !== '' ? url : undefined;
    }
    return undefined;
  };

  const imageFields = ['image', 'images', 'src', 'imageUrl', 'photo', 'photos'];
  // A Set keeps insertion order, so duplicates collapse to the first occurrence.
  const urls = new Set<string>();

  for (const field of imageFields) {
    const value = item[field];
    // Treat a lone value like a one-element list so that a single
    // `{ url }` object is handled the same way as one inside an array.
    const candidates: unknown[] = Array.isArray(value) ? value : [value];
    for (const candidate of candidates) {
      const url = toUrl(candidate);
      if (url !== undefined) urls.add(url);
    }
  }

  return Array.from(urls);
}
// The Instagram scraper actor is referenced by more than one platform entry.
const INSTAGRAM_SCRAPER_ACTOR = 'apify/instagram-scraper';

/**
 * Ready-to-use Apify actor IDs, keyed by the platform they target.
 *
 * `threads` deliberately reuses the Instagram scraper actor.
 * NOTE(review): the original author's rationale was that Threads can be
 * scraped via Instagram's API; confirm the actor actually supports it.
 */
export const ACTOR_IDS = {
  instagram: INSTAGRAM_SCRAPER_ACTOR,
  twitter: 'apidojo/tweet-scraper',
  tiktok: 'clockworks/tiktok-scraper',
  threads: INSTAGRAM_SCRAPER_ACTOR,
  web: 'apify/website-content-crawler',
};