Spaces:
Sleeping
Sleeping
/**
 * Generic Web Scraper
 * Uses the Apify Website Content Crawler to scrape any web URL.
 */
| import { runApifyActor, ACTOR_IDS, type ScrapingResult } from './apify'; | |
/**
 * Scrape a single generic web page by URL.
 *
 * The URL is checked locally before any actor run is started: it must parse
 * as an absolute URL and use the `http:` or `https:` scheme. Valid URLs are
 * handed to the actor registered under `ACTOR_IDS.web`.
 *
 * This function does not reject: every failure is reported through the
 * returned object as `{ success: false, error }`.
 *
 * @param url - Absolute http(s) URL of the page to scrape.
 * @returns The actor result with `title` defaulted to `'Web Article'` when it
 *   is missing or empty, or a failure result when the URL is rejected or the
 *   actor call throws.
 */
export async function scrapeWeb(url: string): Promise<ScrapingResult> {
  try {
    // Validate URL
    let parsed: URL;
    try {
      parsed = new URL(url);
    } catch {
      return {
        success: false,
        error: 'Invalid URL format',
      };
    }

    // `new URL` also accepts non-web schemes (`file:`, `mailto:`,
    // `javascript:`, ...). Reject those here instead of forwarding them to
    // the crawler.
    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
      return {
        success: false,
        error: 'Unsupported URL protocol: only http and https are supported',
      };
    }

    // NOTE(review): the two `max*PerCrawl: 1` options are presumably meant to
    // limit the run to the given page only — confirm against the actor's
    // input schema.
    const result = await runApifyActor(ACTOR_IDS.web, {
      startUrls: [{ url }],
      maxPagesPerCrawl: 1,
      maxRequestsPerCrawl: 1,
    });

    return {
      ...result,
      // `||` (not `??`) is deliberate: an empty-string title should also
      // fall back to the generic label.
      title: result.title || 'Web Article',
    };
  } catch (error) {
    console.error('[web scraper] failed:', error);
    return {
      success: false,
      error: error instanceof Error ? error.message : 'Failed to scrape web page',
    };
  }
}