Spaces:
Sleeping
Sleeping
| /** | |
| * URL Parser & Orchestrator | |
| * Routes URLs to platform-specific scrapers | |
| */ | |
| import { detectPlatform, type PlatformType } from '../scraper/detector'; | |
| import { scrapeInstagram } from '../scraper/instagram'; | |
| import { scrapeTwitter } from '../scraper/twitter'; | |
| import { scrapeTikTok } from '../scraper/tiktok'; | |
| import { scrapeThreads } from '../scraper/threads'; | |
| import { scrapeWeb } from '../scraper/web'; | |
| import type { ScrapingResult } from '../scraper/apify'; | |
/**
 * Outcome of parsing a URL: every field of the underlying scraping result,
 * optionally tagged with the platform the URL was routed to.
 */
export interface URLParseResult extends ScrapingResult {
  /** Platform reported for the URL; may be absent. */
  platform?: PlatformType;
}
/**
 * Run the scraper that handles the given platform.
 * Any platform without a dedicated scraper (including 'web') is handled by
 * the generic web scraper.
 */
async function scrapeByPlatform(platform: PlatformType, url: string): Promise<ScrapingResult> {
  switch (platform) {
    case 'instagram':
      return scrapeInstagram(url);
    case 'twitter':
      return scrapeTwitter(url);
    case 'tiktok':
      return scrapeTikTok(url);
    case 'threads':
      return scrapeThreads(url);
    default:
      // 'web' and every other non-dedicated platform end up here.
      return scrapeWeb(url);
  }
}

/**
 * Parse a URL and extract its content.
 *
 * The platform is detected first and the URL is then handed to the matching
 * scraper; the scraper's result is returned with the detected platform added.
 *
 * @param url - The URL to detect and scrape.
 * @returns A failure result when the platform is 'unknown'; otherwise the
 *   scraper's result plus `platform`. If detection or scraping throws, a
 *   `success: true` fallback carrying the raw URL as `content` is returned
 *   instead of propagating the error.
 */
export async function parseURL(url: string): Promise<URLParseResult> {
  try {
    const { platform } = detectPlatform(url);

    // Guard clause: nothing can be scraped for an unrecognised URL.
    if (platform === 'unknown') {
      return {
        success: false,
        platform: 'unknown',
        error: 'Could not identify URL type. Please provide a valid URL.',
      };
    }

    const scraped = await scrapeByPlatform(platform, url);
    return { ...scraped, platform };
  } catch (error) {
    console.warn('[url parser] scraping failed or no API key, falling back to raw url:', error);

    // Best-effort fallback: hand the raw URL downstream so the LLM can guess
    // the context. The URL goes in `content` (not `extractedText`) so it maps
    // onto the result shape correctly.
    const fallback: URLParseResult = {
      success: true,
      platform: 'unknown',
      content: url,
      title: 'Failed to scrape - falling back to URL guess',
    };
    return fallback;
  }
}
/**
 * Tell whether a URL can be parsed.
 *
 * @param url - The URL to check.
 * @returns `true` when platform detection yields anything other than 'unknown'.
 */
export function isParseableURL(url: string): boolean {
  return detectPlatform(url).platform !== 'unknown';
}
/**
 * Look up the platform type detected for a URL.
 *
 * @param url - The URL to classify.
 * @returns The `platform` value reported by platform detection.
 */
export function getPlatformType(url: string): PlatformType {
  const { platform } = detectPlatform(url);
  return platform;
}