// Spaces: AIgoose / carouselforge (Sleeping)
// File size: 2,418 Bytes

/**
 * URL Parser & Orchestrator
 * Routes URLs to platform-specific scrapers
 */

import { detectPlatform, type PlatformType } from '../scraper/detector';
import { scrapeInstagram } from '../scraper/instagram';
import { scrapeTwitter } from '../scraper/twitter';
import { scrapeTikTok } from '../scraper/tiktok';
import { scrapeThreads } from '../scraper/threads';
import { scrapeWeb } from '../scraper/web';
import type { ScrapingResult } from '../scraper/apify';

/**
 * Result returned by the URL parser: every field of the underlying
 * `ScrapingResult`, plus the platform associated with the URL.
 */
export interface URLParseResult extends ScrapingResult {
  /** Platform associated with the URL; optional on the type. */
  platform?: PlatformType;
}

/**
 * Detect which platform a URL belongs to and run the matching scraper.
 *
 * Outcomes:
 * - Platform detected as `'unknown'`: resolves with `success: false` and an
 *   error message; no scraper is invoked.
 * - Platform recognised: resolves with the scraper's result, tagged with the
 *   detected platform. Platforms without a dedicated scraper (including
 *   `'web'`) go through the generic web scraper.
 * - Anything thrown during detection or scraping: a warning is logged and the
 *   call resolves with `success: true`, carrying the raw URL as `content`.
 *
 * @param url - The URL to inspect and scrape.
 * @returns The scraping outcome together with its platform.
 */
export async function parseURL(url: string): Promise<URLParseResult> {
  try {
    const { platform } = detectPlatform(url);

    // Guard: an unrecognisable URL is reported as a failure straight away.
    if (platform === 'unknown') {
      return {
        success: false,
        platform: 'unknown',
        error: 'Could not identify URL type. Please provide a valid URL.',
      };
    }

    // Dispatch to the dedicated scraper when one exists; everything else
    // falls through to the generic web scraper.
    let scraped: ScrapingResult;
    if (platform === 'instagram') {
      scraped = await scrapeInstagram(url);
    } else if (platform === 'twitter') {
      scraped = await scrapeTwitter(url);
    } else if (platform === 'tiktok') {
      scraped = await scrapeTikTok(url);
    } else if (platform === 'threads') {
      scraped = await scrapeThreads(url);
    } else {
      scraped = await scrapeWeb(url);
    }

    return { ...scraped, platform };
  } catch (err) {
    console.warn('[url parser] scraping failed or no API key, falling back to raw url:', err);

    // Best-effort fallback: hand the raw URL downstream (per the original
    // note, so an LLM can guess the context). The URL goes in `content`,
    // which is the property the result mapping expects.
    const fallback: URLParseResult = {
      success: true,
      platform: 'unknown',
      content: url,
      title: 'Failed to scrape - falling back to URL guess',
    };
    return fallback;
  }
}

/**
 * Report whether platform detection recognises the URL.
 *
 * @param url - The URL to test.
 * @returns `true` when the detected platform is anything other than
 *   `'unknown'`, otherwise `false`.
 */
export function isParseableURL(url: string): boolean {
  return detectPlatform(url).platform !== 'unknown';
}

/**
 * Look up the platform that detection assigns to a URL.
 *
 * @param url - The URL to classify.
 * @returns The `platform` field of the detection result.
 */
export function getPlatformType(url: string): PlatformType {
  const { platform } = detectPlatform(url);
  return platform;
}