File size: 2,418 Bytes
9a43362
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d9ba1d6
 
9a43362
d9ba1d6
 
c1e1733
d9ba1d6
9a43362
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
/**
 * URL Parser & Orchestrator
 * Routes URLs to platform-specific scrapers
 */

import { detectPlatform, type PlatformType } from '../scraper/detector';
import { scrapeInstagram } from '../scraper/instagram';
import { scrapeTwitter } from '../scraper/twitter';
import { scrapeTikTok } from '../scraper/tiktok';
import { scrapeThreads } from '../scraper/threads';
import { scrapeWeb } from '../scraper/web';
import type { ScrapingResult } from '../scraper/apify';

/**
 * Result returned by `parseURL`: a `ScrapingResult` with the platform attached.
 */
export interface URLParseResult extends ScrapingResult {
  /** Platform the URL was routed as; `parseURL` sets `'unknown'` on its failure and fallback paths. */
  platform?: PlatformType;
}

/**
 * Select the scraper function for a detected platform.
 * Platforms without a dedicated scraper are handled by the generic web scraper.
 */
function scraperFor(platform: PlatformType) {
  switch (platform) {
    case 'instagram':
      return scrapeInstagram;
    case 'twitter':
      return scrapeTwitter;
    case 'tiktok':
      return scrapeTikTok;
    case 'threads':
      return scrapeThreads;
    default:
      // Covers 'web' as well as any other platform value.
      return scrapeWeb;
  }
}

/**
 * Scrape the content behind a URL.
 *
 * The platform reported by `detectPlatform` decides which scraper runs; the
 * scraper's result is returned with that platform attached.
 *
 * @param url - The URL to scrape.
 * @returns A failure result when the platform is `'unknown'`; otherwise the
 *   scraper's result plus `platform`. If anything inside throws, a
 *   `success: true` result with `platform: 'unknown'` whose `content` is the
 *   raw URL.
 */
export async function parseURL(url: string): Promise<URLParseResult> {
  try {
    const { platform } = detectPlatform(url);

    // Guard clause: nothing to scrape when the URL cannot be classified.
    if (platform === 'unknown') {
      return {
        success: false,
        platform: 'unknown',
        error: 'Could not identify URL type. Please provide a valid URL.',
      };
    }

    const scrape = scraperFor(platform);
    const scraped: ScrapingResult = await scrape(url);

    return { ...scraped, platform };
  } catch (error) {
    console.warn('[url parser] scraping failed or no API key, falling back to raw url:', error);

    // Scraping threw: hand the raw URL back as `content` so downstream
    // consumers (the LLM) can still guess at the context.
    const fallback: URLParseResult = {
      success: true,
      platform: 'unknown',
      content: url,
      title: 'Failed to scrape - falling back to URL guess',
    };
    return fallback;
  }
}

/**
 * Report whether a URL can be routed to a scraper.
 *
 * @param url - The URL to check.
 * @returns `true` unless `detectPlatform` reports the platform as `'unknown'`.
 */
export function isParseableURL(url: string): boolean {
  return detectPlatform(url).platform !== 'unknown';
}

/**
 * Look up the platform that `detectPlatform` assigns to a URL.
 *
 * @param url - The URL to classify.
 * @returns The detected platform.
 */
export function getPlatformType(url: string): PlatformType {
  const { platform } = detectPlatform(url);
  return platform;
}