| import { normalizeUrl } from "./url-normalizer.js"; |
|
|
| |
| |
| |
|
|
/**
 * Describes a single URL read request handled by `readUrlFromConfig`.
 */
export interface ReadUrlConfig {
  /** URL to read. It is passed through `normalizeUrl` before being sent. */
  url: string;

  /**
   * When truthy, the request carries `X-With-Links-Summary: all` and any
   * links in the response are copied into the result.
   */
  withAllLinks?: boolean;

  /**
   * When truthy, the request carries `X-With-Images-Summary: true` and any
   * images in the response are copied into the result. Otherwise the request
   * carries `X-Retain-Images: none`.
   */
  withAllImages?: boolean;
}
|
|
/**
 * Successful outcome of reading one URL.
 */
export interface ReadUrlResult {
  /** Set to `true` by `readUrlFromConfig` on the success path. */
  success: boolean;

  /** The URL exactly as it was given in the request config (not the normalized form). */
  url: string;

  /**
   * Page data extracted from the reader response: `url`, `title` and
   * `content`, plus `links` and/or `images` when they were requested and
   * present in the response.
   */
  structuredData: any;

  /** Echo of the request's `withAllLinks` flag (`false` when it was omitted). */
  withAllLinks: boolean;

  /** Echo of the request's `withAllImages` flag (`false` when it was omitted). */
  withAllImages: boolean;
}
|
|
/**
 * Failed outcome of reading one URL.
 */
export interface ReadUrlError {
  /** Human-readable description of what went wrong. */
  error: string;

  /** The URL exactly as it was given in the request config. */
  url: string;
}
|
|
/**
 * Outcome of one URL read: either a successful result or an error record.
 * Error records are the variant that carries an `error` property.
 */
export type ReadUrlResponse =
  | ReadUrlResult
  | ReadUrlError;
|
|
| |
| |
| |
|
|
| |
| |
| |
/**
 * Converts the `links` field of a reader response into
 * `{ anchorText, url }` records.
 *
 * The expected shape is an array of `[anchorText, url]` pairs. An object keyed
 * by anchor text is accepted as well — NOTE(review): that shape is assumed to
 * be possible for some reader modes; confirm against the r.jina.ai docs.
 * Any other value yields `undefined` so the caller can simply omit the links
 * instead of failing the whole read.
 */
function toStructuredLinks(
  links: unknown
): Array<{ anchorText: unknown; url: unknown }> | undefined {
  let pairs: unknown[] | undefined;
  if (Array.isArray(links)) {
    pairs = links;
  } else if (typeof links === "object" && links !== null) {
    pairs = Object.entries(links);
  }

  return pairs
    ?.filter((pair): pair is unknown[] => Array.isArray(pair))
    .map((pair) => ({ anchorText: pair[0], url: pair[1] }));
}

/**
 * Reads a single URL through the `https://r.jina.ai/` endpoint and returns the
 * page's url, title and content, plus links/images when requested.
 *
 * This function never rejects: every failure (invalid URL, non-2xx HTTP
 * status, malformed response body, network error) is reported as a
 * `ReadUrlError` value.
 *
 * @param urlConfig - URL to read and the optional link/image flags.
 * @param bearerToken - Optional token sent as `Authorization: Bearer <token>`.
 * @returns A `ReadUrlResult` on success, otherwise a `ReadUrlError`. Both
 *   carry the URL exactly as given in `urlConfig`, not the normalized one.
 */
export async function readUrlFromConfig(
  urlConfig: ReadUrlConfig,
  bearerToken?: string
): Promise<ReadUrlResponse> {
  try {
    const normalizedUrl = normalizeUrl(urlConfig.url);
    if (!normalizedUrl) {
      return { error: "Invalid or unsupported URL", url: urlConfig.url };
    }

    const wantsLinks = Boolean(urlConfig.withAllLinks);
    const wantsImages = Boolean(urlConfig.withAllImages);

    const headers: Record<string, string> = {
      'Accept': 'application/json',
      'Content-Type': 'application/json',
      'X-Md-Link-Style': 'discarded',
    };

    if (bearerToken) {
      headers['Authorization'] = `Bearer ${bearerToken}`;
    }

    if (wantsLinks) {
      headers['X-With-Links-Summary'] = 'all';
    }

    if (wantsImages) {
      headers['X-With-Images-Summary'] = 'true';
    } else {
      headers['X-Retain-Images'] = 'none';
    }

    const response = await fetch('https://r.jina.ai/', {
      method: 'POST',
      headers,
      body: JSON.stringify({ url: normalizedUrl }),
    });

    if (!response.ok) {
      return { error: `HTTP ${response.status}: ${response.statusText}`, url: urlConfig.url };
    }

    // The body is untrusted JSON: narrow it step by step so that `null`,
    // primitives or a missing `data` envelope produce the dedicated error
    // below rather than a TypeError surfaced through the catch block.
    const body: unknown = await response.json();
    const envelope =
      typeof body === "object" && body !== null
        ? (body as { data?: unknown }).data
        : undefined;
    if (typeof envelope !== "object" || envelope === null) {
      return { error: "Invalid response data from r.jina.ai", url: urlConfig.url };
    }
    const page = envelope as Record<string, unknown>;

    // Key insertion order (url, title, links, images, content) is kept stable
    // because it determines the order in serialized output.
    const structuredData: Record<string, unknown> = {
      url: page.url,
      title: page.title,
    };

    if (wantsLinks && page.links) {
      const links = toStructuredLinks(page.links);
      if (links) {
        structuredData.links = links;
      }
    }

    if (wantsImages && page.images) {
      structuredData.images = page.images;
    }
    structuredData.content = page.content || "";

    return {
      success: true,
      url: urlConfig.url,
      structuredData,
      withAllLinks: wantsLinks,
      withAllImages: wantsImages,
    };
  } catch (error: unknown) {
    return {
      error: error instanceof Error ? `${error.message}\nStack: ${error.stack}` : String(error),
      url: urlConfig.url
    };
  }
}
|
|
| |
| |
| |
/**
 * Reads several URLs concurrently, bounded by one overall deadline.
 *
 * Each config is handed to `readUrlFromConfig`; results are returned in the
 * same order as `urlConfigs`.
 *
 * @param urlConfigs - Read requests to run concurrently.
 * @param bearerToken - Optional token forwarded to every read.
 * @param timeout - Overall deadline in milliseconds for the whole batch
 *   (default 30000).
 * @returns One response per input config, in input order.
 * @throws Error with message `Parallel URL read timeout` when the batch has
 *   not completed within `timeout` ms. Nothing here cancels the reads that
 *   are still in flight; their results are simply discarded.
 */
export async function executeParallelUrlReads(
  urlConfigs: ReadUrlConfig[],
  bearerToken?: string,
  timeout: number = 30000
): Promise<ReadUrlResponse[]> {
  let deadlineTimer: ReturnType<typeof setTimeout> | undefined;
  const timeoutPromise = new Promise<never>((_, reject) => {
    deadlineTimer = setTimeout(() => reject(new Error('Parallel URL read timeout')), timeout);
  });

  try {
    const readPromises = urlConfigs.map((urlConfig) => readUrlFromConfig(urlConfig, bearerToken));

    return await Promise.race([
      Promise.all(readPromises),
      timeoutPromise
    ]);
  } finally {
    // Always release the deadline timer: a pending timer would otherwise keep
    // the event loop alive for up to `timeout` ms after the reads finished.
    clearTimeout(deadlineTimer);
  }
}
|
|