| import path from "node:path"; |
| import { formatErrorMessage } from "../infra/errors.js"; |
| import { fetchWithSsrFGuard, withStrictGuardedFetchMode } from "../infra/net/fetch-guard.js"; |
| import type { LookupFn, PinnedDispatcherPolicy, SsrFPolicy } from "../infra/net/ssrf.js"; |
| import { redactSensitiveText } from "../logging/redact.js"; |
| import { detectMime, extensionForMime } from "./mime.js"; |
| import { readResponseWithLimit } from "./read-response-with-limit.js"; |
|
|
| type FetchMediaResult = { |
| buffer: Buffer; |
| contentType?: string; |
| fileName?: string; |
| }; |
|
|
| export type MediaFetchErrorCode = "max_bytes" | "http_error" | "fetch_failed"; |
|
|
| export class MediaFetchError extends Error { |
| readonly code: MediaFetchErrorCode; |
|
|
| constructor(code: MediaFetchErrorCode, message: string, options?: { cause?: unknown }) { |
| super(message, options); |
| this.code = code; |
| this.name = "MediaFetchError"; |
| } |
| } |
|
|
| export type FetchLike = (input: RequestInfo | URL, init?: RequestInit) => Promise<Response>; |
|
|
| type FetchMediaOptions = { |
| url: string; |
| fetchImpl?: FetchLike; |
| requestInit?: RequestInit; |
| filePathHint?: string; |
| maxBytes?: number; |
| maxRedirects?: number; |
| |
| readIdleTimeoutMs?: number; |
| ssrfPolicy?: SsrFPolicy; |
| lookupFn?: LookupFn; |
| dispatcherPolicy?: PinnedDispatcherPolicy; |
| fallbackDispatcherPolicy?: PinnedDispatcherPolicy; |
| shouldRetryFetchError?: (error: unknown) => boolean; |
| }; |
|
|
| function stripQuotes(value: string): string { |
| return value.replace(/^["']|["']$/g, ""); |
| } |
|
|
| function parseContentDispositionFileName(header?: string | null): string | undefined { |
| if (!header) { |
| return undefined; |
| } |
| const starMatch = /filename\*\s*=\s*([^;]+)/i.exec(header); |
| if (starMatch?.[1]) { |
| const cleaned = stripQuotes(starMatch[1].trim()); |
| const encoded = cleaned.split("''").slice(1).join("''") || cleaned; |
| try { |
| return path.basename(decodeURIComponent(encoded)); |
| } catch { |
| return path.basename(encoded); |
| } |
| } |
| const match = /filename\s*=\s*([^;]+)/i.exec(header); |
| if (match?.[1]) { |
| return path.basename(stripQuotes(match[1].trim())); |
| } |
| return undefined; |
| } |
|
|
| async function readErrorBodySnippet(res: Response, maxChars = 200): Promise<string | undefined> { |
| try { |
| const text = await res.text(); |
| if (!text) { |
| return undefined; |
| } |
| const collapsed = text.replace(/\s+/g, " ").trim(); |
| if (!collapsed) { |
| return undefined; |
| } |
| if (collapsed.length <= maxChars) { |
| return collapsed; |
| } |
| return `${collapsed.slice(0, maxChars)}…`; |
| } catch { |
| return undefined; |
| } |
| } |
|
|
| function redactMediaUrl(url: string): string { |
| return redactSensitiveText(url); |
| } |
|
|
| export async function fetchRemoteMedia(options: FetchMediaOptions): Promise<FetchMediaResult> { |
| const { |
| url, |
| fetchImpl, |
| requestInit, |
| filePathHint, |
| maxBytes, |
| maxRedirects, |
| readIdleTimeoutMs, |
| ssrfPolicy, |
| lookupFn, |
| dispatcherPolicy, |
| fallbackDispatcherPolicy, |
| shouldRetryFetchError, |
| } = options; |
| const sourceUrl = redactMediaUrl(url); |
|
|
| let res: Response; |
| let finalUrl = url; |
| let release: (() => Promise<void>) | null = null; |
| const runGuardedFetch = async (policy?: PinnedDispatcherPolicy) => |
| await fetchWithSsrFGuard( |
| withStrictGuardedFetchMode({ |
| url, |
| fetchImpl, |
| init: requestInit, |
| maxRedirects, |
| policy: ssrfPolicy, |
| lookupFn, |
| dispatcherPolicy: policy, |
| }), |
| ); |
| try { |
| let result; |
| try { |
| result = await runGuardedFetch(dispatcherPolicy); |
| } catch (err) { |
| if ( |
| fallbackDispatcherPolicy && |
| typeof shouldRetryFetchError === "function" && |
| shouldRetryFetchError(err) |
| ) { |
| try { |
| result = await runGuardedFetch(fallbackDispatcherPolicy); |
| } catch (fallbackErr) { |
| const combined = new Error( |
| `Primary fetch failed and fallback fetch also failed for ${sourceUrl}`, |
| { cause: fallbackErr }, |
| ); |
| (combined as Error & { primaryError?: unknown }).primaryError = err; |
| throw combined; |
| } |
| } else { |
| throw err; |
| } |
| } |
| res = result.response; |
| finalUrl = result.finalUrl; |
| release = result.release; |
| } catch (err) { |
| throw new MediaFetchError( |
| "fetch_failed", |
| `Failed to fetch media from ${sourceUrl}: ${formatErrorMessage(err)}`, |
| { |
| cause: err, |
| }, |
| ); |
| } |
|
|
| try { |
| if (!res.ok) { |
| const statusText = res.statusText ? ` ${res.statusText}` : ""; |
| const redirected = finalUrl !== url ? ` (redirected to ${redactMediaUrl(finalUrl)})` : ""; |
| let detail = `HTTP ${res.status}${statusText}`; |
| if (!res.body) { |
| detail = `HTTP ${res.status}${statusText}; empty response body`; |
| } else { |
| const snippet = await readErrorBodySnippet(res); |
| if (snippet) { |
| detail += `; body: ${snippet}`; |
| } |
| } |
| throw new MediaFetchError( |
| "http_error", |
| `Failed to fetch media from ${sourceUrl}${redirected}: ${redactSensitiveText(detail)}`, |
| ); |
| } |
|
|
| const contentLength = res.headers.get("content-length"); |
| if (maxBytes && contentLength) { |
| const length = Number(contentLength); |
| if (Number.isFinite(length) && length > maxBytes) { |
| throw new MediaFetchError( |
| "max_bytes", |
| `Failed to fetch media from ${sourceUrl}: content length ${length} exceeds maxBytes ${maxBytes}`, |
| ); |
| } |
| } |
|
|
| let buffer: Buffer; |
| try { |
| buffer = maxBytes |
| ? await readResponseWithLimit(res, maxBytes, { |
| onOverflow: ({ maxBytes, res }) => |
| new MediaFetchError( |
| "max_bytes", |
| `Failed to fetch media from ${redactMediaUrl(res.url || url)}: payload exceeds maxBytes ${maxBytes}`, |
| ), |
| chunkTimeoutMs: readIdleTimeoutMs, |
| }) |
| : Buffer.from(await res.arrayBuffer()); |
| } catch (err) { |
| if (err instanceof MediaFetchError) { |
| throw err; |
| } |
| throw new MediaFetchError( |
| "fetch_failed", |
| `Failed to fetch media from ${redactMediaUrl(res.url || url)}: ${formatErrorMessage(err)}`, |
| { cause: err }, |
| ); |
| } |
| let fileNameFromUrl: string | undefined; |
| try { |
| const parsed = new URL(finalUrl); |
| const base = path.basename(parsed.pathname); |
| fileNameFromUrl = base || undefined; |
| } catch { |
| |
| } |
|
|
| const headerFileName = parseContentDispositionFileName(res.headers.get("content-disposition")); |
| let fileName = |
| headerFileName || fileNameFromUrl || (filePathHint ? path.basename(filePathHint) : undefined); |
|
|
| const filePathForMime = |
| headerFileName && path.extname(headerFileName) ? headerFileName : (filePathHint ?? finalUrl); |
| const contentType = await detectMime({ |
| buffer, |
| headerMime: res.headers.get("content-type"), |
| filePath: filePathForMime, |
| }); |
| if (fileName && !path.extname(fileName) && contentType) { |
| const ext = extensionForMime(contentType); |
| if (ext) { |
| fileName = `${fileName}${ext}`; |
| } |
| } |
|
|
| return { |
| buffer, |
| contentType: contentType ?? undefined, |
| fileName, |
| }; |
| } finally { |
| if (release) { |
| await release(); |
| } |
| } |
| } |
|
|