mishig's picture
mishig HF Staff
Sync from GitHub via hub-sync
c2e58ef verified
import { NextRequest } from "next/server";
// Same-origin streaming proxy for huggingface.co. The native <video> element
// can't carry an Authorization header, so we proxy through this route, which
// pulls the user's HF access token from the HttpOnly `hf_access_token` cookie
// (set by /api/auth/session after OAuth) and forwards Range requests upstream.
//
// Public datasets work too β€” the upstream simply ignores the bearer token.
//
// Allowed path prefixes are constrained so this can't be turned into an open
// proxy for arbitrary huggingface.co URLs (e.g. user profile, billing pages).
const HF_HOST = "https://huggingface.co";
const COOKIE_NAME = "hf_access_token";
const ALLOWED_PREFIXES = ["datasets/", "buckets/"];
export const runtime = "nodejs";
export const dynamic = "force-dynamic";
const FORWARD_REQUEST_HEADERS = [
"range",
"if-modified-since",
"if-none-match",
"accept",
"accept-encoding",
];
const FORWARD_RESPONSE_HEADERS = [
"content-type",
"content-length",
"content-range",
"accept-ranges",
"etag",
"last-modified",
"cache-control",
];
// Generous enough for first-byte on a multi-GB video over a slow network,
// strict enough that hung connections don't pile up server-side.
const UPSTREAM_TIMEOUT_MS = 30_000;
// Cancel the upstream when either (a) the client disconnects, so we stop
// pulling bytes nobody is reading, or (b) the timeout fires, so a hung HF
// connection eventually surrenders its socket.
function upstreamSignal(req: NextRequest): AbortSignal {
return AbortSignal.any([
req.signal,
AbortSignal.timeout(UPSTREAM_TIMEOUT_MS),
]);
}
// Shared by GET and HEAD so they always forward the same set of headers.
// Previously HEAD only attached Authorization, so a client sending a
// conditional HEAD (If-None-Match etag check) would always get a fresh
// 200 instead of a 304 β€” defeating the cache validation it was asking for.
function buildUpstreamHeaders(req: NextRequest): Headers {
const headers = new Headers();
const token = req.cookies.get(COOKIE_NAME)?.value;
if (token) headers.set("authorization", `Bearer ${token}`);
for (const h of FORWARD_REQUEST_HEADERS) {
const v = req.headers.get(h);
if (v) headers.set(h, v);
}
return headers;
}
// Build the upstream URL and validate it. Returns the URL or null if the
// request should be rejected.
//
// Two attack surfaces this guards against:
// 1. Path traversal β€” `subPath = "datasets/../api/tokens"` passes a naive
// startsWith("datasets/") check, but URL normalization resolves it to
// huggingface.co/api/tokens. We re-check the prefix on the *normalized*
// pathname after construction, so traversal is caught.
// 2. Origin escape β€” exotic URL syntax could cause new URL() to land on a
// different host. We assert origin === HF_HOST.
function resolveUpstreamUrl(
subPath: string,
searchParams: URLSearchParams,
): URL | null {
let upstreamUrl: URL;
try {
upstreamUrl = new URL(`${HF_HOST}/${subPath}`);
} catch {
return null;
}
if (upstreamUrl.origin !== HF_HOST) return null;
const normalized = upstreamUrl.pathname.replace(/^\/+/, "");
if (!ALLOWED_PREFIXES.some((p) => normalized.startsWith(p))) return null;
for (const [k, v] of searchParams) {
upstreamUrl.searchParams.set(k, v);
}
return upstreamUrl;
}
export async function GET(
req: NextRequest,
ctx: { params: Promise<{ path: string[] }> },
) {
const { path } = await ctx.params;
const upstreamUrl = resolveUpstreamUrl(
path.join("/"),
req.nextUrl.searchParams,
);
if (!upstreamUrl) return new Response("Forbidden", { status: 403 });
const headers = buildUpstreamHeaders(req);
let upstream: Response;
try {
upstream = await fetch(upstreamUrl, {
method: "GET",
headers,
redirect: "follow",
cache: "no-store",
signal: upstreamSignal(req),
});
} catch (err) {
// Network error reaching huggingface.co, or the upstream timed out, or
// the client went away. The native <video> turns this into a generic
// load error with no details, so log server-side and return a useful
// status the client can surface in devtools.
const isTimeout = err instanceof Error && err.name === "TimeoutError";
console.error("[proxy] upstream fetch failed", err);
return new Response(
isTimeout
? "Gateway timeout: upstream took too long"
: "Bad gateway: upstream fetch failed",
{ status: isTimeout ? 504 : 502 },
);
}
const respHeaders = new Headers();
for (const h of FORWARD_RESPONSE_HEADERS) {
const v = upstream.headers.get(h);
if (v) respHeaders.set(h, v);
}
return new Response(upstream.body, {
status: upstream.status,
statusText: upstream.statusText,
headers: respHeaders,
});
}
export async function HEAD(
req: NextRequest,
ctx: { params: Promise<{ path: string[] }> },
) {
const { path } = await ctx.params;
const upstreamUrl = resolveUpstreamUrl(
path.join("/"),
req.nextUrl.searchParams,
);
if (!upstreamUrl) return new Response(null, { status: 403 });
const headers = buildUpstreamHeaders(req);
let upstream: Response;
try {
upstream = await fetch(upstreamUrl, {
method: "HEAD",
headers,
redirect: "follow",
cache: "no-store",
signal: upstreamSignal(req),
});
} catch (err) {
const isTimeout = err instanceof Error && err.name === "TimeoutError";
console.error("[proxy] upstream HEAD failed", err);
return new Response(null, { status: isTimeout ? 504 : 502 });
}
const respHeaders = new Headers();
for (const h of FORWARD_RESPONSE_HEADERS) {
const v = upstream.headers.get(h);
if (v) respHeaders.set(h, v);
}
return new Response(null, {
status: upstream.status,
statusText: upstream.statusText,
headers: respHeaders,
});
}