import { NextRequest } from "next/server";
// Same-origin streaming proxy for huggingface.co. The native <video> element
// can't carry an Authorization header, so we proxy through this route, which
// pulls the user's HF access token from the HttpOnly `hf_access_token` cookie
// (set by /api/auth/session after OAuth) and forwards Range requests upstream.
//
// Public datasets work too — the upstream simply ignores the bearer token.
//
// Allowed path prefixes are constrained so this can't be turned into an open
// proxy for arbitrary huggingface.co URLs (e.g. user profile, billing pages).
// Upstream origin. Every proxied request is built against this host, and the
// resolved URL's origin is compared against it before anything is fetched.
const HF_HOST = "https://huggingface.co";
// Name of the cookie the bearer token is read from.
const COOKIE_NAME = "hf_access_token";
// Path prefixes (relative to the origin root, no leading slash) that may be
// proxied. Anything else is rejected.
const ALLOWED_PREFIXES = ["datasets/", "buckets/"];
// Route segment config exports consumed by Next.js. Presumably these pin the
// handler to the Node.js runtime and opt it out of static caching — confirm
// against the Next.js docs for the version in use. Keep them as plain
// literals.
export const runtime = "nodejs";
export const dynamic = "force-dynamic";
// Client request headers copied onto the upstream request (lowercase names).
// `if-range` travels together with `range`: without it a range request that
// the client made conditional on a validator would reach upstream as an
// unconditional one, and the client could end up splicing bytes of a changed
// file onto the part it already holds.
const FORWARD_REQUEST_HEADERS: readonly string[] = [
  // Partial content
  "range",
  "if-range",
  // Cache validation
  "if-modified-since",
  "if-none-match",
  // Content negotiation
  "accept",
  "accept-encoding",
];
// Upstream response headers that are copied back to the client (lowercase
// names). Anything not listed here is dropped.
const FORWARD_RESPONSE_HEADERS: readonly string[] = [
  // Entity description
  "content-type",
  "content-length",
  // Range support
  "content-range",
  "accept-ranges",
  // Validators and caching
  "etag",
  "last-modified",
  "cache-control",
];
// Upstream deadline in milliseconds (30 seconds). Long enough to wait for the
// first byte of a multi-GB video over a slow network, short enough that hung
// connections don't accumulate on the server.
const UPSTREAM_TIMEOUT_MS = 30 * 1_000;
// Returns a signal that aborts the upstream request as soon as either
//   (a) the client disconnects, so we stop pulling bytes nobody reads, or
//   (b) UPSTREAM_TIMEOUT_MS elapses, so a hung HF connection gives its
//       socket back.
// The deadline is absolute: it starts when this function is called and is
// not cancelled when response headers arrive, so a fetch holding this signal
// is also aborted if its body is still streaming when the deadline fires.
function upstreamSignal(req: NextRequest): AbortSignal {
  const clientGone = req.signal;
  const deadline = AbortSignal.timeout(UPSTREAM_TIMEOUT_MS);
  return AbortSignal.any([clientGone, deadline]);
}
// Builds the header set sent upstream. GET and HEAD both go through here so
// they always forward the same headers; an earlier HEAD implementation only
// attached Authorization, which meant a conditional HEAD (If-None-Match)
// always came back as a fresh 200 instead of a 304.
//
// The bearer token comes from the COOKIE_NAME cookie and is attached only
// when that cookie has a non-empty value. Client headers listed in
// FORWARD_REQUEST_HEADERS are copied when present and non-empty.
function buildUpstreamHeaders(req: NextRequest): Headers {
  const outgoing = new Headers();

  const tokenCookie = req.cookies.get(COOKIE_NAME);
  if (tokenCookie?.value) {
    outgoing.set("authorization", `Bearer ${tokenCookie.value}`);
  }

  FORWARD_REQUEST_HEADERS.forEach((name) => {
    const value = req.headers.get(name);
    if (value) outgoing.set(name, value);
  });

  return outgoing;
}
// Build the upstream URL and validate it. Returns the URL, or null if the
// request must be rejected.
//
// `subPath` is a path only (no leading slash); the query string is supplied
// separately through `searchParams`.
//
// Attack surfaces this guards against:
//   1. Path traversal — `subPath = "datasets/../api/tokens"` passes a naive
//      startsWith("datasets/") check, but URL normalization resolves it to
//      huggingface.co/api/tokens. The prefix is therefore checked on the
//      *normalized* pathname after construction.
//   2. Origin escape — exotic URL syntax could make new URL() land on a
//      different host, so the origin must equal HF_HOST.
//   3. Path truncation — a literal `?` or `#` in `subPath` would be parsed
//      as the start of a query/fragment and silently cut the path short, so
//      both are percent-encoded before parsing.
function resolveUpstreamUrl(
  subPath: string,
  searchParams: URLSearchParams,
): URL | null {
  const safePath = subPath.replace(/[?#]/g, (ch) => encodeURIComponent(ch));

  let upstreamUrl: URL;
  try {
    upstreamUrl = new URL(`${HF_HOST}/${safePath}`);
  } catch {
    return null;
  }
  if (upstreamUrl.origin !== HF_HOST) return null;

  const normalized = upstreamUrl.pathname.replace(/^\/+/, "");
  if (!ALLOWED_PREFIXES.some((p) => normalized.startsWith(p))) return null;

  // append(), not set(): set() would collapse a repeated key (?a=1&a=2) to
  // its last value instead of forwarding the query as the client sent it.
  for (const [k, v] of searchParams) {
    upstreamUrl.searchParams.append(k, v);
  }
  return upstreamUrl;
}
// GET handler: streams an allow-listed huggingface.co resource back to the
// client, forwarding Range/conditional headers and the user's bearer token.
//
// Responses:
//   403  the requested path is not under an allowed prefix
//   504  upstream did not deliver response headers within UPSTREAM_TIMEOUT_MS
//   502  any other failure reaching upstream (including client disconnect)
//   else upstream's status, allow-listed headers and body, streamed through
export async function GET(
  req: NextRequest,
  ctx: { params: Promise<{ path: string[] }> },
): Promise<Response> {
  const { path } = await ctx.params;
  const upstreamUrl = resolveUpstreamUrl(
    path.join("/"),
    req.nextUrl.searchParams,
  );
  if (!upstreamUrl) return new Response("Forbidden", { status: 403 });

  const headers = buildUpstreamHeaders(req);

  // The timeout only guards the wait for response headers. A signal passed
  // to fetch() keeps governing the response body after fetch() resolves, so
  // an AbortSignal.timeout() here would also cut off any stream that takes
  // longer than the timeout to download. The timer is therefore disarmed as
  // soon as headers arrive; client disconnects keep cancelling the upstream
  // for the whole lifetime of the stream via req.signal.
  const headerDeadline = new AbortController();
  const headerTimer = setTimeout(() => {
    headerDeadline.abort(
      new DOMException(
        "upstream did not send response headers in time",
        "TimeoutError",
      ),
    );
  }, UPSTREAM_TIMEOUT_MS);

  let upstream: Response;
  try {
    upstream = await fetch(upstreamUrl, {
      method: "GET",
      headers,
      redirect: "follow",
      cache: "no-store",
      signal: AbortSignal.any([req.signal, headerDeadline.signal]),
    });
  } catch (err) {
    // Network error reaching huggingface.co, or the upstream timed out, or
    // the client went away. The native <video> turns this into a generic
    // load error with no details, so log server-side and return a useful
    // status the client can surface in devtools.
    const isTimeout = err instanceof Error && err.name === "TimeoutError";
    console.error("[proxy] upstream fetch failed", err);
    return new Response(
      isTimeout
        ? "Gateway timeout: upstream took too long"
        : "Bad gateway: upstream fetch failed",
      { status: isTimeout ? 504 : 502 },
    );
  } finally {
    clearTimeout(headerTimer);
  }

  // fetch() transparently decodes a content-encoded body but leaves the
  // upstream headers untouched, so upstream's content-length then describes
  // the encoded bytes, not the ones we are about to send. Drop it in that
  // case and let the server frame the body itself.
  const bodyWasEncoded = upstream.headers.has("content-encoding");
  const respHeaders = new Headers();
  for (const h of FORWARD_RESPONSE_HEADERS) {
    if (bodyWasEncoded && h === "content-length") continue;
    const v = upstream.headers.get(h);
    if (v) respHeaders.set(h, v);
  }
  return new Response(upstream.body, {
    status: upstream.status,
    statusText: upstream.statusText,
    headers: respHeaders,
  });
}
// HEAD handler: same URL validation and header forwarding as GET, but never
// returns a body.
//
// Responses:
//   403  the requested path is not under an allowed prefix
//   504  upstream timed out
//   502  any other failure reaching upstream (including client disconnect)
//   else upstream's status and allow-listed headers
export async function HEAD(
  req: NextRequest,
  ctx: { params: Promise<{ path: string[] }> },
): Promise<Response> {
  const { path } = await ctx.params;
  const upstreamUrl = resolveUpstreamUrl(
    path.join("/"),
    req.nextUrl.searchParams,
  );
  if (!upstreamUrl) return new Response(null, { status: 403 });

  const headers = buildUpstreamHeaders(req);

  let upstream: Response;
  try {
    upstream = await fetch(upstreamUrl, {
      method: "HEAD",
      headers,
      redirect: "follow",
      cache: "no-store",
      signal: upstreamSignal(req),
    });
  } catch (err) {
    // Log server-side; a HEAD response has no body to carry details.
    const isTimeout = err instanceof Error && err.name === "TimeoutError";
    console.error("[proxy] upstream HEAD failed", err);
    return new Response(null, { status: isTimeout ? 504 : 502 });
  }

  // content-encoding is not forwarded to the client, so when upstream
  // reports one, its content-length is the size of the encoded
  // representation and would misstate the size of what this proxy serves.
  // Omit it rather than advertise a wrong length.
  const upstreamEncoded = upstream.headers.has("content-encoding");
  const respHeaders = new Headers();
  for (const h of FORWARD_RESPONSE_HEADERS) {
    if (upstreamEncoded && h === "content-length") continue;
    const v = upstream.headers.get(h);
    if (v) respHeaders.set(h, v);
  }
  return new Response(null, {
    status: upstream.status,
    statusText: upstream.statusText,
    headers: respHeaders,
  });
}