Dynamic Intelligence
committed on
Commit
·
abd54b8
1
Parent(s):
42ac8f7
Fix 401 error: Add authenticated API proxy for private datasets
Browse files
src/app/[org]/[dataset]/[episode]/fetch-data.ts
CHANGED
|
@@ -240,7 +240,9 @@ async function getEpisodeDataV2(
|
|
| 240 |
if (!task && allData.length > 0) {
|
| 241 |
try {
|
| 242 |
const tasksUrl = buildVersionedUrl(repoId, version, "meta/tasks.jsonl");
|
| 243 |
-
|
|
|
|
|
|
|
| 244 |
|
| 245 |
if (tasksResponse.ok) {
|
| 246 |
const tasksText = await tasksResponse.text();
|
|
|
|
| 240 |
if (!task && allData.length > 0) {
|
| 241 |
try {
|
| 242 |
const tasksUrl = buildVersionedUrl(repoId, version, "meta/tasks.jsonl");
|
| 243 |
+
// Use proxy for authenticated requests
|
| 244 |
+
const proxyUrl = `/api/hf-proxy?url=${encodeURIComponent(tasksUrl)}`;
|
| 245 |
+
const tasksResponse = await fetch(proxyUrl);
|
| 246 |
|
| 247 |
if (tasksResponse.ok) {
|
| 248 |
const tasksText = await tasksResponse.text();
|
src/app/api/hf-proxy/route.ts
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { NextRequest, NextResponse } from 'next/server';
|
| 2 |
+
|
| 3 |
+
/**
|
| 4 |
+
* Generic proxy API route for Hugging Face requests with authentication
|
| 5 |
+
* Handles both JSON and binary (Parquet) files
|
| 6 |
+
* Supports GET and HEAD methods
|
| 7 |
+
*/
|
| 8 |
+
/**
 * Route handler for GET requests.
 * Delegates to `handleRequest` with the method fixed to 'GET' and returns
 * whatever response it produces.
 */
export async function GET(request: NextRequest) {
  const proxied = await handleRequest(request, 'GET');
  return proxied;
}
|
| 11 |
+
|
| 12 |
+
/**
 * Route handler for HEAD requests.
 * Delegates to `handleRequest` with the method fixed to 'HEAD' and returns
 * whatever response it produces.
 */
export async function HEAD(request: NextRequest) {
  const proxied = await handleRequest(request, 'HEAD');
  return proxied;
}
|
| 15 |
+
|
| 16 |
+
/**
 * Hosts this proxy may contact. The server-side token is attached to every
 * upstream request, so the target must be restricted: otherwise any caller
 * could point `?url=` at an arbitrary host and receive the token (or use the
 * server to reach internal addresses).
 *
 * NOTE(review): callers build their URLs from helpers/constants defined in
 * other files; if those can legitimately point at another host, add it here.
 */
const ALLOWED_UPSTREAM_HOSTS: readonly string[] = ['huggingface.co'];

/**
 * Parses `rawUrl` and returns it only when it is safe to proxy.
 *
 * @param rawUrl - The value of the `url` query parameter.
 * @returns The parsed URL when it is syntactically valid, uses https, and its
 *   hostname is an allowed host or a subdomain of one; otherwise `null`.
 */
function parseAllowedUpstreamUrl(rawUrl: string): URL | null {
  let parsed: URL;
  try {
    parsed = new URL(rawUrl);
  } catch {
    // Relative or malformed URLs cannot be proxied.
    return null;
  }

  if (parsed.protocol !== 'https:') {
    return null;
  }

  // Compare on the parsed hostname (not the raw string) so tricks such as
  // "https://huggingface.co@evil.example/" are rejected.
  const host = parsed.hostname.toLowerCase();
  const allowed = ALLOWED_UPSTREAM_HOSTS.some(
    (candidate) => host === candidate || host.endsWith(`.${candidate}`),
  );
  return allowed ? parsed : null;
}

/**
 * Shared implementation behind the GET and HEAD route handlers.
 *
 * Reads the upstream address from the `url` query parameter, fetches it with
 * the given method (adding a Bearer token from `HF_TOKEN` or
 * `HUGGINGFACE_TOKEN` when one is set), and relays the result.
 *
 * Responses:
 * - 400 when `url` is missing, malformed, not https, or not on an allowed host.
 * - The upstream status with a JSON `{ error, details }` body when the
 *   upstream response is not ok.
 * - For HEAD: an empty body carrying the upstream content type and length.
 * - For GET: re-serialized JSON when the upstream content type contains
 *   `application/json`, otherwise the raw bytes with the upstream content type.
 * - 500 with a JSON `{ error }` body when the fetch or body handling throws.
 *
 * @param request - Incoming Next.js request; only its query string is read.
 * @param method - HTTP method to use for the upstream request.
 */
async function handleRequest(request: NextRequest, method: 'GET' | 'HEAD') {
  const url = request.nextUrl.searchParams.get('url');

  if (!url) {
    return NextResponse.json(
      { error: 'url parameter is required' },
      { status: 400 }
    );
  }

  // Validate the target BEFORE any credentials are attached.
  const upstreamUrl = parseAllowedUpstreamUrl(url);
  if (!upstreamUrl) {
    return NextResponse.json(
      { error: 'url must be an https URL on huggingface.co' },
      { status: 400 }
    );
  }

  // Get token from environment variable (set in Hugging Face Space secrets)
  const token = process.env.HF_TOKEN || process.env.HUGGINGFACE_TOKEN;

  if (!token) {
    // Without a token the request is still attempted unauthenticated
    // (sufficient for public repos), but the situation is logged.
    console.warn('HF_TOKEN not configured in Space secrets - requests may fail for private repos');
  }

  try {
    const headers: Record<string, string> = {
      'Cache-Control': 'no-store',
    };

    // Add authentication if token is available
    if (token) {
      headers['Authorization'] = `Bearer ${token}`;
    }

    const response = await fetch(upstreamUrl, {
      method,
      headers,
      cache: 'no-store',
    });

    if (!response.ok) {
      // Relay the upstream status code together with whatever body text
      // could be read, so callers can surface a meaningful message.
      const errorText = await response.text().catch(() => 'Unknown error');
      return NextResponse.json(
        { error: `Failed to fetch: ${response.status} ${response.statusText}`, details: errorText },
        { status: response.status }
      );
    }

    // For HEAD requests, just return status without body
    if (method === 'HEAD') {
      return new NextResponse(null, {
        status: response.status,
        headers: {
          'Content-Type': response.headers.get('content-type') || '',
          'Content-Length': response.headers.get('content-length') || '0',
        },
      });
    }

    // Check content type to determine if it's binary or JSON
    const contentType = response.headers.get('content-type') || '';

    if (contentType.includes('application/json')) {
      const data = await response.json();
      return NextResponse.json(data);
    }

    // Anything else is relayed as raw bytes. The whole body is buffered
    // in memory before being sent.
    const arrayBuffer = await response.arrayBuffer();
    return new NextResponse(arrayBuffer, {
      headers: {
        'Content-Type': contentType,
        'Content-Length': arrayBuffer.byteLength.toString(),
      },
    });
  } catch (error) {
    console.error('Proxy error:', error);
    return NextResponse.json(
      { error: error instanceof Error ? error.message : 'Unknown error' },
      { status: 500 }
    );
  }
}
|
| 97 |
+
|
src/app/explore/page.tsx
CHANGED
|
@@ -71,9 +71,10 @@ export default async function ExplorePage({
|
|
| 71 |
episode_index: "0".padStart(6, "0"),
|
| 72 |
});
|
| 73 |
const url = buildVersionedUrl(repoId, version, videoPath);
|
| 74 |
-
// Check if videoUrl exists (status 200)
|
| 75 |
try {
|
| 76 |
-
const
|
|
|
|
| 77 |
if (headRes.ok) {
|
| 78 |
videoUrl = url;
|
| 79 |
}
|
|
|
|
| 71 |
episode_index: "0".padStart(6, "0"),
|
| 72 |
});
|
| 73 |
const url = buildVersionedUrl(repoId, version, videoPath);
|
| 74 |
+
// Check if videoUrl exists (status 200) - use proxy for authenticated requests
|
| 75 |
try {
|
| 76 |
+
const proxyUrl = `/api/hf-proxy?url=${encodeURIComponent(url)}`;
|
| 77 |
+
const headRes = await fetch(proxyUrl, { method: "HEAD" });
|
| 78 |
if (headRes.ok) {
|
| 79 |
videoUrl = url;
|
| 80 |
}
|
src/utils/parquetUtils.ts
CHANGED
|
@@ -25,10 +25,13 @@ export interface DatasetMetadata {
|
|
| 25 |
}
|
| 26 |
|
| 27 |
export async function fetchJson<T>(url: string): Promise<T> {
|
| 28 |
-
|
|
|
|
|
|
|
| 29 |
if (!res.ok) {
|
|
|
|
| 30 |
throw new Error(
|
| 31 |
-
`Failed to fetch JSON ${url}: ${res.status} ${res.statusText}`,
|
| 32 |
);
|
| 33 |
}
|
| 34 |
return res.json() as Promise<T>;
|
|
@@ -43,10 +46,13 @@ export function formatStringWithVars(
|
|
| 43 |
|
| 44 |
// Fetch and parse the Parquet file
|
| 45 |
export async function fetchParquetFile(url: string): Promise<ArrayBuffer> {
|
| 46 |
-
|
|
|
|
|
|
|
| 47 |
|
| 48 |
if (!res.ok) {
|
| 49 |
-
|
|
|
|
| 50 |
}
|
| 51 |
|
| 52 |
return res.arrayBuffer();
|
|
|
|
| 25 |
}
|
| 26 |
|
| 27 |
export async function fetchJson<T>(url: string): Promise<T> {
|
| 28 |
+
// Use API proxy for authenticated requests (handles private repos)
|
| 29 |
+
const proxyUrl = `/api/hf-proxy?url=${encodeURIComponent(url)}`;
|
| 30 |
+
const res = await fetch(proxyUrl);
|
| 31 |
if (!res.ok) {
|
| 32 |
+
const errorData = await res.json().catch(() => ({}));
|
| 33 |
throw new Error(
|
| 34 |
+
errorData.error || `Failed to fetch JSON ${url}: ${res.status} ${res.statusText}`,
|
| 35 |
);
|
| 36 |
}
|
| 37 |
return res.json() as Promise<T>;
|
|
|
|
| 46 |
|
| 47 |
// Fetch and parse the Parquet file
|
| 48 |
export async function fetchParquetFile(url: string): Promise<ArrayBuffer> {
|
| 49 |
+
// Use API proxy for authenticated requests (handles private repos)
|
| 50 |
+
const proxyUrl = `/api/hf-proxy?url=${encodeURIComponent(url)}`;
|
| 51 |
+
const res = await fetch(proxyUrl);
|
| 52 |
|
| 53 |
if (!res.ok) {
|
| 54 |
+
const errorData = await res.json().catch(() => ({}));
|
| 55 |
+
throw new Error(errorData.error || `Failed to fetch ${url}: ${res.status} ${res.statusText}`);
|
| 56 |
}
|
| 57 |
|
| 58 |
return res.arrayBuffer();
|
src/utils/versionUtils.ts
CHANGED
|
@@ -25,15 +25,19 @@ interface DatasetInfo {
|
|
| 25 |
|
| 26 |
/**
|
| 27 |
* Fetches dataset information from the main revision
|
|
|
|
| 28 |
*/
|
| 29 |
export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
|
| 30 |
try {
|
| 31 |
const testUrl = `${DATASET_URL}/${repoId}/resolve/main/meta/info.json`;
|
| 32 |
|
|
|
|
|
|
|
|
|
|
| 33 |
const controller = new AbortController();
|
| 34 |
const timeoutId = setTimeout(() => controller.abort(), 10000); // 10 second timeout
|
| 35 |
|
| 36 |
-
const response = await fetch(
|
| 37 |
method: "GET",
|
| 38 |
cache: "no-store",
|
| 39 |
signal: controller.signal
|
|
@@ -42,7 +46,8 @@ export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
|
|
| 42 |
clearTimeout(timeoutId);
|
| 43 |
|
| 44 |
if (!response.ok) {
|
| 45 |
-
|
|
|
|
| 46 |
}
|
| 47 |
|
| 48 |
const data = await response.json();
|
|
|
|
| 25 |
|
| 26 |
/**
|
| 27 |
* Fetches dataset information from the main revision
|
| 28 |
+
* Uses authenticated API proxy to handle private datasets
|
| 29 |
*/
|
| 30 |
export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
|
| 31 |
try {
|
| 32 |
const testUrl = `${DATASET_URL}/${repoId}/resolve/main/meta/info.json`;
|
| 33 |
|
| 34 |
+
// Use API proxy for authenticated requests
|
| 35 |
+
const proxyUrl = `/api/hf-proxy?url=${encodeURIComponent(testUrl)}`;
|
| 36 |
+
|
| 37 |
const controller = new AbortController();
|
| 38 |
const timeoutId = setTimeout(() => controller.abort(), 10000); // 10 second timeout
|
| 39 |
|
| 40 |
+
const response = await fetch(proxyUrl, {
|
| 41 |
method: "GET",
|
| 42 |
cache: "no-store",
|
| 43 |
signal: controller.signal
|
|
|
|
| 46 |
clearTimeout(timeoutId);
|
| 47 |
|
| 48 |
if (!response.ok) {
|
| 49 |
+
const errorData = await response.json().catch(() => ({}));
|
| 50 |
+
throw new Error(errorData.error || `Failed to fetch dataset info: ${response.status}`);
|
| 51 |
}
|
| 52 |
|
| 53 |
const data = await response.json();
|