Dynamic Intelligence committed on
Commit
abd54b8
·
1 Parent(s): 42ac8f7

Fix 401 error: Add authenticated API proxy for private datasets

Browse files
src/app/[org]/[dataset]/[episode]/fetch-data.ts CHANGED
@@ -240,7 +240,9 @@ async function getEpisodeDataV2(
240
  if (!task && allData.length > 0) {
241
  try {
242
  const tasksUrl = buildVersionedUrl(repoId, version, "meta/tasks.jsonl");
243
- const tasksResponse = await fetch(tasksUrl);
 
 
244
 
245
  if (tasksResponse.ok) {
246
  const tasksText = await tasksResponse.text();
 
240
  if (!task && allData.length > 0) {
241
  try {
242
  const tasksUrl = buildVersionedUrl(repoId, version, "meta/tasks.jsonl");
243
+ // Use proxy for authenticated requests
244
+ const proxyUrl = `/api/hf-proxy?url=${encodeURIComponent(tasksUrl)}`;
245
+ const tasksResponse = await fetch(proxyUrl);
246
 
247
  if (tasksResponse.ok) {
248
  const tasksText = await tasksResponse.text();
src/app/api/hf-proxy/route.ts ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { NextRequest, NextResponse } from 'next/server';
2
+
3
+ /**
4
+ * Generic proxy API route for Hugging Face requests with authentication
5
+ * Handles both JSON and binary (Parquet) files
6
+ * Supports GET and HEAD methods
7
+ */
8
+ export async function GET(request: NextRequest) {
9
+ return handleRequest(request, 'GET');
10
+ }
11
+
12
+ export async function HEAD(request: NextRequest) {
13
+ return handleRequest(request, 'HEAD');
14
+ }
15
+
16
+ async function handleRequest(request: NextRequest, method: 'GET' | 'HEAD') {
17
+ const searchParams = request.nextUrl.searchParams;
18
+ const url = searchParams.get('url');
19
+
20
+ if (!url) {
21
+ return NextResponse.json(
22
+ { error: 'url parameter is required' },
23
+ { status: 400 }
24
+ );
25
+ }
26
+
27
+ // Get token from environment variable (set in Hugging Face Space secrets)
28
+ const token = process.env.HF_TOKEN || process.env.HUGGINGFACE_TOKEN;
29
+
30
+ if (!token) {
31
+ // If no token, try without auth (for public repos)
32
+ // But log a warning
33
+ console.warn('HF_TOKEN not configured in Space secrets - requests may fail for private repos');
34
+ }
35
+
36
+ try {
37
+ const headers: HeadersInit = {
38
+ 'Cache-Control': 'no-store',
39
+ };
40
+
41
+ // Add authentication if token is available
42
+ if (token) {
43
+ headers['Authorization'] = `Bearer ${token}`;
44
+ }
45
+
46
+ const response = await fetch(url, {
47
+ method,
48
+ headers,
49
+ cache: 'no-store',
50
+ });
51
+
52
+ if (!response.ok) {
53
+ // Return error with status code
54
+ const errorText = await response.text().catch(() => 'Unknown error');
55
+ return NextResponse.json(
56
+ { error: `Failed to fetch: ${response.status} ${response.statusText}`, details: errorText },
57
+ { status: response.status }
58
+ );
59
+ }
60
+
61
+ // For HEAD requests, just return status without body
62
+ if (method === 'HEAD') {
63
+ return new NextResponse(null, {
64
+ status: response.status,
65
+ headers: {
66
+ 'Content-Type': response.headers.get('content-type') || '',
67
+ 'Content-Length': response.headers.get('content-length') || '0',
68
+ },
69
+ });
70
+ }
71
+
72
+ // Check content type to determine if it's binary or JSON
73
+ const contentType = response.headers.get('content-type') || '';
74
+
75
+ if (contentType.includes('application/json')) {
76
+ // Return JSON
77
+ const data = await response.json();
78
+ return NextResponse.json(data);
79
+ } else {
80
+ // Return binary data (for Parquet files, videos, etc.)
81
+ const arrayBuffer = await response.arrayBuffer();
82
+ return new NextResponse(arrayBuffer, {
83
+ headers: {
84
+ 'Content-Type': contentType,
85
+ 'Content-Length': arrayBuffer.byteLength.toString(),
86
+ },
87
+ });
88
+ }
89
+ } catch (error) {
90
+ console.error('Proxy error:', error);
91
+ return NextResponse.json(
92
+ { error: error instanceof Error ? error.message : 'Unknown error' },
93
+ { status: 500 }
94
+ );
95
+ }
96
+ }
97
+
src/app/explore/page.tsx CHANGED
@@ -71,9 +71,10 @@ export default async function ExplorePage({
71
  episode_index: "0".padStart(6, "0"),
72
  });
73
  const url = buildVersionedUrl(repoId, version, videoPath);
74
- // Check if videoUrl exists (status 200)
75
  try {
76
- const headRes = await fetch(url, { method: "HEAD" });
 
77
  if (headRes.ok) {
78
  videoUrl = url;
79
  }
 
71
  episode_index: "0".padStart(6, "0"),
72
  });
73
  const url = buildVersionedUrl(repoId, version, videoPath);
74
+ // Check if videoUrl exists (status 200) - use proxy for authenticated requests
75
  try {
76
+ const proxyUrl = `/api/hf-proxy?url=${encodeURIComponent(url)}`;
77
+ const headRes = await fetch(proxyUrl, { method: "HEAD" });
78
  if (headRes.ok) {
79
  videoUrl = url;
80
  }
src/utils/parquetUtils.ts CHANGED
@@ -25,10 +25,13 @@ export interface DatasetMetadata {
25
  }
26
 
27
  export async function fetchJson<T>(url: string): Promise<T> {
28
- const res = await fetch(url);
 
 
29
  if (!res.ok) {
 
30
  throw new Error(
31
- `Failed to fetch JSON ${url}: ${res.status} ${res.statusText}`,
32
  );
33
  }
34
  return res.json() as Promise<T>;
@@ -43,10 +46,13 @@ export function formatStringWithVars(
43
 
44
  // Fetch and parse the Parquet file
45
  export async function fetchParquetFile(url: string): Promise<ArrayBuffer> {
46
- const res = await fetch(url);
 
 
47
 
48
  if (!res.ok) {
49
- throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`);
 
50
  }
51
 
52
  return res.arrayBuffer();
 
25
  }
26
 
27
  export async function fetchJson<T>(url: string): Promise<T> {
28
+ // Use API proxy for authenticated requests (handles private repos)
29
+ const proxyUrl = `/api/hf-proxy?url=${encodeURIComponent(url)}`;
30
+ const res = await fetch(proxyUrl);
31
  if (!res.ok) {
32
+ const errorData = await res.json().catch(() => ({}));
33
  throw new Error(
34
+ errorData.error || `Failed to fetch JSON ${url}: ${res.status} ${res.statusText}`,
35
  );
36
  }
37
  return res.json() as Promise<T>;
 
46
 
47
  // Fetch and parse the Parquet file
48
  export async function fetchParquetFile(url: string): Promise<ArrayBuffer> {
49
+ // Use API proxy for authenticated requests (handles private repos)
50
+ const proxyUrl = `/api/hf-proxy?url=${encodeURIComponent(url)}`;
51
+ const res = await fetch(proxyUrl);
52
 
53
  if (!res.ok) {
54
+ const errorData = await res.json().catch(() => ({}));
55
+ throw new Error(errorData.error || `Failed to fetch ${url}: ${res.status} ${res.statusText}`);
56
  }
57
 
58
  return res.arrayBuffer();
src/utils/versionUtils.ts CHANGED
@@ -25,15 +25,19 @@ interface DatasetInfo {
25
 
26
  /**
27
  * Fetches dataset information from the main revision
 
28
  */
29
  export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
30
  try {
31
  const testUrl = `${DATASET_URL}/${repoId}/resolve/main/meta/info.json`;
32
 
 
 
 
33
  const controller = new AbortController();
34
  const timeoutId = setTimeout(() => controller.abort(), 10000); // 10 second timeout
35
 
36
- const response = await fetch(testUrl, {
37
  method: "GET",
38
  cache: "no-store",
39
  signal: controller.signal
@@ -42,7 +46,8 @@ export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
42
  clearTimeout(timeoutId);
43
 
44
  if (!response.ok) {
45
- throw new Error(`Failed to fetch dataset info: ${response.status}`);
 
46
  }
47
 
48
  const data = await response.json();
 
25
 
26
  /**
27
  * Fetches dataset information from the main revision
28
+ * Uses authenticated API proxy to handle private datasets
29
  */
30
  export async function getDatasetInfo(repoId: string): Promise<DatasetInfo> {
31
  try {
32
  const testUrl = `${DATASET_URL}/${repoId}/resolve/main/meta/info.json`;
33
 
34
+ // Use API proxy for authenticated requests
35
+ const proxyUrl = `/api/hf-proxy?url=${encodeURIComponent(testUrl)}`;
36
+
37
  const controller = new AbortController();
38
  const timeoutId = setTimeout(() => controller.abort(), 10000); // 10 second timeout
39
 
40
+ const response = await fetch(proxyUrl, {
41
  method: "GET",
42
  cache: "no-store",
43
  signal: controller.signal
 
46
  clearTimeout(timeoutId);
47
 
48
  if (!response.ok) {
49
+ const errorData = await response.json().catch(() => ({}));
50
+ throw new Error(errorData.error || `Failed to fetch dataset info: ${response.status}`);
51
  }
52
 
53
  const data = await response.json();