import { NextRequest, NextResponse } from 'next/server';

/**
 * Generic proxy API route for Hugging Face requests with authentication.
 *
 * - Handles both JSON and binary (Parquet, video, ...) responses.
 * - Supports the GET and HEAD methods.
 * - The target is passed in the `url` query parameter.
 *
 * NOTE(security): any absolute URL is proxied, but the server-side token is
 * only ever attached to HTTPS requests aimed at a Hugging Face domain (see
 * `TRUSTED_TOKEN_DOMAINS`). Without that restriction a caller could point the
 * proxy at a server they control and read the secret from the
 * `Authorization` header. Consider also rejecting non-Hugging-Face targets
 * entirely if the route does not need to act as a general-purpose proxy.
 */

/** Environment variables checked, in priority order, for the access token. */
const TOKEN_ENV_VARS = [
  'HF_TOKEN',
  'HUGGINGFACE_TOKEN',
  'HF_API_TOKEN',
  'HUGGING_FACE_HUB_TOKEN',
] as const;

/**
 * Registrable domains that may receive the token. A host matches when it is
 * one of these domains or a subdomain of one. Extend this list deliberately:
 * every entry is a place the secret can be sent to.
 */
const TRUSTED_TOKEN_DOMAINS = ['huggingface.co', 'hf.co'] as const;

/** Proxies a GET request to the URL given in the `url` query parameter. */
export async function GET(request: NextRequest) {
  return handleRequest(request, 'GET');
}

/** Proxies a HEAD request to the URL given in the `url` query parameter. */
export async function HEAD(request: NextRequest) {
  return handleRequest(request, 'HEAD');
}

/** Parses `value` as an absolute URL, returning `null` instead of throwing. */
function parseAbsoluteUrl(value: string): URL | null {
  try {
    return new URL(value);
  } catch {
    return null;
  }
}

/**
 * Turns the `url` query parameter into the URL to fetch.
 *
 * `URLSearchParams.get` already percent-decodes the value, so it is used
 * as-is whenever it parses as an absolute URL. Decoding it a second time
 * would corrupt escapes that belong to the target itself (for example
 * `refs%2Fconvert%2Fparquet` would become `refs/convert/parquet`). The extra
 * `decodeURIComponent` pass is kept only as a fallback for callers that
 * double-encode the parameter.
 *
 * @throws When the value is not a valid absolute URL in either form.
 */
function resolveTargetUrl(rawParam: string): URL {
  const direct = parseAbsoluteUrl(rawParam);
  if (direct) {
    return direct;
  }
  return new URL(decodeURIComponent(rawParam));
}

/**
 * Looks up the access token: first non-empty environment variable from
 * `TOKEN_ENV_VARS`, then the `x-hf-token` request header (for debugging).
 *
 * @returns The token, or `null` when none is configured.
 */
function resolveToken(request: NextRequest): string | null {
  for (const name of TOKEN_ENV_VARS) {
    const value = process.env[name];
    if (value) {
      return value;
    }
  }
  return request.headers.get('x-hf-token') || null;
}

/** Returns true when `target` is an HTTPS URL on a trusted Hugging Face domain. */
function isTrustedTokenTarget(target: URL): boolean {
  if (target.protocol !== 'https:') {
    return false;
  }
  const host = target.hostname;
  return TRUSTED_TOKEN_DOMAINS.some(
    (domain) => host === domain || host.endsWith(`.${domain}`)
  );
}

/**
 * Logs whether a token was found. Only the token length is logged; no part
 * of the token value is ever written to the logs.
 */
function logTokenStatus(token: string | null): void {
  if (token) {
    console.log('HF_TOKEN found (length:', token.length, 'chars)');
    return;
  }

  // Names only (never values) of environment variables that look related.
  const envKeys = Object.keys(process.env).filter(
    (key) => key.includes('HF') || key.includes('TOKEN') || key.includes('HUGGING')
  );
  console.error('HF_TOKEN not found!');
  console.error(`Checked: ${TOKEN_ENV_VARS.join(', ')}`);
  console.error('Available env vars with HF/TOKEN:', envKeys.length > 0 ? envKeys.join(', ') : 'NONE');
  console.error('⚠️ Requests to private repos will fail with 401');
  console.error('💡 Make sure HF_TOKEN secret is added in Space Settings → Variables and secrets');
}

/**
 * Shared implementation behind the GET and HEAD handlers.
 *
 * @param request - Incoming request; must carry a `url` query parameter.
 * @param method - HTTP method to use for the upstream request.
 * @returns
 *  - 400 JSON error when `url` is missing or not a valid absolute URL;
 *  - the upstream status with a JSON error body when upstream is not OK;
 *  - for HEAD, an empty response carrying upstream content type and length;
 *  - for GET, the upstream JSON re-serialized, or the raw bytes otherwise;
 *  - 500 JSON error when the upstream request itself throws.
 */
async function handleRequest(request: NextRequest, method: 'GET' | 'HEAD') {
  const urlParam = request.nextUrl.searchParams.get('url');

  if (!urlParam) {
    return NextResponse.json(
      { error: 'url parameter is required' },
      { status: 400 }
    );
  }

  let target: URL;
  try {
    target = resolveTargetUrl(urlParam);
  } catch (error) {
    return NextResponse.json(
      { error: `Invalid URL: ${urlParam}. ${error instanceof Error ? error.message : 'Failed to parse URL'}` },
      { status: 400 }
    );
  }

  // Token comes from the environment (Hugging Face Space secrets) or, for
  // debugging, from the `x-hf-token` request header.
  const token = resolveToken(request);
  logTokenStatus(token);

  // Never hand the secret to a host we do not trust.
  const sendToken = token !== null && isTrustedTokenTarget(target);

  try {
    const headers: Record<string, string> = {
      'Cache-Control': 'no-store',
    };
    if (token !== null && sendToken) {
      headers['Authorization'] = `Bearer ${token}`;
    }

    const response = await fetch(target.href, {
      method,
      headers,
      cache: 'no-store',
    });

    if (!response.ok) {
      const errorText = await response.text().catch(() => 'Unknown error');

      // Detailed diagnostics; the token value itself is never logged.
      console.error(`Proxy fetch failed: ${response.status} ${response.statusText}`);
      console.error(`URL: ${target.href}`);
      console.error(`Token present: ${!!token}`);
      console.error(`Token length: ${token ? token.length : 0}`);
      console.error(`Token sent upstream: ${sendToken}`);

      if (response.status === 401) {
        console.error('401 Unauthorized - Possible causes:');
        console.error(' 1. Token has no READ permissions');
        console.error(' 2. Dataset is gated and token/user has no access');
        console.error(' 3. Token is invalid or expired');
        console.error(' 4. Token was withheld because the target is not an HTTPS Hugging Face host');
      }

      return NextResponse.json(
        {
          error: `Failed to fetch: ${response.status} ${response.statusText}`,
          details: errorText,
        },
        { status: response.status }
      );
    }

    // HEAD: report status and basic entity headers, no body.
    if (method === 'HEAD') {
      return new NextResponse(null, {
        status: response.status,
        headers: {
          'Content-Type': response.headers.get('content-type') || '',
          'Content-Length': response.headers.get('content-length') || '0',
        },
      });
    }

    // GET: choose between JSON and binary passthrough by content type.
    const contentType = response.headers.get('content-type') || '';

    if (contentType.includes('application/json')) {
      const data = await response.json();
      return NextResponse.json(data);
    }

    // Binary data (Parquet files, videos, etc.) is buffered and returned as-is.
    const arrayBuffer = await response.arrayBuffer();
    return new NextResponse(arrayBuffer, {
      headers: {
        'Content-Type': contentType,
        'Content-Length': arrayBuffer.byteLength.toString(),
      },
    });
  } catch (error) {
    console.error('Proxy error:', error);
    return NextResponse.json(
      { error: error instanceof Error ? error.message : 'Unknown error' },
      { status: 500 }
    );
  }
}