File size: 4,801 Bytes
abd54b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9b45fba
abd54b8
9b45fba
abd54b8
 
 
 
 
 
9b45fba
 
 
 
 
 
 
 
 
 
 
 
 
abd54b8
00a956c
 
 
 
 
 
 
 
abd54b8
6e4988e
abd54b8
00a956c
 
 
e612ce5
00a956c
 
 
 
6e4988e
e612ce5
abd54b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cbae1bb
 
 
 
 
 
 
 
 
 
 
 
 
 
abd54b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import { NextRequest, NextResponse } from 'next/server';

/**
 * Authenticated proxy route for Hugging Face resources.
 *
 * This export serves GET requests; the work is delegated to `handleRequest`,
 * which relays JSON as well as binary (e.g. Parquet) payloads. HEAD requests
 * are served by the `HEAD` export of this module.
 *
 * @param request - Incoming request; the upstream address is read from its
 *   `url` query parameter.
 * @returns The response produced by `handleRequest` for the GET method.
 */
export async function GET(request: NextRequest) {
  const proxied = await handleRequest(request, 'GET');
  return proxied;
}

/**
 * HEAD variant of the proxy route: delegates to `handleRequest` with the
 * 'HEAD' method.
 *
 * @param request - Incoming request; the upstream address is read from its
 *   `url` query parameter.
 * @returns The response produced by `handleRequest` for the HEAD method.
 */
export async function HEAD(request: NextRequest) {
  const proxied = await handleRequest(request, 'HEAD');
  return proxied;
}

/**
 * Domains (including any subdomain) the proxy is willing to contact.
 * The server-side token is attached to every upstream request, so the target
 * must be restricted; otherwise any caller could have the token sent to a host
 * of their choosing.
 */
const ALLOWED_PROXY_DOMAINS: readonly string[] = ['huggingface.co', 'hf.co'];

/**
 * Converts the raw `url` query value into a `URL`.
 *
 * `searchParams.get` has already percent-decoded the value once, so it is
 * parsed as-is; decoding it a second time would corrupt escapes that belong to
 * the target URL itself (e.g. `%2F`, `%25`). A second decode is attempted only
 * when the value is not an absolute URL, which keeps callers that
 * double-encode the parameter working.
 *
 * @throws TypeError or URIError when neither form is a valid absolute URL.
 */
function parseTargetUrl(raw: string): URL {
  try {
    return new URL(raw);
  } catch {
    return new URL(decodeURIComponent(raw));
  }
}

/** Returns true when `target` uses https and its host is an allowed domain or a subdomain of one. */
function isAllowedProxyTarget(target: URL): boolean {
  if (target.protocol !== 'https:') {
    return false;
  }
  const host = target.hostname.toLowerCase();
  return ALLOWED_PROXY_DOMAINS.some(
    (domain) => host === domain || host.endsWith(`.${domain}`)
  );
}

/**
 * Looks up the access token: the first non-empty of several environment
 * variable names, then the `x-hf-token` request header (kept for debugging).
 * `||` is intentional so that an empty environment variable falls through.
 */
function resolveToken(request: NextRequest): string | null {
  return (
    process.env.HF_TOKEN ||
    process.env.HUGGINGFACE_TOKEN ||
    process.env.HF_API_TOKEN ||
    process.env.HUGGING_FACE_HUB_TOKEN ||
    request.headers.get('x-hf-token') ||
    null
  );
}

/**
 * Proxies a GET or HEAD request to the address given in the `url` query
 * parameter, adding a Bearer token when one is configured.
 *
 * Responses:
 * - 400 when `url` is missing or is not a valid absolute URL
 * - 403 when the target is not an https URL on an allowed domain
 * - the upstream status (with a JSON error body) when the upstream call is not ok
 * - 500 when the fetch or the body handling throws
 * - otherwise the upstream payload: JSON when the upstream content type
 *   contains `application/json`, raw bytes for anything else; HEAD requests
 *   get only status, Content-Type and Content-Length
 *
 * @param request - Incoming request carrying the `url` query parameter.
 * @param method - HTTP method to use for the upstream request.
 */
async function handleRequest(request: NextRequest, method: 'GET' | 'HEAD') {
  const urlParam = request.nextUrl.searchParams.get('url');

  if (!urlParam) {
    return NextResponse.json(
      { error: 'url parameter is required' },
      { status: 400 }
    );
  }

  let target: URL;
  try {
    target = parseTargetUrl(urlParam);
  } catch (error) {
    return NextResponse.json(
      { error: `Invalid URL: ${urlParam}. ${error instanceof Error ? error.message : 'Failed to parse URL'}` },
      { status: 400 }
    );
  }

  // Refuse before any token is read or attached: the Authorization header must
  // never be sent to a host outside the allowlist.
  if (!isAllowedProxyTarget(target)) {
    return NextResponse.json(
      {
        error: `URL not allowed: only https URLs on ${ALLOWED_PROXY_DOMAINS.join(', ')} (or their subdomains) can be proxied`,
      },
      { status: 403 }
    );
  }

  const url = target.href;
  const token = resolveToken(request);

  // Log token status for debugging. Only presence and length are logged;
  // no part of the token value is written to the logs.
  if (!token) {
    const envKeys = Object.keys(process.env).filter(
      (k) => k.includes('HF') || k.includes('TOKEN') || k.includes('HUGGING')
    );
    console.error('HF_TOKEN not found!');
    console.error('Checked: HF_TOKEN, HUGGINGFACE_TOKEN, HF_API_TOKEN, HUGGING_FACE_HUB_TOKEN');
    console.error('Available env vars with HF/TOKEN:', envKeys.length > 0 ? envKeys.join(', ') : 'NONE');
    console.error('⚠️  Requests to private repos will fail with 401');
    console.error('💡 Make sure HF_TOKEN secret is added in Space Settings → Variables and secrets');
  } else {
    console.log('HF_TOKEN found (length:', token.length, 'chars)');
  }

  try {
    const headers: Record<string, string> = {
      'Cache-Control': 'no-store',
    };

    // Add authentication if token is available
    if (token) {
      headers['Authorization'] = `Bearer ${token}`;
    }

    const response = await fetch(url, {
      method,
      headers,
      cache: 'no-store',
    });

    if (!response.ok) {
      // Forward the upstream status together with whatever error text it sent.
      const errorText = await response.text().catch(() => 'Unknown error');

      // Log detailed error for debugging
      console.error(`Proxy fetch failed: ${response.status} ${response.statusText}`);
      console.error(`URL: ${url}`);
      console.error(`Token present: ${!!token}`);
      console.error(`Token length: ${token ? token.length : 0}`);
      if (response.status === 401) {
        console.error('401 Unauthorized - Possible causes:');
        console.error('  1. Token has no READ permissions');
        console.error('  2. Dataset is gated and token/user has no access');
        console.error('  3. Token is invalid or expired');
      }

      return NextResponse.json(
        { error: `Failed to fetch: ${response.status} ${response.statusText}`, details: errorText },
        { status: response.status }
      );
    }

    // For HEAD requests, just return status without body
    if (method === 'HEAD') {
      return new NextResponse(null, {
        status: response.status,
        headers: {
          'Content-Type': response.headers.get('content-type') || '',
          'Content-Length': response.headers.get('content-length') || '0',
        },
      });
    }

    // Check content type to determine if it's binary or JSON
    const contentType = response.headers.get('content-type') || '';

    if (contentType.includes('application/json')) {
      const data = await response.json();
      return NextResponse.json(data);
    }

    // Anything else is relayed as raw bytes (Parquet files, videos, etc.)
    const arrayBuffer = await response.arrayBuffer();
    return new NextResponse(arrayBuffer, {
      headers: {
        'Content-Type': contentType,
        'Content-Length': arrayBuffer.byteLength.toString(),
      },
    });
  } catch (error) {
    console.error('Proxy error:', error);
    return NextResponse.json(
      { error: error instanceof Error ? error.message : 'Unknown error' },
      { status: 500 }
    );
  }
}