davanstrien HF Staff committed on
Commit
bde3673
·
verified ·
1 Parent(s): 392fced

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +9 -5
  2. index.html +174 -18
README.md CHANGED
@@ -1,10 +1,14 @@
1
  ---
2
- title: Duckdb Wasm Cors Test
3
- emoji: 🏃
4
- colorFrom: yellow
5
- colorTo: gray
6
  sdk: static
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
1
  ---
2
+ title: DuckDB WASM HF Parquet CORS Test
3
+ emoji: 🧪
4
+ colorFrom: blue
5
+ colorTo: green
6
  sdk: static
7
  pinned: false
8
  ---
9
 
10
+ # DuckDB WASM - HF Parquet CORS Test
11
+
12
+ Minimal test to check if DuckDB WASM can load parquet files from HuggingFace dataset URLs when running on an HF Space.
13
+
14
+ Tests multiple URL patterns to determine which (if any) work for cross-origin parquet loading.
index.html CHANGED
@@ -1,19 +1,175 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  </html>
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>DuckDB WASM - HF Parquet CORS Test</title>
7
+ <style>
8
+ body { font-family: system-ui, sans-serif; max-width: 800px; margin: 2rem auto; padding: 0 1rem; }
9
+ h1 { font-size: 1.5rem; }
10
+ .test { border: 1px solid #ddd; border-radius: 8px; padding: 1rem; margin: 1rem 0; }
11
+ .test h3 { margin-top: 0; font-size: 0.95rem; word-break: break-all; }
12
+ .pending { border-left: 4px solid #888; }
13
+ .success { border-left: 4px solid #22c55e; background: #f0fdf4; }
14
+ .failure { border-left: 4px solid #ef4444; background: #fef2f2; }
15
+ .status { font-weight: bold; }
16
+ pre { background: #f1f5f9; padding: 0.5rem; border-radius: 4px; overflow-x: auto; font-size: 0.85rem; }
17
+ #init-status { padding: 0.5rem; margin: 1rem 0; }
18
+ button { padding: 0.5rem 1rem; border-radius: 4px; border: 1px solid #ddd; cursor: pointer; margin: 0.25rem; }
19
+ button:hover { background: #f1f5f9; }
20
+ .info { background: #eff6ff; border: 1px solid #bfdbfe; border-radius: 8px; padding: 1rem; margin: 1rem 0; font-size: 0.9rem; }
21
+ </style>
22
+ </head>
23
+ <body>
24
+ <h1>DuckDB WASM - HF Parquet CORS Test</h1>
25
+
26
+ <div class="info">
27
+ <strong>Page origin:</strong> <span id="page-origin"></span><br>
28
+ <strong>DuckDB WASM version:</strong> 1.30.0 (same as embedding-atlas)<br>
29
+ <strong>forceFullHTTPReads:</strong> true (same as embedding-atlas)
30
+ </div>
31
+
32
+ <div id="init-status">Initializing DuckDB WASM...</div>
33
+
34
+ <div>
35
+ <button onclick="runAllTests()">Run All Tests</button>
36
+ <button onclick="runCustomTest()">Test Custom URL</button>
37
+ <input type="text" id="custom-url" placeholder="Enter parquet URL..." style="width: 400px; padding: 0.5rem; border: 1px solid #ddd; border-radius: 4px;">
38
+ </div>
39
+
40
+ <div id="tests"></div>
41
+
42
+ <script type="module">
43
+ import * as duckdb from 'https://cdn.jsdelivr.net/npm/@duckdb/duckdb-wasm@1.30.0/+esm';
44
+
45
// Display the origin this page is served from in the info box.
const pageOriginEl = document.getElementById('page-origin');
pageOriginEl.textContent = window.location.origin;

// DuckDB handles shared by the functions below; both are assigned by initDuckDB().
let db;
let connection;
48
+
49
// URLs exercised by "Run All Tests". Each entry is handed to read_parquet()
// as-is, so the label must describe what the URL actually serves.
const TEST_URLS = [
  {
    label: "HF resolve URL (small dataset, no ?download=true)",
    url: "https://huggingface.co/datasets/nyu-mll/glue/resolve/refs%2Fconvert%2Fparquet/cola/test/0000.parquet"
  },
  {
    label: "HF resolve URL (small dataset, with ?download=true)",
    url: "https://huggingface.co/datasets/nyu-mll/glue/resolve/refs%2Fconvert%2Fparquet/cola/test/0000.parquet?download=true"
  },
  {
    // The /rows endpoint answers with a JSON document rather than a parquet
    // file, so the label says so instead of calling it a "parquet URL".
    label: "HF datasets-server /rows URL (JSON API response, not a parquet file)",
    url: "https://datasets-server.huggingface.co/rows?dataset=nyu-mll/glue&config=cola&split=test&offset=0&length=10"
  },
];
64
+
65
/**
 * Initialization that is in flight or has already succeeded. Sharing it keeps
 * overlapping callers (the auto-init call and a button click) from starting a
 * second worker. It is reset to null after a failed attempt so a later call
 * can retry.
 */
let duckDbInitPromise = null;

/**
 * Replace the contents of #init-status with a bold, colored message.
 * The message is assigned through textContent so error text is never parsed
 * as markup.
 *
 * @param {string} message - Text to display.
 * @param {string} color - CSS color for the text.
 */
function showInitStatus(message, color) {
  const badge = document.createElement('span');
  badge.style.color = color;
  badge.style.fontWeight = 'bold';
  badge.textContent = message;

  const statusEl = document.getElementById('init-status');
  statusEl.textContent = '';
  statusEl.appendChild(badge);
}

/**
 * Perform one initialization attempt: pick a jsDelivr bundle, start the
 * worker, instantiate and open the database, and store the handles in the
 * shared `db` / `connection` variables.
 *
 * @returns {Promise<boolean>} true on success, false if any step threw.
 */
async function startDuckDB() {
  let workerUrl = null;
  let worker = null;
  try {
    const bundle = await duckdb.selectBundle(duckdb.getJsDelivrBundles());

    // The worker script lives on the CDN, so it is loaded through a same-origin
    // blob that simply importScripts() it.
    workerUrl = URL.createObjectURL(
      new Blob([`importScripts("${bundle.mainWorker}");`], { type: 'text/javascript' })
    );
    worker = new Worker(workerUrl);
    const logger = new duckdb.ConsoleLogger();
    db = new duckdb.AsyncDuckDB(logger, worker);
    await db.instantiate(bundle.mainModule, bundle.pthreadWorker);
    await db.open({
      filesystem: {
        forceFullHTTPReads: true, // Match embedding-atlas config
      },
    });
    connection = await db.connect();

    showInitStatus('DuckDB WASM initialized successfully', 'green');
    return true;
  } catch (e) {
    // A worker from a failed attempt is never used again; stop it so retries
    // do not accumulate idle workers.
    if (worker !== null) {
      worker.terminate();
    }
    const detail = e instanceof Error ? e.message : String(e);
    showInitStatus(`DuckDB init failed: ${detail}`, 'red');
    console.error('DuckDB init error:', e);
    return false;
  } finally {
    // The blob URL is only needed to start the worker; release it either way.
    if (workerUrl !== null) {
      URL.revokeObjectURL(workerUrl);
    }
  }
}

/**
 * Initialize DuckDB WASM and report the outcome in #init-status.
 *
 * Calls made while an attempt is running, or after one has succeeded, share
 * that attempt's result instead of starting another worker. After a failure
 * the next call starts a fresh attempt.
 *
 * @returns {Promise<boolean>} true if DuckDB is ready, false if init failed.
 */
async function initDuckDB() {
  if (duckDbInitPromise === null) {
    duckDbInitPromise = startDuckDB().then((ok) => {
      if (!ok) {
        duckDbInitPromise = null;
      }
      return ok;
    });
  }
  return duckDbInitPromise;
}
92
+
93
/**
 * Escape a value for interpolation into an HTML string.
 *
 * @param {*} value - Value to render; converted with String().
 * @returns {string} Text with &, <, >, " and ' replaced by entities.
 */
function escapeHtml(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Render a value as a single-quoted SQL string literal, doubling any embedded
 * single quotes so the value cannot terminate the literal early.
 *
 * @param {*} value - Value to quote; converted with String().
 * @returns {string} The quoted literal, including the surrounding quotes.
 */
function toSqlStringLiteral(value) {
  return `'${String(value).replace(/'/g, "''")}'`;
}

/**
 * Issue a CORS-mode HEAD request to see what the browser's fetch API reports
 * for the URL.
 *
 * @param {string} url - URL to probe.
 * @returns {Promise<{ok: boolean, detail: string}>} ok is false when fetch
 *   itself threw; detail is the status/CORS summary or the error message.
 */
async function probeCorsHead(url) {
  try {
    const resp = await fetch(url, { method: 'HEAD', mode: 'cors' });
    const corsHeader = resp.headers.get('access-control-allow-origin');
    return {
      ok: true,
      detail: `${resp.status} ${resp.statusText}, CORS: ${corsHeader || 'not set'}`,
    };
  } catch (fetchErr) {
    return { ok: false, detail: fetchErr.message };
  }
}

/**
 * Run one test: append a result card to #tests, probe the URL with a HEAD
 * fetch, then count its rows through DuckDB's read_parquet() and update the
 * card with the outcome.
 *
 * Never rejects for a failed load; failures are rendered in the card and
 * logged with console.error.
 *
 * @param {string} url - URL passed to read_parquet(). May be user input.
 * @param {string} label - Heading shown on the result card.
 * @returns {Promise<void>}
 */
async function testParquetLoad(url, label) {
  // Both values end up inside innerHTML and url may come from the input box,
  // so they are escaped once up front.
  const safeLabel = escapeHtml(label);
  const safeUrl = escapeHtml(url);

  const testDiv = document.createElement('div');
  testDiv.className = 'test pending';
  testDiv.innerHTML = `<h3>${safeLabel}</h3><pre>${safeUrl}</pre><div class="status">Running...</div>`;
  document.getElementById('tests').appendChild(testDiv);

  const startTime = performance.now();
  const elapsedSeconds = () => ((performance.now() - startTime) / 1000).toFixed(2);

  // Probe once; the same result is shown whether the DuckDB read succeeds or not.
  const probe = await probeCorsHead(url);

  try {
    const result = await connection.query(
      `SELECT COUNT(*) as cnt FROM read_parquet(${toSqlStringLiteral(url)})`
    );
    const count = result.get(0).cnt;
    const elapsed = elapsedSeconds();
    const fetchResult = probe.ok ? `fetch HEAD: ${probe.detail}` : `fetch HEAD failed: ${probe.detail}`;

    testDiv.className = 'test success';
    testDiv.innerHTML = `
      <h3>${safeLabel}</h3>
      <pre>${safeUrl}</pre>
      <div class="status" style="color: green;">SUCCESS - ${escapeHtml(count)} rows loaded in ${elapsed}s</div>
      <div style="font-size: 0.85rem; color: #666; margin-top: 0.5rem;">${escapeHtml(fetchResult)}</div>
    `;
  } catch (e) {
    const elapsed = elapsedSeconds();
    const fetchResult = probe.ok ? `fetch HEAD: ${probe.detail}` : `fetch HEAD also failed: ${probe.detail}`;

    testDiv.className = 'test failure';
    testDiv.innerHTML = `
      <h3>${safeLabel}</h3>
      <pre>${safeUrl}</pre>
      <div class="status" style="color: red;">FAILED after ${elapsed}s</div>
      <pre style="color: red;">${escapeHtml(e.message || e.toString())}</pre>
      <div style="font-size: 0.85rem; color: #666; margin-top: 0.5rem;">${escapeHtml(fetchResult)}</div>
    `;
    console.error(`Test failed for ${url}:`, e);
  }
}
149
+
150
/**
 * Handler for the "Run All Tests" button: clears earlier result cards, makes
 * sure DuckDB is initialized, then runs every TEST_URLS entry one after
 * another. Exposed on window because the button uses an inline onclick.
 */
window.runAllTests = async function() {
  const resultsContainer = document.getElementById('tests');
  resultsContainer.innerHTML = '';

  // Only initialize when no connection exists yet; give up if that fails.
  const ready = connection ? true : await initDuckDB();
  if (!ready) {
    return;
  }

  for (const { url, label } of TEST_URLS) {
    await testParquetLoad(url, label);
  }
};
160
+
161
/**
 * Handler for the "Test Custom URL" button: reads the URL typed into
 * #custom-url, makes sure DuckDB is initialized, and runs a single test
 * against it. Exposed on window because the button uses an inline onclick.
 */
window.runCustomTest = async function() {
  const inputEl = document.getElementById('custom-url');
  const customUrl = inputEl.value.trim();
  if (customUrl === '') {
    alert('Enter a URL first');
    return;
  }

  // Initialize only when no connection exists yet; stop if that fails.
  if (!connection && !(await initDuckDB())) {
    return;
  }

  await testParquetLoad(customUrl, 'Custom URL');
};
170
+
171
// Start initializing as soon as the module loads. The promise is deliberately
// not awaited: initDuckDB() catches its own errors and reports them on the page.
void initDuckDB();
173
+ </script>
174
+ </body>
175
  </html>