| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <meta charset="UTF-8"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <title>DuckDB WASM - HF Parquet CORS Test</title> |
| <style> |
| body { font-family: system-ui, sans-serif; max-width: 800px; margin: 2rem auto; padding: 0 1rem; } |
| h1 { font-size: 1.5rem; } |
| .test { border: 1px solid #ddd; border-radius: 8px; padding: 1rem; margin: 1rem 0; } |
| .test h3 { margin-top: 0; font-size: 0.95rem; word-break: break-all; } |
| .pending { border-left: 4px solid #888; } |
| .success { border-left: 4px solid #22c55e; background: #f0fdf4; } |
| .failure { border-left: 4px solid #ef4444; background: #fef2f2; } |
| .status { font-weight: bold; } |
| pre { background: #f1f5f9; padding: 0.5rem; border-radius: 4px; overflow-x: auto; font-size: 0.85rem; } |
| #init-status { padding: 0.5rem; margin: 1rem 0; } |
| button { padding: 0.5rem 1rem; border-radius: 4px; border: 1px solid #ddd; cursor: pointer; margin: 0.25rem; } |
| button:hover { background: #f1f5f9; } |
| .info { background: #eff6ff; border: 1px solid #bfdbfe; border-radius: 8px; padding: 1rem; margin: 1rem 0; font-size: 0.9rem; } |
| </style> |
| </head> |
| <body> |
| <h1>DuckDB WASM - HF Parquet CORS Test</h1> |
|
|
| <div class="info"> |
| <strong>Page origin:</strong> <span id="page-origin"></span><br> |
| <strong>DuckDB WASM version:</strong> 1.30.0 (same as embedding-atlas)<br> |
| <strong>forceFullHTTPReads:</strong> true (same as embedding-atlas) |
| </div> |
|
|
| <div id="init-status">Initializing DuckDB WASM...</div> |
|
|
| <div> |
| <button onclick="runAllTests()">Run All Tests</button> |
| <button onclick="runCustomTest()">Test Custom URL</button> |
| <input type="text" id="custom-url" placeholder="Enter parquet URL..." style="width: 400px; padding: 0.5rem; border: 1px solid #ddd; border-radius: 4px;"> |
| </div> |
|
|
| <div id="tests"></div> |
|
|
| <script type="module"> |
| import * as duckdb from 'https://cdn.jsdelivr.net/npm/@duckdb/duckdb-wasm@1.30.0/+esm'; |
| |
// Display the origin this page is served from in the info panel.
const pageOriginLabel = document.getElementById('page-origin');
pageOriginLabel.textContent = window.location.origin;

// DuckDB handles shared by the functions below; both are assigned by initDuckDB().
let db;
let connection;
| |
| |
// Auto-converted parquet file of a small Hugging Face dataset (GLUE / CoLA, test split).
const GLUE_COLA_PARQUET_URL =
  'https://huggingface.co/datasets/nyu-mll/glue/resolve/refs%2Fconvert%2Fparquet/cola/test/0000.parquet';

// Preset cases exercised by "Run All Tests"; each entry is a display label plus the URL to load.
const TEST_URLS = [
  {
    label: 'HF resolve URL (small dataset, no ?download=true)',
    url: GLUE_COLA_PARQUET_URL,
  },
  {
    label: 'HF resolve URL (small dataset, with ?download=true)',
    url: `${GLUE_COLA_PARQUET_URL}?download=true`,
  },
  {
    // NOTE(review): this targets the datasets-server `rows` endpoint rather than a
    // .parquet file — presumably a deliberate negative case; confirm.
    label: 'HF datasets-server parquet URL',
    url: 'https://datasets-server.huggingface.co/rows?dataset=nyu-mll/glue&config=cola&split=test&offset=0&length=10',
  },
];
| |
// In-flight (or successfully completed) initialization. Sharing it keeps
// overlapping callers — the page-load call and an early button click — from
// each spawning their own worker and database.
let duckdbInitPromise = null;

/**
 * Shows the initialization status as bold, colored plain text.
 * The message is assigned via textContent so error text is never parsed as HTML.
 *
 * @param {string} message - Text to display.
 * @param {string} color - CSS color for the text.
 */
function showInitStatus(message, color) {
  const badge = document.createElement('span');
  badge.style.color = color;
  badge.style.fontWeight = 'bold';
  badge.textContent = message;

  const statusEl = document.getElementById('init-status');
  statusEl.textContent = '';
  statusEl.appendChild(badge);
}

/**
 * Performs one initialization attempt: picks a jsDelivr bundle, starts the
 * worker, instantiates and opens the database, and connects.
 * `db` and `connection` are published only once every step has succeeded.
 *
 * @returns {Promise<boolean>} true on success, false if any step failed.
 */
async function bootDuckDB() {
  let workerUrl;
  let worker;
  try {
    const bundle = await duckdb.selectBundle(duckdb.getJsDelivrBundles());

    // The worker script lives on another origin, so it is loaded through a
    // same-origin blob that importScripts() it.
    workerUrl = URL.createObjectURL(
      new Blob([`importScripts("${bundle.mainWorker}");`], { type: 'text/javascript' })
    );
    worker = new Worker(workerUrl);

    const database = new duckdb.AsyncDuckDB(new duckdb.ConsoleLogger(), worker);
    await database.instantiate(bundle.mainModule, bundle.pthreadWorker);
    await database.open({
      filesystem: {
        forceFullHTTPReads: true,
      },
    });
    const conn = await database.connect();

    db = database;
    connection = conn;

    showInitStatus('DuckDB WASM initialized successfully', 'green');
    return true;
  } catch (e) {
    // Do not leave an orphaned worker running after a failed attempt.
    if (worker) worker.terminate();
    showInitStatus(`DuckDB init failed: ${(e && e.message) || String(e)}`, 'red');
    console.error('DuckDB init error:', e);
    return false;
  } finally {
    // The blob URL is only needed to start the worker; release it either way.
    if (workerUrl) URL.revokeObjectURL(workerUrl);
  }
}

/**
 * Initializes DuckDB WASM and stores the database and connection in the
 * shared `db` / `connection` variables.
 *
 * Concurrent and repeated calls share a single attempt; a failed attempt is
 * forgotten so that a later call can retry.
 *
 * @returns {Promise<boolean>} true when a connection is available, false on failure.
 */
async function initDuckDB() {
  if (!duckdbInitPromise) {
    duckdbInitPromise = bootDuckDB().then((ok) => {
      if (!ok) duckdbInitPromise = null;
      return ok;
    });
  }
  return duckdbInitPromise;
}
| |
/**
 * Escapes a value for interpolation into HTML element content or a quoted attribute.
 *
 * @param {*} value - Value to render; coerced to a string.
 * @returns {string} The text with &, <, >, " and ' replaced by entities.
 */
function escapeHtml(value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Quotes a value as a SQL string literal, doubling embedded single quotes.
 *
 * @param {*} value - Value to quote; coerced to a string.
 * @returns {string} The quoted literal, including the surrounding quotes.
 */
function toSqlStringLiteral(value) {
  return `'${String(value).replace(/'/g, "''")}'`;
}

/**
 * Issues a CORS-mode HEAD request as a diagnostic that is independent of DuckDB.
 *
 * @param {string} url - URL to probe.
 * @returns {Promise<{failed: boolean, text: string}>} On success, `text` is the
 *   full summary line; on failure, `text` is the error message only.
 */
async function probeWithHead(url) {
  try {
    const resp = await fetch(url, { method: 'HEAD', mode: 'cors' });
    // NOTE(review): browsers expose only CORS-safelisted response headers to
    // scripts on cross-origin responses, so this lookup likely yields null
    // ("not set") even when the server sent the header — confirm. A resolved
    // cors-mode fetch is itself evidence that the request was allowed.
    const corsHeader = resp.headers.get('access-control-allow-origin');
    return {
      failed: false,
      text: `fetch HEAD: ${resp.status} ${resp.statusText}, CORS: ${corsHeader || 'not set'}`,
    };
  } catch (fetchErr) {
    return { failed: true, text: fetchErr.message };
  }
}

/**
 * Loads a parquet URL through DuckDB's read_parquet and appends a result card
 * to the #tests container: first "Running...", then success (row count and
 * elapsed time) or failure (error message), each with the HEAD probe summary.
 *
 * All dynamic text is HTML-escaped before being written to the card, and the
 * URL is quoted as a SQL literal, because the custom-URL path passes raw user input.
 *
 * @param {string} url - Parquet URL to load.
 * @param {string} label - Heading shown on the result card.
 * @returns {Promise<void>} Resolves once the card shows the final result; query
 *   errors are rendered and logged rather than rethrown.
 */
async function testParquetLoad(url, label) {
  const cardHeader = `<h3>${escapeHtml(label)}</h3>
<pre>${escapeHtml(url)}</pre>`;

  const testDiv = document.createElement('div');
  testDiv.className = 'test pending';
  testDiv.innerHTML = `${cardHeader}<div class="status">Running...</div>`;
  document.getElementById('tests').appendChild(testDiv);

  const startTime = performance.now();
  const elapsedSeconds = () => ((performance.now() - startTime) / 1000).toFixed(2);

  // Probe once up front; the outcome is reused by both the success and failure cards.
  const probe = await probeWithHead(url);

  try {
    const result = await connection.query(
      `SELECT COUNT(*) as cnt FROM read_parquet(${toSqlStringLiteral(url)})`
    );
    const count = result.get(0).cnt;
    const elapsed = elapsedSeconds();
    const fetchResult = probe.failed ? `fetch HEAD failed: ${probe.text}` : probe.text;

    testDiv.className = 'test success';
    testDiv.innerHTML = `
${cardHeader}
<div class="status" style="color: green;">SUCCESS - ${escapeHtml(count)} rows loaded in ${elapsed}s</div>
<div style="font-size: 0.85rem; color: #666; margin-top: 0.5rem;">${escapeHtml(fetchResult)}</div>
`;
  } catch (e) {
    const elapsed = elapsedSeconds();
    const fetchResult = probe.failed ? `fetch HEAD also failed: ${probe.text}` : probe.text;

    testDiv.className = 'test failure';
    testDiv.innerHTML = `
${cardHeader}
<div class="status" style="color: red;">FAILED after ${elapsed}s</div>
<pre style="color: red;">${escapeHtml(e.message || e.toString())}</pre>
<div style="font-size: 0.85rem; color: #666; margin-top: 0.5rem;">${escapeHtml(fetchResult)}</div>
`;
    console.error(`Test failed for ${url}:`, e);
  }
}
| |
// Button handler: clears earlier result cards, makes sure DuckDB is ready,
// then runs every preset URL one after another.
window.runAllTests = async function () {
  const resultsContainer = document.getElementById('tests');
  resultsContainer.innerHTML = '';

  // Initialize lazily when no connection exists yet; give up if that fails.
  const ready = Boolean(connection) || (await initDuckDB());
  if (!ready) {
    return;
  }

  for (const { url, label } of TEST_URLS) {
    await testParquetLoad(url, label);
  }
};
| |
// Button handler: runs a single test against the URL typed into #custom-url.
window.runCustomTest = async function () {
  const urlInput = document.getElementById('custom-url');
  const url = urlInput.value.trim();
  if (url === '') {
    alert('Enter a URL first');
    return;
  }

  // Initialize lazily when no connection exists yet; give up if that fails.
  const ready = Boolean(connection) || (await initDuckDB());
  if (!ready) {
    return;
  }

  await testParquetLoad(url, 'Custom URL');
};
| |
| |
// Start initializing as soon as the module loads. The promise is deliberately
// not awaited: initDuckDB() reports its own failures in the status line.
void initDuckDB();
| </script> |
| </body> |
| </html> |
|
|