Javedalam committed on
Commit
8c05aeb
·
verified ·
1 Parent(s): de5ca6f

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +9 -21
index.html CHANGED
@@ -3,7 +3,7 @@
3
  <head>
4
  <meta charset="utf-8" />
5
  <meta name="viewport" content="width=device-width,initial-scale=1" />
6
- <title>WebGPU · Transformers.js · Better Image Captioning</title>
7
  <style>
8
  body { font: 16px/1.45 system-ui, sans-serif; margin: 24px auto; max-width: 900px; padding: 0 16px; }
9
  .card { border:1px solid #4443; border-radius:12px; padding:16px; }
@@ -25,7 +25,7 @@
25
  <h4>Output</h4>
26
  <div id="log" class="log">Loading model…</div>
27
  <p class="muted">
28
- Model: <code>Xenova/blip-image-captioning-large</code><br />
29
  Backend: <span id="backend">…</span>
30
  </p>
31
  </div>
@@ -38,37 +38,23 @@
38
  const imgEl = document.getElementById('preview');
39
  const backendEl = document.getElementById('backend');
40
 
41
- // Prefer WebGPU; fall back to WASM
42
  const hasWebGPU = 'gpu' in navigator;
43
  let device = hasWebGPU ? 'webgpu' : 'wasm';
44
  backendEl.textContent = device.toUpperCase();
45
  envEl.textContent = hasWebGPU ? '✅ WebGPU detected (will fallback if slow)…' : '⚠️ Using WASM (CPU).';
46
 
47
- // --- SINGLE FIX: force ?download=1 on HF "resolve" URLs to avoid 401s ---
48
- const _fetch = window.fetch.bind(window);
49
- window.fetch = (url, opts) => {
50
- try {
51
- const u = new URL(url);
52
- if (u.hostname === 'huggingface.co' && u.pathname.includes('/resolve/')) {
53
- if (!u.searchParams.has('download')) u.searchParams.set('download', '1');
54
- url = u.toString();
55
- }
56
- } catch {}
57
- return _fetch(url, opts);
58
- };
59
- // -----------------------------------------------------------------------
60
-
61
  // Load Transformers.js v3
62
  const { pipeline } = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3');
63
 
64
  // Watchdog: if WebGPU load takes too long, retry on WASM
65
- const LOAD_TIMEOUT_MS = 30000;
66
  let captioner;
67
 
68
  async function buildPipeline(targetDevice) {
69
  logEl.textContent = `Loading model… device=${targetDevice}`;
70
  backendEl.textContent = targetDevice.toUpperCase();
71
- return await pipeline('image-to-text', 'Xenova/blip-image-captioning-large', { device: targetDevice });
72
  }
73
 
74
  try {
@@ -80,7 +66,8 @@
80
  captioner = await buildPipeline('wasm');
81
  }
82
  } catch (e) {
83
- if (hasWebGPU && (e.message === 'webgpu-timeout' || String(e).includes('webgpu'))) {
 
84
  envEl.textContent = '⚠️ WebGPU load slow/failed → falling back to WASM.';
85
  device = 'wasm';
86
  captioner = await buildPipeline('wasm');
@@ -103,7 +90,7 @@
103
  imgEl.src = imgURL;
104
  });
105
 
106
- // Run captioning (beam search for better quality)
107
  runBtn.addEventListener('click', async () => {
108
  if (!captioner) return;
109
  if (!imgURL) { logEl.textContent = 'Pick an image first.'; return; }
@@ -126,3 +113,4 @@
126
  </html>
127
 
128
 
 
 
3
  <head>
4
  <meta charset="utf-8" />
5
  <meta name="viewport" content="width=device-width,initial-scale=1" />
6
+ <title>WebGPU · Transformers.js · Image Captioning (Works)</title>
7
  <style>
8
  body { font: 16px/1.45 system-ui, sans-serif; margin: 24px auto; max-width: 900px; padding: 0 16px; }
9
  .card { border:1px solid #4443; border-radius:12px; padding:16px; }
 
25
  <h4>Output</h4>
26
  <div id="log" class="log">Loading model…</div>
27
  <p class="muted">
28
+ Model: <code>Xenova/vit-gpt2-image-captioning</code><br />
29
  Backend: <span id="backend">…</span>
30
  </p>
31
  </div>
 
38
  const imgEl = document.getElementById('preview');
39
  const backendEl = document.getElementById('backend');
40
 
41
+ // Prefer WebGPU; fall back to WASM if unavailable/slow
42
  const hasWebGPU = 'gpu' in navigator;
43
  let device = hasWebGPU ? 'webgpu' : 'wasm';
44
  backendEl.textContent = device.toUpperCase();
45
  envEl.textContent = hasWebGPU ? '✅ WebGPU detected (will fallback if slow)…' : '⚠️ Using WASM (CPU).';
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  // Load Transformers.js v3
48
  const { pipeline } = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3');
49
 
50
  // Watchdog: if WebGPU load takes too long, retry on WASM
51
+ const LOAD_TIMEOUT_MS = 25000;
52
  let captioner;
53
 
54
  async function buildPipeline(targetDevice) {
55
  logEl.textContent = `Loading model… device=${targetDevice}`;
56
  backendEl.textContent = targetDevice.toUpperCase();
57
+ return await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning', { device: targetDevice });
58
  }
59
 
60
  try {
 
66
  captioner = await buildPipeline('wasm');
67
  }
68
  } catch (e) {
69
+ // Fallback once to WASM if WebGPU fails or stalls
70
+ if (hasWebGPU && (e.message === 'webgpu-timeout' || String(e).toLowerCase().includes('webgpu'))) {
71
  envEl.textContent = '⚠️ WebGPU load slow/failed → falling back to WASM.';
72
  device = 'wasm';
73
  captioner = await buildPipeline('wasm');
 
90
  imgEl.src = imgURL;
91
  });
92
 
93
+ // Run captioning (beam search for better captions)
94
  runBtn.addEventListener('click', async () => {
95
  if (!captioner) return;
96
  if (!imgURL) { logEl.textContent = 'Pick an image first.'; return; }
 
113
  </html>
114
 
115
 
116
+