Javedalam committed on
Commit
50dd43a
·
verified ·
1 Parent(s): 7ac754e

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +10 -27
index.html CHANGED
@@ -25,7 +25,7 @@
25
  <h4>Output</h4>
26
  <div id="log" class="log">Loading model…</div>
27
  <p class="muted">
28
- Model: <code>Xenova/blip-image-captioning-base</code><br />
29
  Backend: <span id="backend">…</span>
30
  </p>
31
  </div>
@@ -38,37 +38,19 @@
38
  const imgEl = document.getElementById('preview');
39
  const backendEl = document.getElementById('backend');
40
 
41
- // 1) WebGPU probe (will use WASM if unavailable)
42
  const hasWebGPU = 'gpu' in navigator;
43
  const device = hasWebGPU ? 'webgpu' : 'wasm';
44
  backendEl.textContent = device.toUpperCase();
45
- envEl.textContent = hasWebGPU
46
- ? '✅ WebGPU detected. Using GPU when possible (falls back to FP32 automatically if no shader-f16).'
47
- : '⚠️ No WebGPU, falling back to WASM (CPU).';
48
 
49
- // Optional: show if shader-f16 exists (info only)
50
- if (hasWebGPU) {
51
- try {
52
- const adapter = await navigator.gpu.requestAdapter();
53
- if (adapter && !adapter.features.has('shader-f16')) {
54
- envEl.textContent += ' (no shader-f16; running in FP32)';
55
- }
56
- } catch { /* ignore */ }
57
- }
58
 
59
- // 2) Load Transformers.js v3 from CDN
60
- let pipeline;
61
- try {
62
- ({ pipeline } = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3'));
63
- } catch (e) {
64
- logEl.textContent = 'Failed to load Transformers.js: ' + e;
65
- throw e;
66
- }
67
-
68
- // 3) Build the captioning pipeline (FP16 not required)
69
  let captioner;
70
  try {
71
- captioner = await pipeline('image-to-text', 'Xenova/blip-image-captioning-base', { device });
72
  logEl.textContent = `Model ready · device=${device}`;
73
  runBtn.disabled = false;
74
  } catch (e) {
@@ -76,7 +58,7 @@
76
  console.error(e);
77
  }
78
 
79
- // 4) Preview selected image
80
  let imgURL = null;
81
  fileEl.addEventListener('change', () => {
82
  if (imgURL) URL.revokeObjectURL(imgURL);
@@ -86,7 +68,7 @@
86
  imgEl.src = imgURL;
87
  });
88
 
89
- // 5) Run captioning
90
  runBtn.addEventListener('click', async () => {
91
  if (!captioner) return;
92
  if (!imgURL) { logEl.textContent = 'Pick an image first.'; return; }
@@ -102,3 +84,4 @@
102
  </script>
103
  </body>
104
  </html>
 
 
25
  <h4>Output</h4>
26
  <div id="log" class="log">Loading model…</div>
27
  <p class="muted">
28
+ Model: <code>Xenova/vit-gpt2-image-captioning</code><br />
29
  Backend: <span id="backend">…</span>
30
  </p>
31
  </div>
 
38
  const imgEl = document.getElementById('preview');
39
  const backendEl = document.getElementById('backend');
40
 
41
+ // Prefer WebGPU; fall back to WASM
42
  const hasWebGPU = 'gpu' in navigator;
43
  const device = hasWebGPU ? 'webgpu' : 'wasm';
44
  backendEl.textContent = device.toUpperCase();
45
+ envEl.textContent = hasWebGPU ? '✅ WebGPU detected' : '⚠️ Using WASM (CPU)';
 
 
46
 
47
+ // Load Transformers.js v3
48
+ const { pipeline } = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3');
 
 
 
 
 
 
 
49
 
50
+ // Build captioning pipeline with a model that fetches cleanly
 
 
 
 
 
 
 
 
 
51
  let captioner;
52
  try {
53
+ captioner = await pipeline('image-to-text', 'Xenova/vit-gpt2-image-captioning', { device });
54
  logEl.textContent = `Model ready · device=${device}`;
55
  runBtn.disabled = false;
56
  } catch (e) {
 
58
  console.error(e);
59
  }
60
 
61
+ // Preview selected image
62
  let imgURL = null;
63
  fileEl.addEventListener('change', () => {
64
  if (imgURL) URL.revokeObjectURL(imgURL);
 
68
  imgEl.src = imgURL;
69
  });
70
 
71
+ // Run captioning
72
  runBtn.addEventListener('click', async () => {
73
  if (!captioner) return;
74
  if (!imgURL) { logEl.textContent = 'Pick an image first.'; return; }
 
84
  </script>
85
  </body>
86
  </html>
87
+