Spaces:

Javedalam
/

transformersjs-webgpu-captioning

Running

App Files Files Community

Javedalam committed on Aug 31, 2025

Commit

7ac754e

verified ·

1 Parent(s): ca67b71

Update index.html

Browse files

Files changed (1) hide show

index.html +101 -16

index.html CHANGED Viewed

@@ -1,19 +1,104 @@
 <!doctype html>
 <html>
-	<head>
-		<meta charset="utf-8" />
-		<meta name="viewport" content="width=device-width" />
-		<title>My static Space</title>
-		<link rel="stylesheet" href="style.css" />
-	</head>
-	<body>
-		<div class="card">
-			<h1>Welcome to your static Space!</h1>
-			<p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
-			<p>
-				Also don't forget to check the
-				<a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
-			</p>
-		</div>
-	</body>
 </html>

 <!doctype html>
 <html>
+<head>
+  <meta charset="utf-8" />
+  <meta name="viewport" content="width=device-width,initial-scale=1" />
+  <title>WebGPU · Transformers.js · Image Captioning</title>
+  <style>
+    body { font: 16px/1.45 system-ui, sans-serif; margin: 24px auto; max-width: 900px; padding: 0 16px; }
+    .card { border:1px solid #4443; border-radius:12px; padding:16px; }
+    .log { white-space:pre-wrap; background:#111; color:#0f0; padding:12px; border-radius:8px; min-height:80px; }
+    img { max-width:100%; border-radius:8px; margin-top:10px; }
+    .muted { opacity:.75; font-size:14px; }
+    button,input { font:inherit; }
+  </style>
+</head>
+<body>
+  <h2>Image → Text in your browser (Transformers.js + WebGPU)</h2>
+  <p id="env">Probing environment…</p>
+  <div class="card">
+    <h3>Caption an image (file upload)</h3>
+    <input id="file" type="file" accept="image/*" />
+    <button id="run" disabled>Caption</button>
+    <div><img id="preview" alt="" /></div>
+    <h4>Output</h4>
+    <div id="log" class="log">Loading model…</div>
+    <p class="muted">
+      Model: <code>Xenova/blip-image-captioning-base</code><br />
+      Backend: <span id="backend">…</span>
+    </p>
+  </div>
+  <script type="module">
+    const envEl = document.getElementById('env');
+    const fileEl = document.getElementById('file');
+    const runBtn = document.getElementById('run');
+    const logEl  = document.getElementById('log');
+    const imgEl  = document.getElementById('preview');
+    const backendEl = document.getElementById('backend');
+    // 1) WebGPU probe (will use WASM if unavailable)
+    const hasWebGPU = 'gpu' in navigator;
+    const device = hasWebGPU ? 'webgpu' : 'wasm';
+    backendEl.textContent = device.toUpperCase();
+    envEl.textContent = hasWebGPU
+      ? '✅ WebGPU detected. Using GPU when possible (falls back to FP32 automatically if no shader-f16).'
+      : '⚠️ No WebGPU, falling back to WASM (CPU).';
+    // Optional: show if shader-f16 exists (info only)
+    if (hasWebGPU) {
+      try {
+        const adapter = await navigator.gpu.requestAdapter();
+        if (adapter && !adapter.features.has('shader-f16')) {
+          envEl.textContent += ' (no shader-f16; running in FP32)';
+        }
+      } catch { /* ignore */ }
+    }
+    // 2) Load Transformers.js v3 from CDN
+    let pipeline;
+    try {
+      ({ pipeline } = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3'));
+    } catch (e) {
+      logEl.textContent = 'Failed to load Transformers.js: ' + e;
+      throw e;
+    }
+    // 3) Build the captioning pipeline (FP16 not required)
+    let captioner;
+    try {
+      captioner = await pipeline('image-to-text', 'Xenova/blip-image-captioning-base', { device });
+      logEl.textContent = `Model ready · device=${device}`;
+      runBtn.disabled = false;
+    } catch (e) {
+      logEl.textContent = 'Error loading model: ' + e;
+      console.error(e);
+    }
+    // 4) Preview selected image
+    let imgURL = null;
+    fileEl.addEventListener('change', () => {
+      if (imgURL) URL.revokeObjectURL(imgURL);
+      const f = fileEl.files?.[0];
+      if (!f) return;
+      imgURL = URL.createObjectURL(f);
+      imgEl.src = imgURL;
+    });
+    // 5) Run captioning
+    runBtn.addEventListener('click', async () => {
+      if (!captioner) return;
+      if (!imgURL) { logEl.textContent = 'Pick an image first.'; return; }
+      logEl.textContent = 'Running…';
+      try {
+        const out = await captioner(imgURL); // [{ generated_text }]
+        logEl.textContent = out[0].generated_text;
+      } catch (e) {
+        logEl.textContent = 'Inference error: ' + e;
+        console.error(e);
+      }
+    });
+  </script>
+</body>
 </html>