Update index.html
Browse files — index.html (+40 −12)
index.html
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
<head>
|
| 4 |
<meta charset="utf-8" />
|
| 5 |
<meta name="viewport" content="width=device-width,initial-scale=1" />
|
| 6 |
-
<title>WebGPU · Transformers.js · Image Captioning</title>
|
| 7 |
<style>
|
| 8 |
body { font: 16px/1.45 system-ui, sans-serif; margin: 24px auto; max-width: 900px; padding: 0 16px; }
|
| 9 |
.card { border:1px solid #4443; border-radius:12px; padding:16px; }
|
|
@@ -25,7 +25,7 @@
|
|
| 25 |
<h4>Output</h4>
|
| 26 |
<div id="log" class="log">Loading model…</div>
|
| 27 |
<p class="muted">
|
| 28 |
-
Model: <code>Xenova/
|
| 29 |
Backend: <span id="backend">…</span>
|
| 30 |
</p>
|
| 31 |
</div>
|
|
@@ -40,24 +40,46 @@
|
|
| 40 |
|
| 41 |
// Prefer WebGPU; fall back to WASM
|
| 42 |
const hasWebGPU = 'gpu' in navigator;
|
| 43 |
-
|
| 44 |
backendEl.textContent = device.toUpperCase();
|
| 45 |
-
envEl.textContent = hasWebGPU ? '✅ WebGPU detected
|
|
|
|
| 46 |
|
| 47 |
// Load Transformers.js v3
|
| 48 |
const { pipeline } = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3');
|
| 49 |
|
| 50 |
-
//
|
|
|
|
| 51 |
let captioner;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
try {
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
} catch (e) {
|
| 57 |
-
|
| 58 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
}
|
| 60 |
|
|
|
|
|
|
|
|
|
|
| 61 |
// Preview selected image
|
| 62 |
let imgURL = null;
|
| 63 |
fileEl.addEventListener('change', () => {
|
|
@@ -68,13 +90,18 @@
|
|
| 68 |
imgEl.src = imgURL;
|
| 69 |
});
|
| 70 |
|
| 71 |
-
// Run captioning
|
| 72 |
runBtn.addEventListener('click', async () => {
|
| 73 |
if (!captioner) return;
|
| 74 |
if (!imgURL) { logEl.textContent = 'Pick an image first.'; return; }
|
| 75 |
logEl.textContent = 'Running…';
|
| 76 |
try {
|
| 77 |
-
const out = await captioner(imgURL
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
logEl.textContent = out[0].generated_text;
|
| 79 |
} catch (e) {
|
| 80 |
logEl.textContent = 'Inference error: ' + e;
|
|
@@ -85,3 +112,4 @@
|
|
| 85 |
</body>
|
| 86 |
</html>
|
| 87 |
|
|
|
|
|
|
| 3 |
<head>
|
| 4 |
<meta charset="utf-8" />
|
| 5 |
<meta name="viewport" content="width=device-width,initial-scale=1" />
|
| 6 |
+
<title>WebGPU · Transformers.js · Better Image Captioning</title>
|
| 7 |
<style>
|
| 8 |
body { font: 16px/1.45 system-ui, sans-serif; margin: 24px auto; max-width: 900px; padding: 0 16px; }
|
| 9 |
.card { border:1px solid #4443; border-radius:12px; padding:16px; }
|
|
|
|
| 25 |
<h4>Output</h4>
|
| 26 |
<div id="log" class="log">Loading model…</div>
|
| 27 |
<p class="muted">
|
| 28 |
+
Model: <code>Xenova/blip-image-captioning-large</code><br />
|
| 29 |
Backend: <span id="backend">…</span>
|
| 30 |
</p>
|
| 31 |
</div>
|
|
|
|
// Prefer WebGPU; fall back to WASM
const hasWebGPU = 'gpu' in navigator;
let device = hasWebGPU ? 'webgpu' : 'wasm';
backendEl.textContent = device.toUpperCase();
envEl.textContent = hasWebGPU ? '✅ WebGPU detected (will fallback if slow)…'
                              : '⚠️ Using WASM (CPU).';

// Load Transformers.js v3
const { pipeline } = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3');

// Watchdog: if WebGPU load takes too long, retry on WASM
const LOAD_TIMEOUT_MS = 30000; // 30s

/**
 * Build an image-to-text pipeline on the requested backend and keep the UI in sync.
 * @param {'webgpu'|'wasm'} targetDevice - Transformers.js execution backend.
 * @returns {Promise<Function>} the ready captioning pipeline.
 */
async function buildPipeline(targetDevice) {
  logEl.textContent = `Loading model… device=${targetDevice}`;
  backendEl.textContent = targetDevice.toUpperCase();
  return await pipeline('image-to-text', 'Xenova/blip-image-captioning-large', { device: targetDevice });
}

let captioner;
try {
  if (device === 'webgpu') {
    const webgpuPromise = buildPipeline('webgpu');
    // If the race below is lost to the timeout, this promise keeps running in the
    // background; swallow its eventual rejection so a late WebGPU failure does not
    // surface as an unhandled-rejection error after we have already fallen back.
    webgpuPromise.catch(() => {});
    let watchdogId;
    const timeout = new Promise((_, reject) => {
      watchdogId = setTimeout(() => reject(new Error('webgpu-timeout')), LOAD_TIMEOUT_MS);
    });
    try {
      captioner = await Promise.race([webgpuPromise, timeout]);
    } finally {
      clearTimeout(watchdogId); // don't leave the watchdog pending after a fast load
    }
  } else {
    captioner = await buildPipeline('wasm');
  }
} catch (e) {
  // Retry once on WASM when WebGPU stalled or failed; rethrow anything else.
  // e?.message guards against non-Error throwables (e.g. thrown strings).
  if (hasWebGPU && (e?.message === 'webgpu-timeout' || String(e).includes('webgpu'))) {
    envEl.textContent = '⚠️ WebGPU load slow/failed → falling back to WASM.';
    device = 'wasm';
    captioner = await buildPipeline('wasm');
  } else {
    logEl.textContent = 'Error loading model: ' + e;
    throw e;
  }
}

logEl.textContent = `Model ready · device=${device}`;
runBtn.disabled = false;
| 83 |
// Preview selected image
|
| 84 |
let imgURL = null;
|
| 85 |
fileEl.addEventListener('change', () => {
|
|
|
|
| 90 |
imgEl.src = imgURL;
|
| 91 |
});
|
| 92 |
|
| 93 |
+
// Run captioning with better decoding (beam search)
|
| 94 |
runBtn.addEventListener('click', async () => {
|
| 95 |
if (!captioner) return;
|
| 96 |
if (!imgURL) { logEl.textContent = 'Pick an image first.'; return; }
|
| 97 |
logEl.textContent = 'Running…';
|
| 98 |
try {
|
| 99 |
+
const out = await captioner(imgURL, {
|
| 100 |
+
max_new_tokens: 48,
|
| 101 |
+
num_beams: 5,
|
| 102 |
+
do_sample: false,
|
| 103 |
+
no_repeat_ngram_size: 3
|
| 104 |
+
}); // → [{ generated_text }]
|
| 105 |
logEl.textContent = out[0].generated_text;
|
| 106 |
} catch (e) {
|
| 107 |
logEl.textContent = 'Inference error: ' + e;
|
|
|
|
| 112 |
</body>
|
| 113 |
</html>
|
| 114 |
|
| 115 |
+
|