Update index.html
Browse files- index.html +34 -11
index.html
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
<head>
|
| 4 |
<meta charset="utf-8" />
|
| 5 |
<meta name="viewport" content="width=device-width,initial-scale=1" />
|
| 6 |
-
<title>WebGPU · Transformers.js · Image Captioning
|
| 7 |
<style>
|
| 8 |
body { font: 16px/1.45 system-ui, sans-serif; margin: 24px auto; max-width: 900px; padding: 0 16px; }
|
| 9 |
.card { border:1px solid #4443; border-radius:12px; padding:16px; }
|
|
@@ -21,7 +21,7 @@
|
|
| 21 |
<h3>Caption an image (file upload)</h3>
|
| 22 |
<input id="file" type="file" accept="image/*" />
|
| 23 |
<button id="run" disabled>Caption</button>
|
| 24 |
-
<div><img id="preview" alt="" /></div>
|
| 25 |
<h4>Output</h4>
|
| 26 |
<div id="log" class="log">Loading model…</div>
|
| 27 |
<p class="muted">
|
|
@@ -66,7 +66,6 @@
|
|
| 66 |
captioner = await buildPipeline('wasm');
|
| 67 |
}
|
| 68 |
} catch (e) {
|
| 69 |
-
// Fallback once to WASM if WebGPU fails or stalls
|
| 70 |
if (hasWebGPU && (e.message === 'webgpu-timeout' || String(e).toLowerCase().includes('webgpu'))) {
|
| 71 |
envEl.textContent = '⚠️ WebGPU load slow/failed → falling back to WASM.';
|
| 72 |
device = 'wasm';
|
|
@@ -80,23 +79,46 @@
|
|
| 80 |
logEl.textContent = `Model ready · device=${device}`;
|
| 81 |
runBtn.disabled = false;
|
| 82 |
|
| 83 |
-
//
|
| 84 |
-
let
|
|
|
|
| 85 |
fileEl.addEventListener('change', () => {
|
| 86 |
-
|
| 87 |
const f = fileEl.files?.[0];
|
| 88 |
-
if (!f) return;
|
| 89 |
-
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
});
|
|
|
|
| 92 |
|
| 93 |
// Run captioning (beam search for better captions)
|
| 94 |
runBtn.addEventListener('click', async () => {
|
| 95 |
if (!captioner) return;
|
| 96 |
-
if (!
|
| 97 |
logEl.textContent = 'Running…';
|
| 98 |
try {
|
| 99 |
-
const out = await captioner(
|
| 100 |
max_new_tokens: 48,
|
| 101 |
num_beams: 5,
|
| 102 |
do_sample: false,
|
|
@@ -114,3 +136,4 @@
|
|
| 114 |
|
| 115 |
|
| 116 |
|
|
|
|
|
|
| 3 |
<head>
|
| 4 |
<meta charset="utf-8" />
|
| 5 |
<meta name="viewport" content="width=device-width,initial-scale=1" />
|
| 6 |
+
<title>WebGPU · Transformers.js · Image Captioning</title>
|
| 7 |
<style>
|
| 8 |
body { font: 16px/1.45 system-ui, sans-serif; margin: 24px auto; max-width: 900px; padding: 0 16px; }
|
| 9 |
.card { border:1px solid #4443; border-radius:12px; padding:16px; }
|
|
|
|
| 21 |
<h3>Caption an image (file upload)</h3>
|
| 22 |
<input id="file" type="file" accept="image/*" />
|
| 23 |
<button id="run" disabled>Caption</button>
|
| 24 |
+
<div><img id="preview" alt="preview will appear here" /></div>
|
| 25 |
<h4>Output</h4>
|
| 26 |
<div id="log" class="log">Loading model…</div>
|
| 27 |
<p class="muted">
|
|
|
|
| 66 |
captioner = await buildPipeline('wasm');
|
| 67 |
}
|
| 68 |
} catch (e) {
|
|
|
|
| 69 |
if (hasWebGPU && (e.message === 'webgpu-timeout' || String(e).toLowerCase().includes('webgpu'))) {
|
| 70 |
envEl.textContent = '⚠️ WebGPU load slow/failed → falling back to WASM.';
|
| 71 |
device = 'wasm';
|
|
|
|
| 79 |
logEl.textContent = `Model ready · device=${device}`;
|
| 80 |
runBtn.disabled = false;
|
| 81 |
|
| 82 |
+
// ---------- Robust file load (FileReader → data URL, with checks) ----------
|
| 83 |
+
let imgDataURL = null;
|
| 84 |
+
|
| 85 |
fileEl.addEventListener('change', () => {
|
| 86 |
+
logEl.textContent = 'Image selected. Preparing preview…';
|
| 87 |
const f = fileEl.files?.[0];
|
| 88 |
+
if (!f) { logEl.textContent = 'No file chosen.'; return; }
|
| 89 |
+
|
| 90 |
+
// Some Android cameras save HEIC/HEIF which many browsers can’t decode.
|
| 91 |
+
if (!f.type.startsWith('image/')) {
|
| 92 |
+
logEl.textContent = `Unsupported file type: ${f.type || 'unknown'}. Use JPG/PNG.`;
|
| 93 |
+
return;
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
const reader = new FileReader();
|
| 97 |
+
reader.onerror = () => {
|
| 98 |
+
logEl.textContent = 'Failed to read file. Try another image.';
|
| 99 |
+
};
|
| 100 |
+
reader.onload = async () => {
|
| 101 |
+
imgDataURL = reader.result; // base64 data URL
|
| 102 |
+
imgEl.src = imgDataURL;
|
| 103 |
+
try {
|
| 104 |
+
// ensure it decoded before we allow run
|
| 105 |
+
if (imgEl.decode) await imgEl.decode();
|
| 106 |
+
logEl.textContent = 'Preview ready. Click “Caption”.';
|
| 107 |
+
} catch {
|
| 108 |
+
logEl.textContent = 'Could not decode image. Try a JPG/PNG under ~5 MB.';
|
| 109 |
+
}
|
| 110 |
+
};
|
| 111 |
+
reader.readAsDataURL(f);
|
| 112 |
});
|
| 113 |
+
// --------------------------------------------------------------------------
|
| 114 |
|
| 115 |
// Run captioning (beam search for better captions)
|
| 116 |
runBtn.addEventListener('click', async () => {
|
| 117 |
if (!captioner) return;
|
| 118 |
+
if (!imgDataURL) { logEl.textContent = 'Pick an image first.'; return; }
|
| 119 |
logEl.textContent = 'Running…';
|
| 120 |
try {
|
| 121 |
+
const out = await captioner(imgDataURL, {
|
| 122 |
max_new_tokens: 48,
|
| 123 |
num_beams: 5,
|
| 124 |
do_sample: false,
|
|
|
|
| 136 |
|
| 137 |
|
| 138 |
|
| 139 |
+
|