Javedalam committed on
Commit
e74a613
·
verified ·
1 Parent(s): 8c05aeb

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +34 -11
index.html CHANGED
@@ -3,7 +3,7 @@
3
  <head>
4
  <meta charset="utf-8" />
5
  <meta name="viewport" content="width=device-width,initial-scale=1" />
6
- <title>WebGPU · Transformers.js · Image Captioning (Works)</title>
7
  <style>
8
  body { font: 16px/1.45 system-ui, sans-serif; margin: 24px auto; max-width: 900px; padding: 0 16px; }
9
  .card { border:1px solid #4443; border-radius:12px; padding:16px; }
@@ -21,7 +21,7 @@
21
  <h3>Caption an image (file upload)</h3>
22
  <input id="file" type="file" accept="image/*" />
23
  <button id="run" disabled>Caption</button>
24
- <div><img id="preview" alt="" /></div>
25
  <h4>Output</h4>
26
  <div id="log" class="log">Loading model…</div>
27
  <p class="muted">
@@ -66,7 +66,6 @@
66
  captioner = await buildPipeline('wasm');
67
  }
68
  } catch (e) {
69
- // Fallback once to WASM if WebGPU fails or stalls
70
  if (hasWebGPU && (e.message === 'webgpu-timeout' || String(e).toLowerCase().includes('webgpu'))) {
71
  envEl.textContent = '⚠️ WebGPU load slow/failed → falling back to WASM.';
72
  device = 'wasm';
@@ -80,23 +79,46 @@
80
  logEl.textContent = `Model ready · device=${device}`;
81
  runBtn.disabled = false;
82
 
83
- // Preview selected image
84
- let imgURL = null;
 
85
  fileEl.addEventListener('change', () => {
86
- if (imgURL) URL.revokeObjectURL(imgURL);
87
  const f = fileEl.files?.[0];
88
- if (!f) return;
89
- imgURL = URL.createObjectURL(f);
90
- imgEl.src = imgURL;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  });
 
92
 
93
  // Run captioning (beam search for better captions)
94
  runBtn.addEventListener('click', async () => {
95
  if (!captioner) return;
96
- if (!imgURL) { logEl.textContent = 'Pick an image first.'; return; }
97
  logEl.textContent = 'Running…';
98
  try {
99
- const out = await captioner(imgURL, {
100
  max_new_tokens: 48,
101
  num_beams: 5,
102
  do_sample: false,
@@ -114,3 +136,4 @@
114
 
115
 
116
 
 
 
3
  <head>
4
  <meta charset="utf-8" />
5
  <meta name="viewport" content="width=device-width,initial-scale=1" />
6
+ <title>WebGPU · Transformers.js · Image Captioning</title>
7
  <style>
8
  body { font: 16px/1.45 system-ui, sans-serif; margin: 24px auto; max-width: 900px; padding: 0 16px; }
9
  .card { border:1px solid #4443; border-radius:12px; padding:16px; }
 
21
  <h3>Caption an image (file upload)</h3>
22
  <input id="file" type="file" accept="image/*" />
23
  <button id="run" disabled>Caption</button>
24
+ <div><img id="preview" alt="preview will appear here" /></div>
25
  <h4>Output</h4>
26
  <div id="log" class="log">Loading model…</div>
27
  <p class="muted">
 
66
  captioner = await buildPipeline('wasm');
67
  }
68
  } catch (e) {
 
69
  if (hasWebGPU && (e.message === 'webgpu-timeout' || String(e).toLowerCase().includes('webgpu'))) {
70
  envEl.textContent = '⚠️ WebGPU load slow/failed → falling back to WASM.';
71
  device = 'wasm';
 
79
  logEl.textContent = `Model ready · device=${device}`;
80
  runBtn.disabled = false;
81
 
82
+ // ---------- Robust file load (FileReader → data URL, with checks) ----------
83
+ let imgDataURL = null;
84
+
85
  fileEl.addEventListener('change', () => {
86
+ logEl.textContent = 'Image selected. Preparing preview…';
87
  const f = fileEl.files?.[0];
88
+ if (!f) { logEl.textContent = 'No file chosen.'; return; }
89
+
90
+ // Some Android cameras save HEIC/HEIF which many browsers can’t decode.
91
+ if (!f.type.startsWith('image/')) {
92
+ logEl.textContent = `Unsupported file type: ${f.type || 'unknown'}. Use JPG/PNG.`;
93
+ return;
94
+ }
95
+
96
+ const reader = new FileReader();
97
+ reader.onerror = () => {
98
+ logEl.textContent = 'Failed to read file. Try another image.';
99
+ };
100
+ reader.onload = async () => {
101
+ imgDataURL = reader.result; // base64 data URL
102
+ imgEl.src = imgDataURL;
103
+ try {
104
+ // ensure it decoded before we allow run
105
+ if (imgEl.decode) await imgEl.decode();
106
+ logEl.textContent = 'Preview ready. Click “Caption”.';
107
+ } catch {
108
+ logEl.textContent = 'Could not decode image. Try a JPG/PNG under ~5 MB.';
109
+ }
110
+ };
111
+ reader.readAsDataURL(f);
112
  });
113
+ // --------------------------------------------------------------------------
114
 
115
  // Run captioning (beam search for better captions)
116
  runBtn.addEventListener('click', async () => {
117
  if (!captioner) return;
118
+ if (!imgDataURL) { logEl.textContent = 'Pick an image first.'; return; }
119
  logEl.textContent = 'Running…';
120
  try {
121
+ const out = await captioner(imgDataURL, {
122
  max_new_tokens: 48,
123
  num_beams: 5,
124
  do_sample: false,
 
136
 
137
 
138
 
139
+