lorien-danger committed on
Commit
768e90f
·
verified ·
1 Parent(s): bc892ce

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +902 -424
index.html CHANGED
@@ -1,437 +1,915 @@
1
  <!DOCTYPE html>
 
2
  <html lang="en">
3
  <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width,initial-scale=1" />
6
- <title>I-JEPA Patch Matching (Browser, ONNX)</title>
7
- <style>
8
- :root { --w: 256; --gap: 40; --bg: #0b0d10; --fg: #e8f0f2; --muted:#92a2aa; --accent:#7bdcff; }
9
- html,body { height:100%; margin:0; background:var(--bg); color:var(--fg); font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial; }
10
- header { padding:16px 18px; border-bottom:1px solid #1b2228; display:flex; gap:16px; align-items:center; flex-wrap:wrap;}
11
- header h1 { font-size:16px; margin:0; font-weight:600; letter-spacing:.2px;}
12
- header .pill { padding:6px 10px; border:1px solid #24313a; border-radius:999px; color:#cfe7ff; }
13
- main { display:grid; grid-template-columns: 320px 1fr; gap:16px; height:calc(100% - 66px); }
14
- aside { border-right:1px solid #1b2228; padding:16px; overflow:auto; }
15
- section { padding:16px; overflow:auto;}
16
- fieldset { border:1px solid #24313a; border-radius:10px; padding:12px; margin:0 0 12px 0;}
17
- legend { padding:0 6px; color:#c0d1da; font-size:12px; }
18
- label { display:block; font-size:12px; color:#a9bac4; margin:8px 0 4px;}
19
- input[type="file"] { width:100%; }
20
- .row { display:flex; gap:8px; align-items:center; flex-wrap:wrap;}
21
- .row > * { flex:1; min-width:0; }
22
- input[type="range"] { width:100%; }
23
- button { background:#0f1418; color:var(--fg); border:1px solid #2a3945; padding:10px 12px; border-radius:8px; cursor:pointer;}
24
- button:disabled { opacity:.6; cursor:not-allowed;}
25
- small.muted { color:var(--muted); }
26
- .canv-wrap { display:flex; align-items:center; justify-content:center; }
27
- canvas { background:#0a0c0f; border:1px solid #1f2830; border-radius:10px; }
28
- .status { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size:12px; color:#c5e3ff; white-space:pre-wrap; background:#0a0f13; border:1px solid #23313b; padding:8px; border-radius:8px; min-height:2.5em;}
29
- .gridlabel { font-size:11px; color:#7e909b; }
30
- .foot { color:#7a8b95; font-size:12px; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  </style>
 
 
32
  </head>
33
  <body>
34
- <header>
35
- <h1>I-JEPA Patch Matching (Transformers.js + ONNX)</h1>
36
- <span class="pill">Model: onnx-community/ijepa_vith14_22k · dtype=q8</span>
37
- </header>
38
-
39
- <main>
40
- <aside>
41
- <fieldset>
42
- <legend>Inputs</legend>
43
- <label>Image A</label>
44
- <input id="fileA" type="file" accept="image/*" />
45
- <label>Image B</label>
46
- <input id="fileB" type="file" accept="image/*" />
47
- <div class="row" style="margin-top:10px;">
48
- <button id="runBtn" disabled>Run patch matching</button>
49
- <button id="clearBtn">Clear</button>
50
- </div>
51
- <small class="muted">Images are resized to 224×224 internally to match the model.</small>
52
- </fieldset>
53
-
54
- <fieldset>
55
- <legend>Matching</legend>
56
- <label>Top-K lines <span id="kVal" class="gridlabel"></span></label>
57
- <input id="k" type="range" min="8" max="256" step="8" value="64" />
58
- <label>Min similarity (cosine) <span id="thrVal" class="gridlabel"></span></label>
59
- <input id="thr" type="range" min="0" max="100" step="1" value="40" />
60
- <div class="row">
61
- <label class="row" style="gap:6px;align-items:center;">
62
- <input id="mutual" type="checkbox" checked />
63
- Mutual nearest neighbors only
64
- </label>
65
- </div>
66
- <div class="row">
67
- <label class="row" style="gap:6px;align-items:center;">
68
- <input id="showGrid" type="checkbox" />
69
- Show 16×16 patch grid overlay
70
- </label>
71
- </div>
72
- </fieldset>
73
-
74
- <fieldset>
75
- <legend>Runtime</legend>
76
- <div class="row">
77
- <label class="row" style="gap:6px;align-items:center;">
78
- <input id="preferGPU" type="checkbox" />
79
- Try WebGPU (if available)
80
- </label>
81
- </div>
82
- <label>Quantization</label>
83
- <select id="dtype">
84
- <option value="q8" selected>q8 (smallest, default)</option>
85
- <option value="fp32">fp32</option>
86
- </select>
87
- <label>Model repo</label>
88
- <input id="modelId" type="text" value="onnx-community/ijepa_vith14_22k" />
89
- <small class="muted">Patch size is 14; tokens map to a 16×16 grid. CLS token is dropped if present.</small>
90
- </fieldset>
91
-
92
- <fieldset>
93
- <legend>Status</legend>
94
- <div id="status" class="status">Idle.</div>
95
- </fieldset>
96
- <div class="foot">
97
- Preprocess (from model card): resize 224, rescale 1/255, normalize mean=std=0.5. Patch size=14.
98
- Outputs are per-patch hidden states; we build a full cosine similarity matrix.
99
  </div>
100
- </aside>
101
 
102
- <section>
103
- <div class="canv-wrap">
104
- <canvas id="viz" width="544" height="240" aria-label="Patch correspondence visualizer"></canvas>
 
 
 
105
  </div>
106
- <div class="row" style="margin-top:8px;">
107
- <small class="muted">Left = Image A (224×224). Right = Image B (224×224). Lines connect matched patch centers.</small>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  </div>
109
- </section>
110
- </main>
111
-
112
- <!-- Transformers.js UMD (exposes window.transformers) -->
113
- <script src="https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.3/dist/transformers.min.js"></script>
114
-
115
- <script>
116
- (async () => {
117
- const status = (msg) => document.getElementById('status').textContent = msg;
118
-
119
- // UI elements
120
- const fileA = document.getElementById('fileA');
121
- const fileB = document.getElementById('fileB');
122
- const runBtn = document.getElementById('runBtn');
123
- const clearBtn = document.getElementById('clearBtn');
124
- const preferGPU = document.getElementById('preferGPU');
125
- const dtypeSel = document.getElementById('dtype');
126
- const modelIdInput = document.getElementById('modelId');
127
- const kSlider = document.getElementById('k');
128
- const thrSlider = document.getElementById('thr');
129
- const kVal = document.getElementById('kVal');
130
- const thrVal = document.getElementById('thrVal');
131
- const mutualChk = document.getElementById('mutual');
132
- const gridChk = document.getElementById('showGrid');
133
-
134
- const W = 224, H = 224, PATCH = 14; // per config
135
- const GRID = W / PATCH; // 16
136
- const GAP = 96; // gap between images on the canvas
137
-
138
- const cvs = document.getElementById('viz');
139
- const ctx = cvs.getContext('2d');
140
-
141
- const setSliders = () => {
142
- kVal.textContent = `(${kSlider.value})`;
143
- thrVal.textContent = `(${(thrSlider.value/100).toFixed(2)})`;
144
- };
145
- setSliders();
146
- kSlider.addEventListener('input', setSliders);
147
- thrSlider.addEventListener('input', setSliders);
148
-
149
- // Enable buttons when both files chosen
150
- const updateReady = () => runBtn.disabled = !(fileA.files?.[0] && fileB.files?.[0]);
151
- fileA.addEventListener('change', updateReady);
152
- fileB.addEventListener('change', updateReady);
153
-
154
- clearBtn.onclick = () => {
155
- fileA.value = ''; fileB.value = '';
156
- runBtn.disabled = true;
157
- ctx.clearRect(0,0,cvs.width,cvs.height);
158
- status('Cleared.');
159
- };
160
-
161
- // Load Transformers.js and configure runtime
162
- const { pipeline, env } = window.transformers;
163
-
164
- // Configure ONNX Runtime Web assets and caching
165
- env.backends.onnx.wasm.wasmPaths = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.19.2/dist/";
166
- env.useBrowserCache = true;
167
- env.allowRemoteModels = true;
168
-
169
- // WebGPU hint (Transformers.js chooses the best available)
170
- preferGPU.addEventListener('change', () => {
171
- // Hint: runtime picks WebGPU automatically if available; keeping as a user toggle placeholder
172
- status(preferGPU.checked ? 'Will try WebGPU where possible.' : 'Defaulting to WASM backend.');
173
- });
174
-
175
- // Helpers
176
- const loadImageURL = (file) => new Promise((resolve, reject) => {
177
- const url = URL.createObjectURL(file);
178
- const img = new Image();
179
- img.onload = () => resolve({ url, img });
180
- img.onerror = reject;
181
- img.crossOrigin = "anonymous";
182
- img.src = url;
183
- });
184
-
185
- function drawSideBySide(imgA, imgB) {
186
- // Clear
187
- ctx.fillStyle = '#0a0c0f';
188
- ctx.fillRect(0, 0, cvs.width, cvs.height);
189
-
190
- // Draw A and B (padded)
191
- const leftX = 8, topY = 8;
192
- ctx.drawImage(imgA, leftX, topY, W, H);
193
- const rightX = leftX + W + GAP;
194
- ctx.drawImage(imgB, rightX, topY, W, H);
195
-
196
- // Optional grid
197
- if (gridChk.checked) {
198
- ctx.strokeStyle = 'rgba(255,255,255,0.15)';
199
- ctx.lineWidth = 1;
200
- for (let i=1;i<GRID;i++) {
201
- const xA = leftX + i*PATCH, xB = rightX + i*PATCH, y = topY + i*PATCH;
202
- ctx.beginPath(); ctx.moveTo(xA, topY); ctx.lineTo(xA, topY+H); ctx.stroke();
203
- ctx.beginPath(); ctx.moveTo(leftX, y); ctx.lineTo(leftX+W, y); ctx.stroke();
204
- ctx.beginPath(); ctx.moveTo(xB, topY); ctx.lineTo(xB, topY+H); ctx.stroke();
205
- ctx.beginPath(); ctx.moveTo(rightX, y); ctx.lineTo(rightX+W, y); ctx.stroke();
206
- }
207
- }
208
-
209
- return { leftX, rightX, topY };
210
- }
211
-
212
- // Normalize a [L,D] flat Float32Array in-place to unit vectors per row
213
- function rowNormalize(data, L, D) {
214
- for (let i=0; i<L; i++) {
215
- let sum=0.0, off = i*D;
216
- for (let j=0; j<D; j++) { const v = data[off+j]; sum += v*v; }
217
- const inv = 1.0 / Math.max(Math.sqrt(sum), 1e-12);
218
- for (let j=0; j<D; j++) data[off+j] *= inv;
219
- }
220
- }
221
-
222
- // Cosine sim matrix S (L1 x L2) = A_norm (L1 x D) * B_norm^T (D x L2)
223
- function simMatrix(A, L1, B, L2, D) {
224
- const S = new Float32Array(L1 * L2);
225
- for (let i=0; i<L1; i++) {
226
- const ai = i*D;
227
- for (let j=0; j<L2; j++) {
228
- const bj = j*D;
229
- let acc = 0.0;
230
- // unrolled loop could help but keep simple & correct
231
- for (let d=0; d<D; d++) acc += A[ai+d] * B[bj+d];
232
- S[i*L2 + j] = acc;
233
- }
234
- }
235
- return S;
236
- }
237
-
238
- function argmaxPerRow(S, rows, cols) {
239
- const idx = new Int32Array(rows);
240
- const val = new Float32Array(rows);
241
- for (let i=0; i<rows; i++) {
242
- let bestV = -Infinity, bestJ = -1, off = i*cols;
243
- for (let j=0; j<cols; j++) {
244
- const v = S[off + j];
245
- if (v > bestV) { bestV = v; bestJ = j; }
246
- }
247
- idx[i] = bestJ; val[i] = bestV;
248
- }
249
- return { idx, val };
250
- }
251
-
252
- function argmaxPerCol(S, rows, cols) {
253
- const idx = new Int32Array(cols);
254
- const val = new Float32Array(cols);
255
- for (let j=0; j<cols; j++) {
256
- let bestV = -Infinity, bestI = -1;
257
- for (let i=0; i<rows; i++) {
258
- const v = S[i*cols + j];
259
- if (v > bestV) { bestV = v; bestI = i; }
260
- }
261
- idx[j] = bestI; val[j] = bestV;
262
- }
263
- return { idx, val };
264
- }
265
-
266
- function gridCenter(k) {
267
- // k in [0, L). grid is row-major over 16x16.
268
- const r = Math.floor(k / GRID);
269
- const c = k % GRID;
270
- return { r, c, cx: c*PATCH + PATCH/2, cy: r*PATCH + PATCH/2 };
271
- }
272
-
273
- function drawMatches(base, matches, L2, topK, thr, mutualOnly) {
274
- const { leftX, rightX, topY } = base;
275
- // Sort by similarity desc
276
- matches.sort((a,b) => b.sim - a.sim);
277
- const K = Math.min(topK, matches.length);
278
-
279
- // Draw lines
280
- for (let n=0, drawn=0; n<matches.length && drawn<K; n++) {
281
- const m = matches[n];
282
- if (m.sim < thr) continue;
283
- if (mutualOnly && !m.mutual) continue;
284
-
285
- const A = gridCenter(m.i);
286
- const B = gridCenter(m.j);
287
-
288
- const x1 = leftX + A.cx, y1 = topY + A.cy;
289
- const x2 = rightX + B.cx, y2 = topY + B.cy;
290
-
291
- // color by similarity (blue→cyan)
292
- const t = Math.min(1, Math.max(0, (m.sim - thr) / (1 - thr)));
293
- const r = Math.floor(60 + 40*t);
294
- const g = Math.floor(200 + 30*t);
295
- const b = Math.floor(255);
296
- ctx.strokeStyle = `rgba(${r},${g},${b},${0.85})`;
297
- ctx.lineWidth = 1.25;
298
-
299
- ctx.beginPath();
300
- ctx.moveTo(x1, y1);
301
- ctx.lineTo(x2, y2);
302
- ctx.stroke();
303
-
304
- drawn++;
305
- }
306
- }
307
-
308
- // Extract per-patch tokens as a flat Float32Array [L,D]
309
- function tokens2D(tensor) {
310
- // Expect dims [B, L, D] OR [L, D]
311
- const dims = tensor.dims;
312
- let L, D, data = tensor.data;
313
- if (dims.length === 3) {
314
- L = dims[1]; D = dims[2];
315
- } else if (dims.length === 2) {
316
- L = dims[0]; D = dims[1];
317
- } else {
318
- throw new Error(`Unexpected tensor shape: [${dims.join(',')}]`);
319
- }
320
-
321
- // If CLS present, drop first token to get a perfect square (16x16)
322
- const isSquare = (n) => Number.isInteger(Math.sqrt(n));
323
- if (!isSquare(L) && isSquare(L - 1)) {
324
- // Slice off the first token (CLS) → returns [L-1, D]
325
- const out = new Float32Array((L - 1) * D);
326
- let dst = 0, src = D; // skip first row
327
- for (let i=1;i<L;i++, src += D) {
328
- out.set(data.subarray(src, src + D), dst);
329
- dst += D;
330
- }
331
- return { data: out, L: L - 1, D };
332
- }
333
-
334
- // Already square grid
335
- return { data: Float32Array.from(data), L, D };
336
- }
337
-
338
- let extractor = null;
339
-
340
- async function ensureExtractor() {
341
- if (extractor) return extractor;
342
- const modelId = modelIdInput.value.trim();
343
- const dtype = dtypeSel.value; // "q8" or "fp32"
344
- status(`Loading model: ${modelId} (${dtype}) ...`);
345
- const t0 = performance.now();
346
- extractor = await pipeline(
347
- "image-feature-extraction",
348
- modelId,
349
- { dtype } // uses ONNX + wasm/webgpu under the hood
350
- );
351
- const t1 = performance.now();
352
- status(`Model ready in ${(t1 - t0).toFixed(0)} ms. Awaiting images...`);
353
- return extractor;
354
- }
355
-
356
- async function run() {
357
- try {
358
- runBtn.disabled = true;
359
-
360
- const [{img: imgA}, {img: imgB}] = await Promise.all([
361
- loadImageURL(fileA.files[0]),
362
- loadImageURL(fileB.files[0]),
363
- ]);
364
-
365
- // Draw base images
366
- const base = drawSideBySide(imgA, imgB);
367
-
368
- // Load extractor (once)
369
- await ensureExtractor();
370
-
371
- status('Extracting per-patch features ...');
372
- const t0 = performance.now();
373
-
374
- // We can pass Blob URLs/HTMLImageElements; Transformers.js handles preprocessing
375
- const out = await extractor([imgA, imgB]); // returns a Tensor of shape [2, L, D]
376
- const dims = out.dims; // expect [B, L, D]
377
- // Split the batch into two separate tensors (copying data slices)
378
- if (!(dims.length === 3 && dims[0] === 2)) {
379
- throw new Error(`Unexpected output dims: [${dims.join(',')}]`);
380
- }
381
- const B = dims[0], L = dims[1], D = dims[2];
382
-
383
- // Slice batch 0 and 1
384
- const stride = L * D;
385
- const dataA = out.data.subarray(0, stride);
386
- const dataB = out.data.subarray(stride, 2*stride);
387
-
388
- // Convert to [L',D] and drop CLS if present (to get 16x16)
389
- const Atd = tokens2D({ data: dataA, dims: [L, D] });
390
- const Btd = tokens2D({ data: dataB, dims: [L, D] });
391
-
392
- if (Atd.L !== GRID*GRID || Btd.L !== GRID*GRID) {
393
- console.warn('Token count not 16x16; continuing anyway.', Atd.L, Btd.L);
394
- }
395
-
396
- // Normalize rows for cosine similarity
397
- rowNormalize(Atd.data, Atd.L, Atd.D);
398
- rowNormalize(Btd.data, Btd.L, Btd.D);
399
-
400
- status('Computing similarity matrix ... (this is O(L^2·D))');
401
- const S = simMatrix(Atd.data, Atd.L, Btd.data, Btd.L, Atd.D);
402
-
403
- // Argmaxes for A→B and B→A
404
- const A2B = argmaxPerRow(S, Atd.L, Btd.L);
405
- const B2A = argmaxPerCol(S, Atd.L, Btd.L);
406
-
407
- // Build match list
408
- const thr = Number(thrSlider.value)/100.0;
409
- const pairs = [];
410
- for (let i=0; i<Atd.L; i++) {
411
- const j = A2B.idx[i];
412
- const sim = A2B.val[i];
413
- const mutual = (B2A.idx[j] === i);
414
- pairs.push({ i, j, sim, mutual });
415
- }
416
-
417
- // Redraw base (so grid toggle applies immediately)
418
- drawSideBySide(imgA, imgB);
419
- drawMatches(base, pairs, Btd.L, Number(kSlider.value), thr, mutualChk.checked);
420
-
421
- const t1 = performance.now();
422
- status(`Done. Tokens: ${Atd.L}×${Atd.D}. Max sim: ${Math.max(...pairs.map(p=>p.sim)).toFixed(3)}. Total ${(t1-t0).toFixed(0)} ms.`);
423
- } catch (err) {
424
- console.error(err);
425
- status('Error: ' + (err && err.message ? err.message : String(err)));
426
- } finally {
427
- runBtn.disabled = false;
428
- }
429
- }
430
-
431
- runBtn.onclick = run;
432
-
433
- status('Ready. Load two images, then click “Run patch matching”.');
434
- })();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
435
  </script>
 
 
436
  </body>
437
  </html>
 
1
  <!DOCTYPE html>
2
+
3
  <html lang="en">
4
  <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>I-JEPA Patch Correspondence Analyzer</title>
8
+ <style>
9
+ body {
10
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
11
+ margin: 0;
12
+ padding: 20px;
13
+ background: linear-gradient(135deg, #1a202c 0%, #2d3748 100%);
14
+ min-height: 100vh;
15
+ color: #e2e8f0;
16
+ }
17
+
18
+
19
+ .container {
20
+ max-width: 1400px;
21
+ margin: 0 auto;
22
+ background: rgba(45, 55, 72, 0.8);
23
+ backdrop-filter: blur(10px);
24
+ border-radius: 20px;
25
+ padding: 30px;
26
+ box-shadow: 0 20px 40px rgba(0, 0, 0, 0.3);
27
+ border: 1px solid #4a5568;
28
+ }
29
+
30
+ h1 {
31
+ text-align: center;
32
+ background: linear-gradient(135deg, #60a5fa 0%, #a78bfa 100%);
33
+ -webkit-background-clip: text;
34
+ -webkit-text-fill-color: transparent;
35
+ background-clip: text;
36
+ margin-bottom: 10px;
37
+ font-size: 2.5em;
38
+ font-weight: 700;
39
+ }
40
+
41
+ .subtitle {
42
+ text-align: center;
43
+ color: #a0aec0;
44
+ margin-bottom: 30px;
45
+ font-size: 1.1em;
46
+ }
47
+
48
+ .upload-section {
49
+ display: grid;
50
+ grid-template-columns: 1fr 1fr;
51
+ gap: 30px;
52
+ margin-bottom: 30px;
53
+ }
54
+
55
+ .upload-box {
56
+ border: 2px dashed #4a5568;
57
+ border-radius: 15px;
58
+ padding: 40px;
59
+ text-align: center;
60
+ transition: all 0.3s ease;
61
+ background: rgba(26, 32, 44, 0.6);
62
+ position: relative;
63
+ overflow: hidden;
64
+ }
65
+
66
+ .upload-box:hover {
67
+ border-color: #60a5fa;
68
+ background: rgba(26, 32, 44, 0.8);
69
+ }
70
+
71
+ .upload-box.has-image {
72
+ border-color: #48bb78;
73
+ background: rgba(26, 32, 44, 0.9);
74
+ }
75
+
76
+ .upload-input {
77
+ position: absolute;
78
+ top: 0;
79
+ left: 0;
80
+ width: 100%;
81
+ height: 100%;
82
+ opacity: 0;
83
+ cursor: pointer;
84
+ }
85
+
86
+ .upload-content {
87
+ pointer-events: none;
88
+ }
89
+
90
+ .upload-icon {
91
+ font-size: 3em;
92
+ margin-bottom: 15px;
93
+ color: #718096;
94
+ }
95
+
96
+ .upload-text {
97
+ font-size: 1.1em;
98
+ color: #e2e8f0;
99
+ margin-bottom: 10px;
100
+ font-weight: 600;
101
+ }
102
+
103
+ .upload-hint {
104
+ font-size: 0.9em;
105
+ color: #a0aec0;
106
+ }
107
+
108
+ .preview-image {
109
+ max-width: 100%;
110
+ max-height: 200px;
111
+ border-radius: 10px;
112
+ margin-top: 15px;
113
+ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
114
+ }
115
+
116
+ .controls {
117
+ display: flex;
118
+ justify-content: center;
119
+ gap: 20px;
120
+ margin-bottom: 30px;
121
+ flex-wrap: wrap;
122
+ }
123
+
124
+ .btn {
125
+ padding: 12px 30px;
126
+ border: none;
127
+ border-radius: 12px;
128
+ cursor: pointer;
129
+ font-size: 1em;
130
+ font-weight: 600;
131
+ transition: all 0.3s ease;
132
+ text-transform: uppercase;
133
+ letter-spacing: 1px;
134
+ }
135
+
136
+ .btn-primary {
137
+ background: linear-gradient(135deg, #60a5fa 0%, #a78bfa 100%);
138
+ color: white;
139
+ }
140
+
141
+ .btn-primary:hover:not(:disabled) {
142
+ transform: translateY(-2px);
143
+ box-shadow: 0 8px 20px rgba(96, 165, 250, 0.4);
144
+ }
145
+
146
+ .btn-secondary {
147
+ background: #4a5568;
148
+ color: #e2e8f0;
149
+ }
150
+
151
+ .btn-secondary:hover {
152
+ background: #2d3748;
153
+ transform: translateY(-2px);
154
+ }
155
+
156
+ .btn:disabled {
157
+ background: #2d3748;
158
+ color: #718096;
159
+ cursor: not-allowed;
160
+ transform: none;
161
+ }
162
+
163
+ .loading {
164
+ text-align: center;
165
+ padding: 40px;
166
+ display: none;
167
+ }
168
+
169
+ .spinner {
170
+ width: 50px;
171
+ height: 50px;
172
+ border: 4px solid #2d3748;
173
+ border-top: 4px solid #60a5fa;
174
+ border-radius: 50%;
175
+ animation: spin 1s linear infinite;
176
+ margin: 0 auto 20px;
177
+ }
178
+
179
+ @keyframes spin {
180
+ 0% { transform: rotate(0deg); }
181
+ 100% { transform: rotate(360deg); }
182
+ }
183
+
184
+ .results {
185
+ display: none;
186
+ }
187
+
188
+ .visualization {
189
+ background: rgba(26, 32, 44, 0.6);
190
+ border-radius: 15px;
191
+ padding: 20px;
192
+ margin-bottom: 20px;
193
+ border: 1px solid #4a5568;
194
+ }
195
+
196
+ .images-container {
197
+ display: grid;
198
+ grid-template-columns: 1fr 1fr;
199
+ gap: 30px;
200
+ margin-bottom: 30px;
201
+ }
202
+
203
+ .image-analysis {
204
+ text-align: center;
205
+ }
206
+
207
+ .image-analysis h3 {
208
+ color: #e2e8f0;
209
+ margin-bottom: 15px;
210
+ }
211
+
212
+ .canvas-container {
213
+ position: relative;
214
+ display: inline-block;
215
+ border-radius: 10px;
216
+ overflow: hidden;
217
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
218
+ }
219
+
220
+ .analysis-canvas {
221
+ display: block;
222
+ max-width: 100%;
223
+ height: auto;
224
+ cursor: crosshair;
225
+ }
226
+
227
+ .stats {
228
+ display: grid;
229
+ grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
230
+ gap: 15px;
231
+ margin-top: 20px;
232
+ }
233
+
234
+ .stat-card {
235
+ background: rgba(26, 32, 44, 0.8);
236
+ padding: 20px;
237
+ border-radius: 10px;
238
+ text-align: center;
239
+ border-left: 4px solid #60a5fa;
240
+ }
241
+
242
+ .stat-value {
243
+ font-size: 2em;
244
+ font-weight: bold;
245
+ color: #e2e8f0;
246
+ }
247
+
248
+ .stat-label {
249
+ color: #a0aec0;
250
+ margin-top: 5px;
251
+ }
252
+
253
+ .similarity-threshold {
254
+ margin: 20px 0;
255
+ text-align: center;
256
+ color: #e2e8f0;
257
+ }
258
+
259
+ .threshold-slider {
260
+ width: 300px;
261
+ margin: 0 10px;
262
+ -webkit-appearance: none;
263
+ appearance: none;
264
+ height: 8px;
265
+ background: #4a5568;
266
+ border-radius: 4px;
267
+ outline: none;
268
+ }
269
+
270
+ .threshold-slider::-webkit-slider-thumb {
271
+ -webkit-appearance: none;
272
+ appearance: none;
273
+ width: 20px;
274
+ height: 20px;
275
+ background: #60a5fa;
276
+ cursor: pointer;
277
+ border-radius: 50%;
278
+ }
279
+
280
+ .threshold-slider::-moz-range-thumb {
281
+ width: 20px;
282
+ height: 20px;
283
+ background: #60a5fa;
284
+ cursor: pointer;
285
+ border-radius: 50%;
286
+ border: none;
287
+ }
288
+
289
+ .error {
290
+ background: rgba(245, 101, 101, 0.2);
291
+ color: #fc8181;
292
+ padding: 15px;
293
+ border-radius: 10px;
294
+ margin: 20px 0;
295
+ text-align: center;
296
+ display: none;
297
+ border: 1px solid rgba(245, 101, 101, 0.3);
298
+ }
299
+
300
+ .info-panel {
301
+ background: rgba(26, 32, 44, 0.6);
302
+ border-radius: 10px;
303
+ padding: 20px;
304
+ margin-bottom: 20px;
305
+ border: 1px solid #4a5568;
306
+ }
307
+
308
+ .info-panel h4 {
309
+ color: #60a5fa;
310
+ margin-bottom: 10px;
311
+ }
312
+
313
+ .info-panel p {
314
+ color: #a0aec0;
315
+ margin: 5px 0;
316
+ font-size: 0.9em;
317
+ }
318
+
319
+ @media (max-width: 768px) {
320
+ .upload-section {
321
+ grid-template-columns: 1fr;
322
+ }
323
+
324
+ .images-container {
325
+ grid-template-columns: 1fr;
326
+ }
327
+
328
+ .controls {
329
+ flex-direction: column;
330
+ align-items: center;
331
+ }
332
+
333
+ .threshold-slider {
334
+ width: 200px;
335
+ }
336
+ }
337
  </style>
338
+
339
+
340
  </head>
341
  <body>
342
+ <div class="container">
343
+ <h1>I-JEPA Patch Correspondence Analyzer</h1>
344
+ <p class="subtitle">Upload two images to analyze cross-patch correspondences using I-JEPA embeddings</p>
345
+
346
+
347
+ <div class="upload-section">
348
+ <div class="upload-box" id="upload1">
349
+ <input type="file" class="upload-input" accept="image/*" id="file1">
350
+ <div class="upload-content">
351
+ <div class="upload-icon">🖼️</div>
352
+ <div class="upload-text">Upload Image 1</div>
353
+ <div class="upload-hint">Click or drag image here</div>
354
+ </div>
355
+ </div>
356
+
357
+ <div class="upload-box" id="upload2">
358
+ <input type="file" class="upload-input" accept="image/*" id="file2">
359
+ <div class="upload-content">
360
+ <div class="upload-icon">🖼️</div>
361
+ <div class="upload-text">Upload Image 2</div>
362
+ <div class="upload-hint">Click or drag image here</div>
363
+ </div>
364
+ </div>
365
+ </div>
366
+
367
+ <div class="controls">
368
+ <button class="btn btn-primary" id="analyzeBtn" disabled>
369
+ 🔍 Analyze Cross-Patch Correspondences
370
+ </button>
371
+ <button class="btn btn-secondary" id="clearBtn">
372
+ 🗑️ Clear Images
373
+ </button>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  </div>
 
375
 
376
+ <div class="error" id="errorMsg"></div>
377
+
378
+ <div class="loading" id="loading">
379
+ <div class="spinner"></div>
380
+ <p>Loading I-JEPA model and analyzing images...</p>
381
+ <p><small>Using onnx-community/ijepa_vith14_1k for optimal browser performance</small></p>
382
  </div>
383
+
384
+ <div class="results" id="results">
385
+ <div class="info-panel">
386
+ <h4>How to Use:</h4>
387
+ <p>• Hover over any patch in either image to see its corresponding patches in the other image</p>
388
+ <p>• Adjust the similarity threshold to show more or fewer correspondences</p>
389
+ <p>• Blue outline shows the patch you're hovering over</p>
390
+ <p>• Colored patches show corresponding regions based on I-JEPA embeddings</p>
391
+ </div>
392
+
393
+ <div class="visualization">
394
+ <div class="similarity-threshold">
395
+ <label>Similarity Threshold: </label>
396
+ <input type="range" class="threshold-slider" id="thresholdSlider"
397
+ min="0" max="1" step="0.01" value="0.7">
398
+ <span id="thresholdValue">0.70</span>
399
+ </div>
400
+
401
+ <div class="images-container">
402
+ <div class="image-analysis">
403
+ <h3>Image 1</h3>
404
+ <div class="canvas-container">
405
+ <canvas id="canvas1" class="analysis-canvas"></canvas>
406
+ </div>
407
+ </div>
408
+
409
+ <div class="image-analysis">
410
+ <h3>Image 2</h3>
411
+ <div class="canvas-container">
412
+ <canvas id="canvas2" class="analysis-canvas"></canvas>
413
+ </div>
414
+ </div>
415
+ </div>
416
+
417
+ <div class="stats">
418
+ <div class="stat-card">
419
+ <div class="stat-value" id="totalPatches">0</div>
420
+ <div class="stat-label">Patches per Image</div>
421
+ </div>
422
+ <div class="stat-card">
423
+ <div class="stat-value" id="strongCorrespondences">0</div>
424
+ <div class="stat-label">Strong Correspondences</div>
425
+ </div>
426
+ <div class="stat-card">
427
+ <div class="stat-value" id="avgSimilarity">0.00</div>
428
+ <div class="stat-label">Average Cross-Similarity</div>
429
+ </div>
430
+ <div class="stat-card">
431
+ <div class="stat-value" id="maxSimilarity">0.00</div>
432
+ <div class="stat-label">Maximum Similarity</div>
433
+ </div>
434
+ </div>
435
+ </div>
436
  </div>
437
+ </div>
438
+
439
+ <script type="module">
440
+ import { pipeline, RawImage, matmul } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.2";
441
+
442
+ // Configuration
443
+ const MODEL_ID = "onnx-community/ijepa_vith14_1k";
444
+ const SUPPORTED_RESOLUTIONS = [224, 336, 448];
445
+ const MAX_PIXELS = 2097152; // 2MP limit for performance
446
+
447
+ // DOM elements
448
+ const file1Input = document.getElementById('file1');
449
+ const file2Input = document.getElementById('file2');
450
+ const upload1 = document.getElementById('upload1');
451
+ const upload2 = document.getElementById('upload2');
452
+ const analyzeBtn = document.getElementById('analyzeBtn');
453
+ const clearBtn = document.getElementById('clearBtn');
454
+ const loading = document.getElementById('loading');
455
+ const results = document.getElementById('results');
456
+ const errorMsg = document.getElementById('errorMsg');
457
+ const thresholdSlider = document.getElementById('thresholdSlider');
458
+ const thresholdValue = document.getElementById('thresholdValue');
459
+ const canvas1 = document.getElementById('canvas1');
460
+ const canvas2 = document.getElementById('canvas2');
461
+ const ctx1 = canvas1.getContext('2d');
462
+ const ctx2 = canvas2.getContext('2d');
463
+
464
+ // State
465
+ let extractor = null;
466
+ let image1Data = null;
467
+ let image2Data = null;
468
+ let features1 = null;
469
+ let features2 = null;
470
+ let crossSimilarities = null;
471
+ let patchesPerRow = 0;
472
+ let originalImages = { img1: null, img2: null };
473
+ let imageCropParams = { img1: null, img2: null };
474
+
475
+ // Utility functions
476
+ function showError(message) { // Show `message` in the #errorMsg banner, then hide the banner again after a delay.
477
+ errorMsg.textContent = message; // assigned via textContent, so the message is treated as plain text
478
+ errorMsg.style.display = 'block'; // the .error rule in the stylesheet starts the banner at display:none
479
+ setTimeout(() => { // NOTE(review): the timer is never cancelled, so a timeout from an earlier call can hide a later message early
480
+ errorMsg.style.display = 'none';
481
+ }, 5000); // auto-hide after 5000 ms
482
+ }
483
+
484
+ function showLoading(show) {
485
+ loading.style.display = show ? 'block' : 'none';
486
+ analyzeBtn.disabled = show;
487
+ }
488
+
489
+ function showResults(show) { // Show the #results section when `show` is truthy, hide it otherwise.
489
+ results.style.display = show ? 'block' : 'none'; // .results is display:none by default in the stylesheet
490
+ }
492
+
493
+ function updateAnalyzeButton() { // Recompute the Analyze button's enabled state from the current module state.
493
+ analyzeBtn.disabled = !image1Data || !image2Data || !extractor; // enabled only when both images are stored and the pipeline is loaded
494
+ }
496
+
497
+ function findClosestSupportedResolution(targetDim) { // Return the SUPPORTED_RESOLUTIONS entry with the smallest absolute distance to `targetDim`.
497
+ return SUPPORTED_RESOLUTIONS.reduce((prev, curr) => // no initial value: the first array entry seeds `prev`
498
+ Math.abs(curr - targetDim) < Math.abs(prev - targetDim) ? curr : prev // strict `<`, so on a tie the earlier entry is kept
499
+ );
500
+ }
502
+
503
+ // Initialize model
504
+ async function initializeModel() { // Load the image-feature-extraction pipeline into `extractor`; resolves to true on success, false on failure.
504
+ try {
505
+ showLoading(true);
506
+ const isWebGpuSupported = !!navigator.gpu; // WebGPU is treated as available whenever navigator.gpu exists
507
+ const device = isWebGpuSupported ? "webgpu" : "wasm";
508
+ const dtype = isWebGpuSupported ? "q4" : "q8"; // q4 weights are requested on WebGPU, q8 on WASM
509
+
510
+ console.log(`Loading I-JEPA model with ${device.toUpperCase()}...`);
511
+ extractor = await pipeline("image-feature-extraction", MODEL_ID, { device, dtype });
512
+
513
+ // Disable automatic resizing - we'll handle it ourselves
514
+ if (extractor?.processor?.image_processor) { // optional chaining: skipped silently if the pipeline does not expose this path
515
+ extractor.processor.image_processor.do_resize = false;
516
+ }
517
+
518
+ console.log('Model loaded successfully');
519
+ updateAnalyzeButton();
520
+ showLoading(false);
521
+ return true;
522
+ } catch (error) { // any failure above is logged, reported through showError, and converted into a `false` result
523
+ console.error('Error loading model:', error);
524
+ showError('Failed to load I-JEPA model. Please refresh and try again.');
525
+ showLoading(false);
526
+ return false;
527
+ }
528
+ }
530
+
531
+ // Process image to canvas
532
+ function processImageToCanvas(file, canvas, ctx, imageKey) { // Load `file` as an image, center-crop it to a square, and draw it onto `canvas` at a supported resolution; resolves with that resolution.
532
+ return new Promise((resolve, reject) => {
533
+ const img = new Image();
534
+ img.onload = () => {
535
+ const { naturalWidth: w, naturalHeight: h } = img;
536
+
537
+ // Crop to square from center
538
+ const cropSize = Math.min(w, h); // side of the largest centered square that fits in the image
539
+ const sx = (w - cropSize) / 2;
540
+ const sy = (h - cropSize) / 2;
541
+ imageCropParams[imageKey] = { sx, sy, sWidth: cropSize, sHeight: cropSize }; // source-crop rectangle recorded per image key
542
+
543
+ // Find optimal resolution
544
+ let scaledCropSize = cropSize;
545
+ if (scaledCropSize * scaledCropSize > MAX_PIXELS) { // NOTE(review): SUPPORTED_RESOLUTIONS tops out at 448, so a side this large already maps to 448 and the clamp does not change the result
546
+ scaledCropSize = Math.sqrt(MAX_PIXELS);
547
+ }
548
+ const chosenResolution = findClosestSupportedResolution(scaledCropSize);
549
+
550
+ // Set canvas size and draw
551
+ canvas.width = chosenResolution;
552
+ canvas.height = chosenResolution;
553
+
554
+ ctx.drawImage(
555
+ img,
556
+ sx, sy, cropSize, cropSize,
557
+ 0, 0, chosenResolution, chosenResolution
558
+ );
559
+
560
+ originalImages[imageKey] = img; // keep the loaded Image element under the same key
561
+ resolve(chosenResolution);
562
+ };
563
+ img.onerror = reject; // a load failure rejects the promise with the error event
564
+ img.src = URL.createObjectURL(file); // NOTE(review): this object URL is never revoked within this function
565
+ });
566
+ }
568
+
569
+ // File upload handling
570
/**
 * Handle a newly selected or dropped image: draw it to its canvas, remember
 * the file, and replace the upload box content with a preview.
 *
 * @param {HTMLInputElement} fileInput - File input whose first file is used.
 * @param {HTMLElement} uploadBox - Upload box containing an `.upload-content` element.
 * @param {string} imageKey - 'img1' or 'img2'.
 * @param {string} canvasId - Id of the canvas the image is drawn to.
 */
function handleFileUpload(fileInput, uploadBox, imageKey, canvasId) {
  const file = fileInput.files[0];
  if (!file) return;

  const canvas = document.getElementById(canvasId);
  const ctx = canvas.getContext('2d');

  processImageToCanvas(file, canvas, ctx, imageKey)
    .then(() => {
      // Store image data
      if (imageKey === 'img1') {
        image1Data = file;
      } else {
        image2Data = file;
      }

      // Results from an earlier analysis describe the previous image, so
      // discard them; otherwise hovering would highlight stale matches.
      crossSimilarities = null;
      patchesPerRow = 0;
      showResults(false);

      // Update UI
      uploadBox.classList.add('has-image');
      const content = uploadBox.querySelector('.upload-content');

      // Build the preview with DOM nodes and release the blob URL once the
      // browser has finished with it, so it does not leak per upload.
      const previewUrl = URL.createObjectURL(file);
      const releasePreviewUrl = () => URL.revokeObjectURL(previewUrl);
      const preview = document.createElement('img');
      preview.className = 'preview-image';
      preview.alt = 'Preview';
      preview.addEventListener('load', releasePreviewUrl, { once: true });
      preview.addEventListener('error', releasePreviewUrl, { once: true });
      preview.src = previewUrl;

      const status = document.createElement('div');
      status.style.cssText = 'margin-top: 10px; color: #48bb78; font-weight: 600;';
      status.textContent = '✓ Image loaded';

      content.replaceChildren(preview, status);

      updateAnalyzeButton();
    })
    .catch(error => {
      console.error('Error processing image:', error);
      showError('Failed to process image. Please try a different file.');
    });
}
601
+
602
+ // Extract features from canvas
603
/**
 * Run the feature extractor on a canvas and return per-patch features.
 *
 * @param {HTMLCanvasElement} canvas - Canvas holding the preprocessed image.
 * @returns {Promise<{features: object, patchesPerRow: number}>} Patch feature
 *   tensor (presumably shaped [1, patches, dim] — confirm against the model)
 *   and the edge length of the square patch grid.
 * @throws Re-throws any error from the extractor after logging it.
 */
async function extractFeatures(canvas) {
  try {
    const imageData = await RawImage.fromCanvas(canvas);
    const features = await extractor(imageData, { pooling: "none" });

    const totalTokens = features.dims[1];
    const isPerfectSquare = (n) => {
      const root = Math.round(Math.sqrt(n));
      return root * root === n;
    };

    // A square patch grid with one leading CLS token gives k*k + 1 tokens; a
    // model without a CLS token gives exactly k*k. Only drop the first token
    // when the sequence is not already a perfect square.
    // NOTE(review): confirm whether the configured model emits a CLS token.
    const leadingTokens = isPerfectSquare(totalTokens) ? 0 : 1;
    const nPatches = totalTokens - leadingTokens;

    // The slice end is exclusive, so it must be totalTokens to keep the last patch.
    const patchFeatures = features.slice(null, [leadingTokens, totalTokens]);

    // Calculate patches per row
    const patchesPerRowCalc = Math.round(Math.sqrt(nPatches));
    if (patchesPerRowCalc * patchesPerRowCalc !== nPatches) {
      console.warn("Patch count is not a perfect square:", nPatches);
    }

    return { features: patchFeatures, patchesPerRow: patchesPerRowCalc };
  } catch (error) {
    console.error('Error extracting features:', error);
    throw error;
  }
}
625
+
626
+ // Calculate cross-similarities between two images
627
/**
 * Compute the similarity matrix between the patches of two images.
 *
 * Both feature tensors are L2-normalized along their last axis and multiplied,
 * so entry [i][j] compares patch i of the first image with patch j of the second.
 *
 * @param {object} features1 - Patch features of the first image.
 * @param {object} features2 - Patch features of the second image.
 * @returns {Promise<number[][]>} Nested array for the first batch entry.
 * @throws Re-throws any tensor error after logging it.
 */
async function calculateCrossSimilarities(features1, features2) {
  try {
    const unitFirst = features1.normalize(2, -1);
    const unitSecond = features2.normalize(2, -1);

    // Swap the last two axes so the product is img1_patches x img2_patches
    const transposedSecond = unitSecond.permute(0, 2, 1);
    const scoreTensor = await matmul(unitFirst, transposedSecond);

    const nested = await scoreTensor.tolist();
    return nested[0];
  } catch (error) {
    console.error('Error calculating similarities:', error);
    throw error;
  }
}
642
+
643
+ // Redraw original image on canvas
644
/**
 * Repaint a canvas with its stored source image, using the stored crop
 * rectangle and stretching it over the full canvas. Does nothing when no
 * image or crop is stored for the key.
 *
 * @param {HTMLCanvasElement} canvas - Canvas whose size defines the target area.
 * @param {CanvasRenderingContext2D} ctx - 2D context to draw with.
 * @param {string} imageKey - 'img1' or 'img2'.
 */
function redrawOriginalImage(canvas, ctx, imageKey) {
  const sourceImage = originalImages[imageKey];
  const crop = imageCropParams[imageKey];
  if (!sourceImage || !crop) {
    return;
  }

  const { sx, sy, sWidth, sHeight } = crop;
  ctx.drawImage(sourceImage, sx, sy, sWidth, sHeight, 0, 0, canvas.width, canvas.height);
}
656
+
657
+ // Color mapping for similarity visualization
658
// Control points of the colormap: [position in 0..1, [r, g, b]].
const INFERNO_COLORMAP = [
  [0.0, [0,0,4]], [0.1, [39,12,69]], [0.2, [84,15,104]], [0.3, [128,31,103]], [0.4, [170,48,88]],
  [0.5, [209,70,68]], [0.6, [240,97,47]], [0.7, [253,138,28]], [0.8, [252,185,26]], [0.9, [240,231,56]], [1.0, [252,255,160]]
];

/**
 * Map a value to a CSS color by linear interpolation between the control
 * points of INFERNO_COLORMAP.
 *
 * @param {number} t - Position on the colormap; values outside [0, 1] are clamped.
 * @returns {string} CSS `rgb(...)` color string.
 */
function getInfernoColor(t) {
  // Clamp first: a negative t would otherwise extrapolate below the first
  // control point and yield negative channel values. NaN stays NaN and falls
  // through to the final color, as before.
  const clamped = Math.min(1, Math.max(0, t));

  for (let i = 1; i < INFERNO_COLORMAP.length; i++) {
    const [tp, cp] = INFERNO_COLORMAP[i-1];
    const [tc, cc] = INFERNO_COLORMAP[i];
    if (clamped <= tc) {
      // Fraction of the way from the previous control point to the current one
      const a = (clamped - tp) / (tc - tp);
      const r = cp[0] + a * (cc[0] - cp[0]);
      const g = cp[1] + a * (cc[1] - cp[1]);
      const b = cp[2] + a * (cc[2] - cp[2]);
      return `rgb(${Math.round(r)}, ${Math.round(g)}, ${Math.round(b)})`;
    }
  }
  const last = INFERNO_COLORMAP[INFERNO_COLORMAP.length-1][1];
  return `rgb(${last.join(",")})`;
}
678
+
679
+ // Draw highlights on canvas
680
/**
 * Redraw one canvas and overlay either the hovered query patch or the patches
 * that match a query patch located in the other image.
 *
 * `crossSimilarities` is indexed [img1 patch][img2 patch]. A query in image 1
 * therefore selects a row (drawn on image 2), while a query in image 2 selects
 * a column (drawn on image 1).
 *
 * @param {HTMLCanvasElement} canvas - Canvas to draw on.
 * @param {CanvasRenderingContext2D} ctx - 2D context of `canvas`.
 * @param {string} imageKey - Which image this canvas shows: 'img1' or 'img2'.
 * @param {number} queryPatchIndex - Row-major index of the hovered patch.
 * @param {boolean} isQueryImage - True when the hovered patch is on this canvas.
 */
function drawHighlights(canvas, ctx, imageKey, queryPatchIndex, isQueryImage) {
  if (!crossSimilarities || !patchesPerRow) return;

  const patchSize = canvas.width / patchesPerRow;
  const threshold = parseFloat(thresholdSlider.value);

  // Redraw original image so earlier overlays disappear
  redrawOriginalImage(canvas, ctx, imageKey);

  if (isQueryImage) {
    // Outline the hovered patch
    const qy = Math.floor(queryPatchIndex / patchesPerRow);
    const qx = queryPatchIndex % patchesPerRow;

    ctx.strokeStyle = "#60a5fa";
    ctx.lineWidth = 3;
    ctx.strokeRect(qx * patchSize, qy * patchSize, patchSize, patchSize);
    return;
  }

  // This canvas shows the target image. If it is image 1, the query patch is
  // in image 2 and its scores are the matrix column; reading the row instead
  // would show the scores of an unrelated image 1 patch.
  const similarities = imageKey === 'img1'
    ? crossSimilarities.map((row) => row[queryPatchIndex])
    : (crossSimilarities[queryPatchIndex] || []);

  // Min/max in one pass, without spreading a large array into call arguments
  let maxSim = -Infinity;
  let minSim = Infinity;
  for (const value of similarities) {
    if (value > maxSim) maxSim = value;
    if (value < minSim) minSim = value;
  }
  const range = maxSim - minSim;

  for (let i = 0; i < similarities.length; i++) {
    const sim = similarities[i];
    // A missing entry (undefined) fails this comparison and is skipped
    if (sim >= threshold) {
      const py = Math.floor(i / patchesPerRow);
      const px = i % patchesPerRow;

      // Rescale to 0..1 within this query's scores; squaring emphasizes the best matches
      const normalizedSim = range > 1e-4 ? (sim - minSim) / range : 1;
      const alpha = Math.pow(normalizedSim, 2) * 0.8;

      ctx.fillStyle = `rgba(96, 165, 250, ${alpha})`;
      ctx.fillRect(px * patchSize, py * patchSize, patchSize, patchSize);
    }
  }
}
720
+
721
+ // Clear highlights
722
/**
 * Remove all overlays by repainting both canvases with their source images.
 */
function clearHighlights() {
  const targets = [
    [canvas1, ctx1, 'img1'],
    [canvas2, ctx2, 'img2'],
  ];
  for (const [canvas, ctx, imageKey] of targets) {
    redrawOriginalImage(canvas, ctx, imageKey);
  }
}
726
+
727
+ // Mouse event handlers
728
/**
 * Build a mousemove listener for one canvas. The listener converts the pointer
 * position to a patch index and redraws the highlights on both canvases.
 *
 * @param {HTMLCanvasElement} canvas - Canvas the listener is attached to.
 * @param {string} imageKey - Key of the image shown on `canvas` (not read by the listener).
 * @param {boolean} isImage1 - True when `canvas` shows the first image.
 * @returns {function(MouseEvent): void} The event listener.
 */
function handleMouseMove(canvas, imageKey, isImage1) {
  return function onPointerMove(event) {
    if (!crossSimilarities || !patchesPerRow) return;

    // Convert from CSS pixels on screen to canvas pixel coordinates
    const { left, top, width, height } = canvas.getBoundingClientRect();
    const x = (event.clientX - left) * (canvas.width / width);
    const y = (event.clientY - top) * (canvas.height / height);

    const outsideX = x < 0 || x >= canvas.width;
    const outsideY = y < 0 || y >= canvas.height;
    if (outsideX || outsideY) return;

    const cellSize = canvas.width / patchesPerRow;
    const column = Math.floor(x / cellSize);
    const row = Math.floor(y / cellSize);
    const patchIndex = row * patchesPerRow + column;

    if (patchIndex < 0 || patchIndex >= patchesPerRow * patchesPerRow) return;

    // The hovered canvas shows the query outline, the other one shows the matches
    drawHighlights(canvas1, ctx1, 'img1', patchIndex, isImage1);
    drawHighlights(canvas2, ctx2, 'img2', patchIndex, !isImage1);
  };
}
752
+
753
+ // Update statistics
754
/**
 * Recompute the summary statistics over the whole similarity matrix and write
 * them into the statistics elements. Does nothing when no analysis is available.
 *
 * Writes: patches per image, number of pairs at or above the threshold, mean
 * similarity and maximum similarity.
 */
function updateStatistics() {
  if (!crossSimilarities) return;

  const threshold = parseFloat(thresholdSlider.value);
  const totalPatches = patchesPerRow * patchesPerRow;

  let strongCorrespondences = 0;
  let totalSimilarity = 0;
  // Start below every possible value: starting at 0 would report 0 as the
  // maximum when all similarities are negative.
  let maxSim = -Infinity;
  let count = 0;

  for (const row of crossSimilarities) {
    for (const sim of row) {
      totalSimilarity += sim;
      if (sim > maxSim) maxSim = sim;
      count++;

      if (sim >= threshold) {
        strongCorrespondences++;
      }
    }
  }

  // An empty matrix would otherwise print "NaN" and "-Infinity"
  const average = count > 0 ? totalSimilarity / count : 0;
  const maximum = count > 0 ? maxSim : 0;

  document.getElementById('totalPatches').textContent = totalPatches;
  document.getElementById('strongCorrespondences').textContent = strongCorrespondences;
  document.getElementById('avgSimilarity').textContent = average.toFixed(3);
  document.getElementById('maxSimilarity').textContent = maximum.toFixed(3);
}
783
+
784
+ // Event listeners
785
// Wire each file input to the upload handler for its slot
const uploadSlots = [
  { input: file1Input, box: upload1, imageKey: 'img1', canvasId: 'canvas1' },
  { input: file2Input, box: upload2, imageKey: 'img2', canvasId: 'canvas2' },
];
for (const { input, box, imageKey, canvasId } of uploadSlots) {
  input.addEventListener('change', () => handleFileUpload(input, box, imageKey, canvasId));
}
787
+
788
// Clear button: drop all loaded data and return the page to its initial state
clearBtn.addEventListener('click', () => {
  // Forget images, features and analysis results
  image1Data = null;
  image2Data = null;
  features1 = null;
  features2 = null;
  crossSimilarities = null;
  patchesPerRow = 0;
  originalImages = { img1: null, img2: null };
  imageCropParams = { img1: null, img2: null };

  // Placeholder markup shown in an empty upload box
  const placeholderMarkup = (slotNumber) => `
    <div class="upload-icon">🖼️</div>
    <div class="upload-text">Upload Image ${slotNumber}</div>
    <div class="upload-hint">Click or drag image here</div>
  `;

  // Reset the file inputs and upload boxes
  file1Input.value = '';
  file2Input.value = '';
  upload1.classList.remove('has-image');
  upload2.classList.remove('has-image');
  upload1.querySelector('.upload-content').innerHTML = placeholderMarkup(1);
  upload2.querySelector('.upload-content').innerHTML = placeholderMarkup(2);

  // Wipe both canvases
  ctx1.clearRect(0, 0, canvas1.width, canvas1.height);
  ctx2.clearRect(0, 0, canvas2.width, canvas2.height);

  showResults(false);
  updateAnalyzeButton();
});
824
+
825
// Threshold slider: show the new value with two decimals and refresh the statistics
thresholdSlider.addEventListener('input', function onThresholdInput() {
  const current = parseFloat(thresholdSlider.value);
  thresholdValue.textContent = current.toFixed(2);
  updateStatistics();
});
830
+
831
+ // Main analysis function
832
// Hover listeners read the shared analysis state at event time, so they only
// need to be attached once. Attaching them on every run would stack duplicate
// listeners and repeat every redraw.
let hoverListenersAttached = false;

// Analyze button: extract features for both images, compute the similarity
// matrix, enable hover highlighting and show the results.
analyzeBtn.addEventListener('click', async () => {
  if (!image1Data || !image2Data || !extractor) return;

  showLoading(true);
  showResults(false);

  try {
    console.log('Extracting features from both images...');

    // Extract features from both images
    const result1 = await extractFeatures(canvas1);
    const result2 = await extractFeatures(canvas2);

    features1 = result1.features;
    features2 = result2.features;
    patchesPerRow = result1.patchesPerRow;

    // Highlighting uses a single grid size for both canvases, so report when
    // the two images ended up with different grids.
    if (result1.patchesPerRow !== result2.patchesPerRow) {
      console.warn(
        'Images have different patch grids:',
        result1.patchesPerRow,
        'vs',
        result2.patchesPerRow
      );
    }

    console.log(`Patch grid: ${patchesPerRow}x${patchesPerRow} patches per image`);

    // Calculate cross-similarities
    console.log('Calculating cross-similarities...');
    crossSimilarities = await calculateCrossSimilarities(features1, features2);

    // Set up mouse event listeners (first successful run only)
    if (!hoverListenersAttached) {
      canvas1.addEventListener('mousemove', handleMouseMove(canvas1, 'img1', true));
      canvas1.addEventListener('mouseleave', clearHighlights);
      canvas2.addEventListener('mousemove', handleMouseMove(canvas2, 'img2', false));
      canvas2.addEventListener('mouseleave', clearHighlights);
      hoverListenersAttached = true;
    }

    // Update statistics
    updateStatistics();

    // Show results
    showResults(true);

    console.log('Analysis complete!');

  } catch (error) {
    console.error('Analysis error:', error);
    showError('Failed to analyze images. Please try again with different images.');
  } finally {
    // Single exit point for the spinner on both success and failure
    showLoading(false);
  }
});
876
+
877
+ // Drag and drop support
878
// Drag and drop: let each upload box accept a dropped image file
const dropTargets = [
  { boxId: 'upload1', inputId: 'file1', imageKey: 'img1', canvasId: 'canvas1' },
  { boxId: 'upload2', inputId: 'file2', imageKey: 'img2', canvasId: 'canvas2' },
];

for (const { boxId, inputId, imageKey, canvasId } of dropTargets) {
  const uploadBox = document.getElementById(boxId);
  const fileInput = document.getElementById(inputId);

  const setBorderColor = (color) => {
    uploadBox.style.borderColor = color;
  };

  // Highlight the box while a drag hovers over it
  uploadBox.addEventListener('dragover', (e) => {
    e.preventDefault();
    setBorderColor('#60a5fa');
  });

  uploadBox.addEventListener('dragleave', (e) => {
    e.preventDefault();
    setBorderColor('#4a5568');
  });

  uploadBox.addEventListener('drop', (e) => {
    e.preventDefault();
    setBorderColor('#4a5568');

    const files = e.dataTransfer.files;
    const firstIsImage = files.length > 0 && files[0].type.startsWith('image/');
    if (!firstIsImage) return;

    // Hand the dropped files to the input, then run the normal upload path
    fileInput.files = files;
    handleFileUpload(fileInput, uploadBox, imageKey, canvasId);
  });
}
905
+
906
+ // Initialize on load
907
// Start loading the model as soon as the page has finished loading
window.addEventListener('load', function onPageLoad() {
  console.log('Initializing I-JEPA Patch Correspondence Analyzer...');
  initializeModel();
});
911
  </script>
912
+
913
+
914
  </body>
915
  </html>