lorien-danger committed on
Commit
bc892ce
·
verified ·
1 Parent(s): 48416d0

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +424 -811
index.html CHANGED
@@ -1,824 +1,437 @@
1
  <!DOCTYPE html>
2
-
3
  <html lang="en">
4
  <head>
5
- <meta charset="UTF-8">
6
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
- <title>I-JEPA Patch Correspondence Analyzer</title>
8
- <style>
9
- body {
10
- font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
11
- margin: 0;
12
- padding: 20px;
13
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
14
- min-height: 100vh;
15
- color: #333;
16
- }
17
-
18
- ```
19
- .container {
20
- max-width: 1400px;
21
- margin: 0 auto;
22
- background: rgba(255, 255, 255, 0.95);
23
- border-radius: 20px;
24
- padding: 30px;
25
- box-shadow: 0 20px 40px rgba(0, 0, 0, 0.1);
26
- }
27
-
28
- h1 {
29
- text-align: center;
30
- color: #4a5568;
31
- margin-bottom: 10px;
32
- font-size: 2.5em;
33
- font-weight: 300;
34
- }
35
-
36
- .subtitle {
37
- text-align: center;
38
- color: #718096;
39
- margin-bottom: 30px;
40
- font-size: 1.1em;
41
- }
42
-
43
- .upload-section {
44
- display: grid;
45
- grid-template-columns: 1fr 1fr;
46
- gap: 30px;
47
- margin-bottom: 30px;
48
- }
49
-
50
- .upload-box {
51
- border: 3px dashed #cbd5e0;
52
- border-radius: 15px;
53
- padding: 40px;
54
- text-align: center;
55
- transition: all 0.3s ease;
56
- background: #f7fafc;
57
- position: relative;
58
- overflow: hidden;
59
- }
60
-
61
- .upload-box:hover {
62
- border-color: #667eea;
63
- background: #edf2f7;
64
- transform: translateY(-2px);
65
- }
66
-
67
- .upload-box.has-image {
68
- border-color: #48bb78;
69
- background: #f0fff4;
70
- }
71
-
72
- .upload-input {
73
- position: absolute;
74
- top: 0;
75
- left: 0;
76
- width: 100%;
77
- height: 100%;
78
- opacity: 0;
79
- cursor: pointer;
80
- }
81
-
82
- .upload-content {
83
- pointer-events: none;
84
- }
85
-
86
- .upload-icon {
87
- font-size: 3em;
88
- margin-bottom: 15px;
89
- color: #a0aec0;
90
- }
91
-
92
- .upload-text {
93
- font-size: 1.1em;
94
- color: #4a5568;
95
- margin-bottom: 10px;
96
- }
97
-
98
- .upload-hint {
99
- font-size: 0.9em;
100
- color: #718096;
101
- }
102
-
103
- .preview-image {
104
- max-width: 100%;
105
- max-height: 200px;
106
- border-radius: 10px;
107
- margin-top: 15px;
108
- box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
109
- }
110
-
111
- .controls {
112
- display: flex;
113
- justify-content: center;
114
- gap: 20px;
115
- margin-bottom: 30px;
116
- flex-wrap: wrap;
117
- }
118
-
119
- .btn {
120
- padding: 12px 30px;
121
- border: none;
122
- border-radius: 25px;
123
- cursor: pointer;
124
- font-size: 1em;
125
- font-weight: 600;
126
- transition: all 0.3s ease;
127
- text-transform: uppercase;
128
- letter-spacing: 1px;
129
- }
130
-
131
- .btn-primary {
132
- background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
133
- color: white;
134
- }
135
-
136
- .btn-primary:hover {
137
- transform: translateY(-2px);
138
- box-shadow: 0 8px 20px rgba(102, 126, 234, 0.4);
139
- }
140
-
141
- .btn-secondary {
142
- background: #e2e8f0;
143
- color: #4a5568;
144
- }
145
-
146
- .btn-secondary:hover {
147
- background: #cbd5e0;
148
- transform: translateY(-2px);
149
- }
150
-
151
- .btn:disabled {
152
- background: #e2e8f0;
153
- color: #a0aec0;
154
- cursor: not-allowed;
155
- transform: none;
156
- }
157
-
158
- .loading {
159
- text-align: center;
160
- padding: 40px;
161
- display: none;
162
- }
163
-
164
- .spinner {
165
- width: 50px;
166
- height: 50px;
167
- border: 4px solid #e2e8f0;
168
- border-top: 4px solid #667eea;
169
- border-radius: 50%;
170
- animation: spin 1s linear infinite;
171
- margin: 0 auto 20px;
172
- }
173
-
174
- @keyframes spin {
175
- 0% { transform: rotate(0deg); }
176
- 100% { transform: rotate(360deg); }
177
- }
178
-
179
- .results {
180
- display: none;
181
- }
182
-
183
- .visualization {
184
- background: white;
185
- border-radius: 15px;
186
- padding: 20px;
187
- margin-bottom: 20px;
188
- box-shadow: 0 4px 12px rgba(0, 0, 0, 0.05);
189
- }
190
-
191
- .images-container {
192
- display: grid;
193
- grid-template-columns: 1fr 1fr;
194
- gap: 30px;
195
- margin-bottom: 30px;
196
- }
197
-
198
- .image-analysis {
199
- text-align: center;
200
- }
201
-
202
- .analysis-image {
203
- max-width: 100%;
204
- height: 300px;
205
- object-fit: contain;
206
- border-radius: 10px;
207
- box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
208
- }
209
-
210
- .patch-grid {
211
- display: inline-block;
212
- position: relative;
213
- margin-top: 15px;
214
- }
215
-
216
- .patch {
217
- position: absolute;
218
- border: 2px solid transparent;
219
- cursor: pointer;
220
- transition: all 0.3s ease;
221
- }
222
-
223
- .patch:hover {
224
- border-color: #667eea;
225
- background: rgba(102, 126, 234, 0.2);
226
- z-index: 10;
227
- }
228
-
229
- .patch.highlighted {
230
- border-color: #e53e3e;
231
- background: rgba(229, 62, 62, 0.3);
232
- z-index: 20;
233
- }
234
-
235
- .patch.corresponding {
236
- border-color: #38a169;
237
- background: rgba(56, 161, 105, 0.3);
238
- z-index: 15;
239
- }
240
-
241
- .correspondences {
242
- margin-top: 20px;
243
- }
244
-
245
- .correspondence-line {
246
- position: absolute;
247
- height: 2px;
248
- background: linear-gradient(90deg, #e53e3e, #38a169);
249
- z-index: 5;
250
- opacity: 0.8;
251
- transform-origin: left center;
252
- }
253
-
254
- .stats {
255
- display: grid;
256
- grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
257
- gap: 15px;
258
- margin-top: 20px;
259
- }
260
-
261
- .stat-card {
262
- background: #f7fafc;
263
- padding: 20px;
264
- border-radius: 10px;
265
- text-align: center;
266
- border-left: 4px solid #667eea;
267
- }
268
-
269
- .stat-value {
270
- font-size: 2em;
271
- font-weight: bold;
272
- color: #4a5568;
273
- }
274
-
275
- .stat-label {
276
- color: #718096;
277
- margin-top: 5px;
278
- }
279
-
280
- .similarity-threshold {
281
- margin: 20px 0;
282
- text-align: center;
283
- }
284
-
285
- .threshold-slider {
286
- width: 300px;
287
- margin: 0 10px;
288
- }
289
-
290
- .error {
291
- background: #fed7d7;
292
- color: #c53030;
293
- padding: 15px;
294
- border-radius: 10px;
295
- margin: 20px 0;
296
- text-align: center;
297
- display: none;
298
- }
299
-
300
- @media (max-width: 768px) {
301
- .upload-section {
302
- grid-template-columns: 1fr;
303
- }
304
-
305
- .images-container {
306
- grid-template-columns: 1fr;
307
- }
308
-
309
- .controls {
310
- flex-direction: column;
311
- align-items: center;
312
- }
313
- }
314
  </style>
315
- ```
316
-
317
  </head>
318
  <body>
319
- <div class="container">
320
- <h1>I-JEPA Patch Correspondence Analyzer</h1>
321
- <p class="subtitle">Upload two images to analyze patch-level correspondences using I-JEPA embeddings</p>
322
-
323
- ```
324
- <div class="upload-section">
325
- <div class="upload-box" id="upload1">
326
- <input type="file" class="upload-input" accept="image/*" id="file1">
327
- <div class="upload-content">
328
- <div class="upload-icon">📸</div>
329
- <div class="upload-text">Upload Image 1</div>
330
- <div class="upload-hint">Click or drag image here</div>
331
- </div>
332
- </div>
333
-
334
- <div class="upload-box" id="upload2">
335
- <input type="file" class="upload-input" accept="image/*" id="file2">
336
- <div class="upload-content">
337
- <div class="upload-icon">📸</div>
338
- <div class="upload-text">Upload Image 2</div>
339
- <div class="upload-hint">Click or drag image here</div>
340
- </div>
341
- </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  </div>
 
343
 
344
- <div class="controls">
345
- <button class="btn btn-primary" id="analyzeBtn" disabled>
346
- 🔍 Analyze Patch Correspondences
347
- </button>
348
- <button class="btn btn-secondary" id="clearBtn">
349
- 🗑️ Clear Images
350
- </button>
351
- </div>
352
-
353
- <div class="error" id="errorMsg"></div>
354
-
355
- <div class="loading" id="loading">
356
- <div class="spinner"></div>
357
- <p>Loading I-JEPA model and analyzing images...</p>
358
- <p><small>This may take a moment on first load as the model downloads</small></p>
359
  </div>
360
-
361
- <div class="results" id="results">
362
- <div class="visualization">
363
- <div class="similarity-threshold">
364
- <label>Similarity Threshold: </label>
365
- <input type="range" class="threshold-slider" id="thresholdSlider"
366
- min="0" max="1" step="0.01" value="0.7">
367
- <span id="thresholdValue">0.70</span>
368
- </div>
369
-
370
- <div class="images-container">
371
- <div class="image-analysis">
372
- <h3>Image 1</h3>
373
- <div class="patch-grid" id="grid1">
374
- <img class="analysis-image" id="img1" alt="Image 1">
375
- </div>
376
- </div>
377
-
378
- <div class="image-analysis">
379
- <h3>Image 2</h3>
380
- <div class="patch-grid" id="grid2">
381
- <img class="analysis-image" id="img2" alt="Image 2">
382
- </div>
383
- </div>
384
- </div>
385
-
386
- <div class="stats">
387
- <div class="stat-card">
388
- <div class="stat-value" id="totalPatches">0</div>
389
- <div class="stat-label">Total Patches per Image</div>
390
- </div>
391
- <div class="stat-card">
392
- <div class="stat-value" id="strongCorrespondences">0</div>
393
- <div class="stat-label">Strong Correspondences</div>
394
- </div>
395
- <div class="stat-card">
396
- <div class="stat-value" id="avgSimilarity">0.00</div>
397
- <div class="stat-label">Average Similarity</div>
398
- </div>
399
- <div class="stat-card">
400
- <div class="stat-value" id="maxSimilarity">0.00</div>
401
- <div class="stat-label">Max Similarity</div>
402
- </div>
403
- </div>
404
- </div>
405
  </div>
406
- </div>
407
-
408
- <script type="module">
409
- import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.3';
410
-
411
- // Configure environment
412
- env.allowRemoteModels = true;
413
- env.allowLocalModels = false;
414
-
415
- let model = null;
416
- let processor = null;
417
- let image1Data = null;
418
- let image2Data = null;
419
- let patchEmbeddings1 = null;
420
- let patchEmbeddings2 = null;
421
- let similarities = null;
422
-
423
- // Model configuration - using smallest I-JEPA model
424
- const MODEL_ID = "facebook/ijepa_vith14_1k";
425
- const PATCH_SIZE = 14;
426
- const IMAGE_SIZE = 224;
427
-
428
- // DOM elements
429
- const file1Input = document.getElementById('file1');
430
- const file2Input = document.getElementById('file2');
431
- const upload1 = document.getElementById('upload1');
432
- const upload2 = document.getElementById('upload2');
433
- const analyzeBtn = document.getElementById('analyzeBtn');
434
- const clearBtn = document.getElementById('clearBtn');
435
- const loading = document.getElementById('loading');
436
- const results = document.getElementById('results');
437
- const errorMsg = document.getElementById('errorMsg');
438
- const thresholdSlider = document.getElementById('thresholdSlider');
439
- const thresholdValue = document.getElementById('thresholdValue');
440
-
441
- // Utility functions
442
// Handle of the pending auto-hide timer so that a newer message can cancel it.
let errorHideTimer = null;

/**
 * Show `message` in the error banner and hide the banner again after 5 s.
 *
 * Each call restarts the countdown: without cancelling the previous timer, a
 * message shown shortly after another one would be hidden early by the first
 * message's timer.
 *
 * @param {string} message - Text to display in the banner.
 */
function showError(message) {
  errorMsg.textContent = message;
  errorMsg.style.display = 'block';

  if (errorHideTimer !== null) {
    clearTimeout(errorHideTimer);
  }
  errorHideTimer = setTimeout(() => {
    errorMsg.style.display = 'none';
    errorHideTimer = null;
  }, 5000);
}
449
-
450
/**
 * Toggle the loading indicator and keep the analyze button in a valid state.
 *
 * @param {boolean} show - True while work is in progress.
 */
function showLoading(show) {
  loading.style.display = show ? 'block' : 'none';
  // Disabled while busy; afterwards only re-enable when both images are still
  // present (they may have been cleared while the analysis was running).
  analyzeBtn.disabled = show || !image1Data || !image2Data;
}
454
-
455
/**
 * Reveal or hide the results panel.
 *
 * @param {boolean} show - Truthy to display the panel, falsy to hide it.
 */
function showResults(show) {
  if (show) {
    results.style.display = 'block';
  } else {
    results.style.display = 'none';
  }
}
458
-
459
/**
 * Enable the analyze button only when both images have been loaded.
 */
function updateAnalyzeButton() {
  const bothLoaded = Boolean(image1Data && image2Data);
  analyzeBtn.disabled = !bothLoaded;
}
462
-
463
- // File handling
464
/**
 * Read the file selected in `fileInput`, remember it as image 1 or 2 and show
 * a preview inside `uploadBox`.
 *
 * @param {HTMLInputElement} fileInput - File input holding the selection.
 * @param {HTMLElement} uploadBox - Upload box that receives the preview.
 * @param {*} imageData - Unused; kept so existing call sites keep working.
 * @param {number} imageNum - 1 stores into `image1Data`, anything else into `image2Data`.
 */
function handleFileUpload(fileInput, uploadBox, imageData, imageNum) {
  const file = fileInput.files[0];
  if (!file) return;

  const reader = new FileReader();

  // Surface read failures instead of leaving the UI silently unchanged.
  reader.onerror = () => {
    showError(`Could not read the file selected for image ${imageNum}.`);
  };

  reader.onload = (e) => {
    const dataUrl = e.target.result;
    const img = new Image();

    // The file was readable but the browser could not decode it as an image.
    img.onerror = () => {
      showError(`The file selected for image ${imageNum} is not a readable image.`);
    };

    img.onload = () => {
      const record = { url: dataUrl, width: img.width, height: img.height };
      if (imageNum === 1) {
        image1Data = record;
      } else {
        image2Data = record;
      }

      uploadBox.classList.add('has-image');
      const content = uploadBox.querySelector('.upload-content');
      content.innerHTML = `
<img src="${dataUrl}" class="preview-image" alt="Preview ${imageNum}">
<div style="margin-top: 10px; color: #48bb78; font-weight: 600;">✓ Image ${imageNum} loaded</div>
`;

      updateAnalyzeButton();
    };

    img.src = dataUrl;
  };

  reader.readAsDataURL(file);
}
501
-
502
- // Event listeners
503
- file1Input.addEventListener('change', () => handleFileUpload(file1Input, upload1, image1Data, 1));
504
- file2Input.addEventListener('change', () => handleFileUpload(file2Input, upload2, image2Data, 2));
505
-
506
// Clear button: forget both images and all derived data, then restore the
// empty upload boxes.
clearBtn.addEventListener('click', () => {
  // Placeholder markup shown inside an empty upload box.
  const placeholderFor = (imageNum) => `
<div class="upload-icon">📸</div>
<div class="upload-text">Upload Image ${imageNum}</div>
<div class="upload-hint">Click or drag image here</div>
`;

  // Drop every piece of analysis state.
  image1Data = null;
  image2Data = null;
  patchEmbeddings1 = null;
  patchEmbeddings2 = null;
  similarities = null;

  // Reset the file inputs and the upload boxes.
  file1Input.value = '';
  file2Input.value = '';
  upload1.classList.remove('has-image');
  upload2.classList.remove('has-image');
  upload1.querySelector('.upload-content').innerHTML = placeholderFor(1);
  upload2.querySelector('.upload-content').innerHTML = placeholderFor(2);

  showResults(false);
  updateAnalyzeButton();
});
535
-
536
// Threshold slider: mirror the value next to the slider and refresh the
// visualization once a similarity matrix exists.
thresholdSlider.addEventListener('input', () => {
  const current = parseFloat(thresholdSlider.value);
  thresholdValue.textContent = current.toFixed(2);

  if (!similarities) return;
  updateVisualization();
});
543
-
544
- // Model loading and processing
545
/**
 * Lazily create the embedding pipeline and cache it in `model`.
 *
 * Uses the `image-feature-extraction` task: the plain `feature-extraction`
 * task is the text pipeline (it loads a tokenizer), which is not what an
 * image encoder needs.
 *
 * NOTE(review): MODEL_ID presumably has to point at a repository that ships
 * ONNX weights for Transformers.js to load it; confirm for the configured id.
 *
 * @returns {Promise<boolean>} True when the pipeline is ready, false when
 *   loading failed (the failure is logged and shown to the user).
 */
async function loadModel() {
  if (model) return true;

  try {
    model = await pipeline('image-feature-extraction', MODEL_ID);
    return true;
  } catch (error) {
    console.error('Error loading model:', error);
    showError('Failed to load I-JEPA model. Please check your connection and try again.');
    return false;
  }
}
557
-
558
- // Cosine similarity calculation
559
/**
 * Cosine similarity of two equally long numeric vectors.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector; read at the indices of `a`.
 * @returns {number} Value in [-1, 1]; 0 when either vector has zero norm
 *   (instead of the NaN a 0/0 division would produce).
 */
function cosineSimilarity(a, b) {
  let dotProduct = 0;
  let normA = 0;
  let normB = 0;

  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  // A zero vector has no direction; report "no similarity" rather than NaN.
  return denominator === 0 ? 0 : dotProduct / denominator;
}
572
-
573
- // Extract patch embeddings from I-JEPA
574
/**
 * Run the model on one image and return one embedding per patch.
 *
 * The URL string is handed to the pipeline directly rather than a decoded
 * `<img>` element. The result is read through its `dims` / `data` fields,
 * with the last two dimensions taken as `[tokens, hidden_size]`.
 *
 * A leading extra token is dropped only when the token count is exactly one
 * more than the patch grid implied by IMAGE_SIZE / PATCH_SIZE; otherwise all
 * tokens are kept so that token i keeps lining up with grid cell i.
 *
 * @param {string} imageUrl - URL (for example a data URL) of the image.
 * @returns {Promise<number[][]>} One array of `hidden_size` numbers per patch.
 * @throws Propagates any error raised by the model call.
 */
async function extractPatchEmbeddings(imageUrl) {
  const features = await model(imageUrl);

  const hiddenSize = features.dims.at(-1);
  const tokenCount = features.dims.at(-2);

  const patchesPerSide = Math.floor(IMAGE_SIZE / PATCH_SIZE);
  const expectedPatches = patchesPerSide * patchesPerSide;
  const firstPatchToken = tokenCount === expectedPatches + 1 ? 1 : 0;

  const embeddings = [];
  for (let token = firstPatchToken; token < tokenCount; token++) {
    const start = token * hiddenSize;
    // Copy the row into a plain array for the downstream similarity code.
    embeddings.push(Array.from(features.data.slice(start, start + hiddenSize)));
  }
  return embeddings;
}
613
-
614
- // Calculate patch correspondences
615
/**
 * Fill `similarities` with the cosine similarity of every patch of image 1
 * (rows) against every patch of image 2 (columns).
 */
function calculateCorrespondences() {
  similarities = patchEmbeddings1.map((rowEmbedding) =>
    patchEmbeddings2.map((colEmbedding) => cosineSimilarity(rowEmbedding, colEmbedding)),
  );
}
629
-
630
- // Create patch grid visualization
631
/**
 * Rebuild `container` as an image with a grid of hoverable patch overlays.
 *
 * Patch boxes are positioned in percentages of the container instead of
 * pixels measured with `getBoundingClientRect()`. That measurement returns a
 * zero-sized box while the surrounding panel is hidden, and pixel values go
 * stale when the layout changes.
 *
 * @param {HTMLElement} container - Positioned wrapper that receives the image
 *   and the patch overlays.
 * @param {string} imageUrl - Source URL of the image to show.
 * @param {number} imageNum - Image number stored on every patch element.
 */
function createPatchGrid(container, imageUrl, imageNum) {
  container.innerHTML = '';

  const img = document.createElement('img');
  img.className = 'analysis-image';
  img.alt = `Image ${imageNum}`;
  // Block display removes the inline baseline gap, so the container box (the
  // reference for the percentages below) matches the image box.
  img.style.display = 'block';
  img.src = imageUrl;
  container.appendChild(img);

  const patchesPerSide = Math.floor(IMAGE_SIZE / PATCH_SIZE);
  const cellPercent = 100 / patchesPerSide;

  for (let row = 0; row < patchesPerSide; row++) {
    for (let col = 0; col < patchesPerSide; col++) {
      const patchIndex = row * patchesPerSide + col;

      const patch = document.createElement('div');
      patch.className = 'patch';
      patch.style.left = `${col * cellPercent}%`;
      patch.style.top = `${row * cellPercent}%`;
      patch.style.width = `${cellPercent}%`;
      patch.style.height = `${cellPercent}%`;
      patch.dataset.patchIndex = patchIndex;
      patch.dataset.imageNum = imageNum;

      patch.addEventListener('mouseenter', () => highlightCorrespondingPatches(patchIndex, imageNum));
      patch.addEventListener('mouseleave', () => clearHighlights());

      container.appendChild(patch);
    }
  }
}
669
-
670
- // Highlight corresponding patches
671
/**
 * Mark the hovered patch and every patch of the other image whose similarity
 * to it reaches the slider threshold.
 *
 * @param {number} patchIndex - Index of the hovered patch.
 * @param {number} imageNum - 1 or 2: the image the hovered patch belongs to.
 */
function highlightCorrespondingPatches(patchIndex, imageNum) {
  // Look up one patch overlay by its data attributes.
  const findPatch = (index, num) =>
    document.querySelector(`[data-patch-index="${index}"][data-image-num="${num}"]`);

  clearHighlights();

  const minSimilarity = parseFloat(thresholdSlider.value);

  const hovered = findPatch(patchIndex, imageNum);
  if (hovered) {
    hovered.classList.add('highlighted');
  }

  const partnerImage = imageNum === 1 ? 2 : 1;
  if (!similarities) return;

  // Rows belong to image 1 and columns to image 2, so hovering image 2 reads a column.
  const scores = imageNum === 1
    ? similarities[patchIndex]
    : similarities.map((row) => row[patchIndex]);

  scores.forEach((score, partnerIndex) => {
    if (!(score >= minSimilarity)) return;
    const partner = findPatch(partnerIndex, partnerImage);
    if (partner) {
      partner.classList.add('corresponding');
    }
  });
}
698
-
699
- // Clear all highlights
700
/**
 * Remove the hover and correspondence markers from every patch overlay.
 */
function clearHighlights() {
  const marked = document.querySelectorAll('.patch.highlighted, .patch.corresponding');
  marked.forEach((element) => {
    element.classList.remove('highlighted', 'corresponding');
  });
}
705
-
706
- // Update statistics
707
/**
 * Recompute the summary cards from the `similarities` matrix.
 *
 * Writes the number of rows, the number of pairs at or above the slider
 * threshold, the mean similarity and the maximum similarity. Does nothing
 * when no matrix has been computed yet.
 */
function updateStatistics() {
  if (!similarities) return;

  const threshold = parseFloat(thresholdSlider.value);
  const totalPatches = similarities.length;

  let strongCorrespondences = 0;
  let totalSimilarity = 0;
  // Cosine similarity can be negative, so the running maximum must not start at 0.
  let maxSim = -Infinity;
  let count = 0;

  for (const row of similarities) {
    for (const sim of row) {
      totalSimilarity += sim;
      maxSim = Math.max(maxSim, sim);
      count++;
      if (sim >= threshold) {
        strongCorrespondences++;
      }
    }
  }

  // An empty matrix has no pairs; show zeros rather than NaN / -Infinity.
  const average = count > 0 ? totalSimilarity / count : 0;
  const maximum = count > 0 ? maxSim : 0;

  document.getElementById('totalPatches').textContent = totalPatches;
  document.getElementById('strongCorrespondences').textContent = strongCorrespondences;
  document.getElementById('avgSimilarity').textContent = average.toFixed(3);
  document.getElementById('maxSimilarity').textContent = maximum.toFixed(3);
}
736
-
737
- // Update visualization based on current threshold
738
/**
 * Refresh everything that depends on the current threshold.
 *
 * Only the summary statistics need recomputing here; patch highlighting is
 * driven by the mouse events on the patch overlays.
 */
function updateVisualization() {
  updateStatistics();
}
742
-
743
- // Main analysis function
744
// Analyze button: load the model if needed, embed both images, build the
// similarity matrix and render the results.
analyzeBtn.addEventListener('click', async () => {
  const haveBothImages = image1Data && image2Data;
  if (!haveBothImages) return;

  showLoading(true);
  showResults(false);

  try {
    // loadModel reports its own failure to the user; just stop here.
    const modelLoaded = await loadModel();
    if (!modelLoaded) return;

    patchEmbeddings1 = await extractPatchEmbeddings(image1Data.url);
    patchEmbeddings2 = await extractPatchEmbeddings(image2Data.url);

    calculateCorrespondences();

    const leftGrid = document.getElementById('grid1');
    const rightGrid = document.getElementById('grid2');
    createPatchGrid(leftGrid, image1Data.url, 1);
    createPatchGrid(rightGrid, image2Data.url, 2);

    updateStatistics();
    showResults(true);
  } catch (error) {
    console.error('Analysis error:', error);
    showError('Failed to analyze images. Please try again with different images.');
  } finally {
    // Runs on success, on failure and on the early return above.
    showLoading(false);
  }
});
785
-
786
- // Drag and drop support
787
// Drag-and-drop support for both upload boxes.
['upload1', 'upload2'].forEach((id, index) => {
  const uploadBox = document.getElementById(id);
  const fileInput = document.getElementById(`file${index + 1}`);

  // Apply the drag highlight, or remove it again. Clearing the inline values
  // (rather than writing fixed colours) hands control back to the stylesheet,
  // so the `.has-image` and `:hover` rules are not overridden after a drag.
  const setDragHighlight = (active) => {
    uploadBox.style.borderColor = active ? '#667eea' : '';
    uploadBox.style.background = active ? '#edf2f7' : '';
  };

  uploadBox.addEventListener('dragover', (e) => {
    e.preventDefault();
    setDragHighlight(true);
  });

  uploadBox.addEventListener('dragleave', (e) => {
    e.preventDefault();
    setDragHighlight(false);
  });

  uploadBox.addEventListener('drop', (e) => {
    e.preventDefault();
    setDragHighlight(false);

    // Only the first dropped file is considered, and only if it is an image.
    const files = e.dataTransfer.files;
    if (files.length > 0 && files[0].type.startsWith('image/')) {
      fileInput.files = files;
      handleFileUpload(fileInput, uploadBox, null, index + 1);
    }
  });
});
815
-
816
- // Initial setup
817
- console.log('I-JEPA Patch Correspondence Analyzer initialized');
818
- console.log(`Using model: ${MODEL_ID}`);
819
- console.log(`Patch size: ${PATCH_SIZE}x${PATCH_SIZE}, Image size: ${IMAGE_SIZE}x${IMAGE_SIZE}`);
820
  </script>
821
- ```
822
-
823
  </body>
824
  </html>
 
1
  <!DOCTYPE html>
 
2
  <html lang="en">
3
  <head>
4
+ <meta charset="utf-8" />
5
+ <meta name="viewport" content="width=device-width,initial-scale=1" />
6
+ <title>I-JEPA Patch Matching (Browser, ONNX)</title>
7
+ <style>
8
+ :root { --w: 256; --gap: 40; --bg: #0b0d10; --fg: #e8f0f2; --muted:#92a2aa; --accent:#7bdcff; }
9
+ html,body { height:100%; margin:0; background:var(--bg); color:var(--fg); font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial; }
10
+ header { padding:16px 18px; border-bottom:1px solid #1b2228; display:flex; gap:16px; align-items:center; flex-wrap:wrap;}
11
+ header h1 { font-size:16px; margin:0; font-weight:600; letter-spacing:.2px;}
12
+ header .pill { padding:6px 10px; border:1px solid #24313a; border-radius:999px; color:#cfe7ff; }
13
+ main { display:grid; grid-template-columns: 320px 1fr; gap:16px; height:calc(100% - 66px); }
14
+ aside { border-right:1px solid #1b2228; padding:16px; overflow:auto; }
15
+ section { padding:16px; overflow:auto;}
16
+ fieldset { border:1px solid #24313a; border-radius:10px; padding:12px; margin:0 0 12px 0;}
17
+ legend { padding:0 6px; color:#c0d1da; font-size:12px; }
18
+ label { display:block; font-size:12px; color:#a9bac4; margin:8px 0 4px;}
19
+ input[type="file"] { width:100%; }
20
+ .row { display:flex; gap:8px; align-items:center; flex-wrap:wrap;}
21
+ .row > * { flex:1; min-width:0; }
22
+ input[type="range"] { width:100%; }
23
+ button { background:#0f1418; color:var(--fg); border:1px solid #2a3945; padding:10px 12px; border-radius:8px; cursor:pointer;}
24
+ button:disabled { opacity:.6; cursor:not-allowed;}
25
+ small.muted { color:var(--muted); }
26
+ .canv-wrap { display:flex; align-items:center; justify-content:center; }
27
+ canvas { background:#0a0c0f; border:1px solid #1f2830; border-radius:10px; }
28
+ .status { font-family: ui-monospace, SFMono-Regular, Menlo, monospace; font-size:12px; color:#c5e3ff; white-space:pre-wrap; background:#0a0f13; border:1px solid #23313b; padding:8px; border-radius:8px; min-height:2.5em;}
29
+ .gridlabel { font-size:11px; color:#7e909b; }
30
+ .foot { color:#7a8b95; font-size:12px; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  </style>
 
 
32
  </head>
33
  <body>
34
+ <header>
35
+ <h1>I-JEPA Patch Matching (Transformers.js + ONNX)</h1>
36
+ <span class="pill">Model: onnx-community/ijepa_vith14_22k · dtype=q8</span>
37
+ </header>
38
+
39
+ <main>
40
+ <aside>
41
+ <fieldset>
42
+ <legend>Inputs</legend>
43
+ <label>Image A</label>
44
+ <input id="fileA" type="file" accept="image/*" />
45
+ <label>Image B</label>
46
+ <input id="fileB" type="file" accept="image/*" />
47
+ <div class="row" style="margin-top:10px;">
48
+ <button id="runBtn" disabled>Run patch matching</button>
49
+ <button id="clearBtn">Clear</button>
50
+ </div>
51
+ <small class="muted">Images are resized to 224×224 internally to match the model.</small>
52
+ </fieldset>
53
+
54
+ <fieldset>
55
+ <legend>Matching</legend>
56
+ <label>Top-K lines <span id="kVal" class="gridlabel"></span></label>
57
+ <input id="k" type="range" min="8" max="256" step="8" value="64" />
58
+ <label>Min similarity (cosine) <span id="thrVal" class="gridlabel"></span></label>
59
+ <input id="thr" type="range" min="0" max="100" step="1" value="40" />
60
+ <div class="row">
61
+ <label class="row" style="gap:6px;align-items:center;">
62
+ <input id="mutual" type="checkbox" checked />
63
+ Mutual nearest neighbors only
64
+ </label>
65
+ </div>
66
+ <div class="row">
67
+ <label class="row" style="gap:6px;align-items:center;">
68
+ <input id="showGrid" type="checkbox" />
69
+ Show 16×16 patch grid overlay
70
+ </label>
71
+ </div>
72
+ </fieldset>
73
+
74
+ <fieldset>
75
+ <legend>Runtime</legend>
76
+ <div class="row">
77
+ <label class="row" style="gap:6px;align-items:center;">
78
+ <input id="preferGPU" type="checkbox" />
79
+ Try WebGPU (if available)
80
+ </label>
81
+ </div>
82
+ <label>Quantization</label>
83
+ <select id="dtype">
84
+ <option value="q8" selected>q8 (smallest, default)</option>
85
+ <option value="fp32">fp32</option>
86
+ </select>
87
+ <label>Model repo</label>
88
+ <input id="modelId" type="text" value="onnx-community/ijepa_vith14_22k" />
89
+ <small class="muted">Patch size is 14; tokens map to a 16×16 grid. CLS token is dropped if present.</small>
90
+ </fieldset>
91
+
92
+ <fieldset>
93
+ <legend>Status</legend>
94
+ <div id="status" class="status">Idle.</div>
95
+ </fieldset>
96
+ <div class="foot">
97
+ Preprocess (from model card): resize 224, rescale 1/255, normalize mean=std=0.5. Patch size=14.
98
+ Outputs are per-patch hidden states; we build a full cosine similarity matrix.
99
  </div>
100
+ </aside>
101
 
102
+ <section>
103
+ <div class="canv-wrap">
104
+ <canvas id="viz" width="544" height="240" aria-label="Patch correspondence visualizer"></canvas>
 
 
 
 
 
 
 
 
 
 
 
 
105
  </div>
106
+ <div class="row" style="margin-top:8px;">
107
+ <small class="muted">Left = Image A (224×224). Right = Image B (224×224). Lines connect matched patch centers.</small>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  </div>
109
+ </section>
110
+ </main>
111
+
112
+ <!-- Transformers.js UMD (exposes window.transformers) -->
113
+ <script src="https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.3/dist/transformers.min.js"></script>
114
+
115
+ <script>
116
+ (async () => {
117
// Write `msg` into the status panel and hand the same value back.
const status = (msg) => {
  const panel = document.getElementById('status');
  panel.textContent = msg;
  return msg;
};
118
+
119
+ // UI elements
120
+ const fileA = document.getElementById('fileA');
121
+ const fileB = document.getElementById('fileB');
122
+ const runBtn = document.getElementById('runBtn');
123
+ const clearBtn = document.getElementById('clearBtn');
124
+ const preferGPU = document.getElementById('preferGPU');
125
+ const dtypeSel = document.getElementById('dtype');
126
+ const modelIdInput = document.getElementById('modelId');
127
+ const kSlider = document.getElementById('k');
128
+ const thrSlider = document.getElementById('thr');
129
+ const kVal = document.getElementById('kVal');
130
+ const thrVal = document.getElementById('thrVal');
131
+ const mutualChk = document.getElementById('mutual');
132
+ const gridChk = document.getElementById('showGrid');
133
+
134
// Model input geometry: images are drawn at W×H and split into PATCH-sized cells.
const W = 224;
const H = 224;
const PATCH = 14;
const GRID = W / PATCH; // 16 cells per side
const GAP = 96; // horizontal gap between the two images on the canvas

const cvs = document.getElementById('viz');
// Size the canvas from the layout it has to hold. drawSideBySide uses an 8px
// inset, so the content spans 8 + W + GAP + W horizontally; with GAP = 96 the
// fixed 544px width from the markup would cut off the right edge of image B.
cvs.width = 8 + W + GAP + W + 8;
cvs.height = 8 + H + 8;
const ctx = cvs.getContext('2d');
140
+
141
// Show the current slider values next to their labels. The threshold slider
// works in whole percent and is displayed as a 0–1 value.
const setSliders = () => {
  const minSimilarity = thrSlider.value / 100;
  kVal.textContent = `(${kSlider.value})`;
  thrVal.textContent = `(${minSimilarity.toFixed(2)})`;
};
setSliders();
for (const slider of [kSlider, thrSlider]) {
  slider.addEventListener('input', setSliders);
}
148
+
149
+ // Enable buttons when both files chosen
150
// The run button is usable only once a file is chosen for both inputs.
const updateReady = () => {
  const haveBoth = Boolean(fileA.files?.[0] && fileB.files?.[0]);
  runBtn.disabled = !haveBoth;
  return !haveBoth;
};
for (const input of [fileA, fileB]) {
  input.addEventListener('change', updateReady);
}
153
+
154
// Clear: forget both selections, disable the run button and wipe the canvas.
clearBtn.onclick = () => {
  for (const input of [fileA, fileB]) {
    input.value = '';
  }
  runBtn.disabled = true;
  ctx.clearRect(0, 0, cvs.width, cvs.height);
  status('Cleared.');
};
160
+
161
+ // Load Transformers.js and configure runtime
162
// Obtain the Transformers.js API. A global `window.transformers` is used when
// some loader provided one; otherwise the package is imported as an ES module.
// NOTE(review): the 3.x browser bundle appears to be ESM-only, in which case a
// classic <script src> tag never defines `window.transformers` and the plain
// destructuring would throw — confirm against the published package.
const { pipeline, env } = window.transformers
  ?? await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.3');

// Configure ONNX Runtime Web assets and caching.
// NOTE(review): this pins the WASM binaries to onnxruntime-web 1.19.2; verify
// that it matches the runtime version bundled with the Transformers.js
// release in use, since mismatched binaries may fail to load.
env.backends.onnx.wasm.wasmPaths = "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.19.2/dist/";
env.useBrowserCache = true;
env.allowRemoteModels = true;

// WebGPU toggle: currently only reports the preference in the status panel.
preferGPU.addEventListener('change', () => {
  status(preferGPU.checked ? 'Will try WebGPU where possible.' : 'Defaulting to WASM backend.');
});
174
+
175
+ // Helpers
176
/**
 * Decode `file` into an image element through a temporary object URL.
 *
 * @param {Blob} file - The selected file.
 * @returns {Promise<{url: string, img: HTMLImageElement}>} The object URL and
 *   the loaded image. The caller owns `url` and should revoke it when done.
 *   On failure the object URL is revoked here and the promise rejects with an
 *   Error carrying a readable message (instead of a bare error Event).
 */
const loadImageURL = (file) => new Promise((resolve, reject) => {
  const url = URL.createObjectURL(file);
  const img = new Image();
  img.onload = () => resolve({ url, img });
  img.onerror = () => {
    // Nobody receives the URL on failure, so release it here to avoid a leak.
    URL.revokeObjectURL(url);
    reject(new Error(`Could not decode image "${file.name ?? 'unknown'}".`));
  };
  img.crossOrigin = "anonymous";
  img.src = url;
});
184
+
185
/**
 * Paint the canvas background, then image A on the left and image B on the
 * right (each scaled to W x H), optionally overlaying the patch grid.
 *
 * @param {CanvasImageSource} imgA - Left image.
 * @param {CanvasImageSource} imgB - Right image.
 * @returns {{leftX: number, rightX: number, topY: number}} Top-left origins of
 *   the two images on the canvas (they share the same top edge).
 */
function drawSideBySide(imgA, imgB) {
  // Background fill doubles as "clear".
  ctx.fillStyle = '#0a0c0f';
  ctx.fillRect(0, 0, cvs.width, cvs.height);

  // Both images sit 8px in from the top; B is offset by A's width plus the gap.
  const leftX = 8;
  const topY = 8;
  ctx.drawImage(imgA, leftX, topY, W, H);
  const rightX = leftX + W + GAP;
  ctx.drawImage(imgB, rightX, topY, W, H);

  if (gridChk.checked) {
    // Stroke one straight segment as its own path.
    const segment = (x1, y1, x2, y2) => {
      ctx.beginPath();
      ctx.moveTo(x1, y1);
      ctx.lineTo(x2, y2);
      ctx.stroke();
    };

    ctx.strokeStyle = 'rgba(255,255,255,0.15)';
    ctx.lineWidth = 1;
    // Interior grid lines only (1 .. GRID-1); the image edges are left bare.
    for (let step = 1; step < GRID; step++) {
      const offset = step * PATCH;
      segment(leftX + offset, topY, leftX + offset, topY + H);   // A: vertical
      segment(leftX, topY + offset, leftX + W, topY + offset);   // A: horizontal
      segment(rightX + offset, topY, rightX + offset, topY + H); // B: vertical
      segment(rightX, topY + offset, rightX + W, topY + offset); // B: horizontal
    }
  }

  return { leftX, rightX, topY };
}
211
+
212
/**
 * Scale every row of a flat, row-major [L, D] buffer to unit Euclidean length,
 * in place. The norm is clamped to at least 1e-12 so an all-zero row stays
 * zero instead of dividing by zero.
 *
 * @param {Float32Array} data - Flat buffer holding L rows of D values each.
 * @param {number} L - Number of rows.
 * @param {number} D - Values per row.
 */
function rowNormalize(data, L, D) {
  const MIN_NORM = 1e-12;
  for (let row = 0, start = 0; row < L; row++, start += D) {
    const end = start + D;

    let squared = 0.0;
    for (let p = start; p < end; p++) {
      squared += data[p] * data[p];
    }

    const scale = 1.0 / Math.max(Math.sqrt(squared), MIN_NORM);
    for (let p = start; p < end; p++) {
      data[p] *= scale;
    }
  }
}
221
+
222
/**
 * Dense similarity matrix S = A * B^T for two flat row-major matrices.
 * When the rows of A and B are unit vectors, each entry is a cosine similarity.
 *
 * @param {Float32Array} A - [L1, D] matrix.
 * @param {number} L1 - Rows in A.
 * @param {Float32Array} B - [L2, D] matrix.
 * @param {number} L2 - Rows in B.
 * @param {number} D - Shared row length.
 * @returns {Float32Array} Flat row-major [L1, L2] matrix; S[i*L2 + j] = A_i . B_j.
 */
function simMatrix(A, L1, B, L2, D) {
  const S = new Float32Array(L1 * L2);
  let cell = 0; // walks S in row-major order alongside the (row, col) loops
  for (let row = 0; row < L1; row++) {
    const aStart = row * D;
    for (let col = 0; col < L2; col++, cell++) {
      const bStart = col * D;
      let dot = 0.0;
      for (let d = 0; d < D; d++) {
        dot += A[aStart + d] * B[bStart + d];
      }
      S[cell] = dot;
    }
  }
  return S;
}
237
+
238
/**
 * For each row of a flat row-major [rows, cols] matrix, find the column with
 * the largest value. Ties go to the lowest column index. A row with nothing
 * greater than -Infinity (no columns, or only NaN) yields index -1 and value
 * -Infinity.
 *
 * @param {Float32Array} S - Flat [rows, cols] matrix.
 * @param {number} rows
 * @param {number} cols
 * @returns {{idx: Int32Array, val: Float32Array}} Winning column and its value, per row.
 */
function argmaxPerRow(S, rows, cols) {
  const idx = new Int32Array(rows);
  const val = new Float32Array(rows);
  for (let row = 0; row < rows; row++) {
    const start = row * cols;
    let winner = -1;
    let top = -Infinity;
    for (let col = 0; col < cols; col++) {
      const score = S[start + col];
      if (!(score > top)) continue;
      top = score;
      winner = col;
    }
    idx[row] = winner;
    val[row] = top;
  }
  return { idx, val };
}
251
+
252
/**
 * For each column of a flat row-major [rows, cols] matrix, find the row with
 * the largest value. Ties go to the lowest row index. A column with nothing
 * greater than -Infinity (no rows, or only NaN) yields index -1 and value
 * -Infinity.
 *
 * @param {Float32Array} S - Flat [rows, cols] matrix.
 * @param {number} rows
 * @param {number} cols
 * @returns {{idx: Int32Array, val: Float32Array}} Winning row and its value, per column.
 */
function argmaxPerCol(S, rows, cols) {
  const idx = new Int32Array(cols);
  const val = new Float32Array(cols);
  for (let col = 0; col < cols; col++) {
    let winner = -1;
    let top = -Infinity;
    // Walk down the column: consecutive entries are `cols` apart in the flat buffer.
    for (let row = 0, pos = col; row < rows; row++, pos += cols) {
      const score = S[pos];
      if (!(score > top)) continue;
      top = score;
      winner = row;
    }
    idx[col] = winner;
    val[col] = top;
  }
  return { idx, val };
}
265
+
266
/**
 * Map a flat, row-major patch index to its grid cell and to the pixel centre
 * of that cell, relative to the image's top-left corner.
 *
 * @param {number} k - Patch index into a GRID-wide row-major grid.
 * @returns {{r: number, c: number, cx: number, cy: number}} Row, column and
 *   centre coordinates in pixels.
 */
function gridCenter(k) {
  const half = PATCH / 2;
  const row = Math.floor(k / GRID);
  const col = k % GRID;
  return {
    r: row,
    c: col,
    cx: col * PATCH + half,
    cy: row * PATCH + half,
  };
}
272
+
273
/**
 * Draw up to `topK` correspondence lines from patch centres in the left image
 * to patch centres in the right image, strongest matches first.
 *
 * The caller's `matches` array is left untouched: ranking happens on a copy.
 *
 * @param {{leftX: number, rightX: number, topY: number}} base - Image origins
 *   on the canvas, as returned by drawSideBySide.
 * @param {Array<{i: number, j: number, sim: number, mutual: boolean}>} matches -
 *   Candidate pairs: patch i (left) matched to patch j (right) with similarity sim.
 * @param {number} L2 - Unused; kept so existing call sites stay valid.
 * @param {number} topK - Maximum number of lines to draw.
 * @param {number} thr - Minimum similarity; weaker pairs are skipped.
 * @param {boolean} mutualOnly - When true, only pairs flagged `mutual` are drawn.
 */
function drawMatches(base, matches, L2, topK, thr, mutualOnly) {
  const { leftX, rightX, topY } = base;

  // Rank on a copy so the caller's ordering is not disturbed.
  const ranked = [...matches].sort((a, b) => b.sim - a.sim);
  const limit = Math.min(topK, ranked.length);

  // Width of the colour ramp. With thr >= 1 there is no ramp left, and
  // dividing by it would give NaN (0/0) and an invalid CSS colour.
  const span = 1 - thr;

  let drawn = 0;
  for (const m of ranked) {
    if (drawn >= limit) break;
    if (m.sim < thr) continue;
    if (mutualOnly && !m.mutual) continue;

    const from = gridCenter(m.i);
    const to = gridCenter(m.j);

    const x1 = leftX + from.cx;
    const y1 = topY + from.cy;
    const x2 = rightX + to.cx;
    const y2 = topY + to.cy;

    // Colour by similarity (blue -> cyan): t is 0 at the threshold, 1 at sim = 1.
    const t = span > 0 ? Math.min(1, Math.max(0, (m.sim - thr) / span)) : 1;
    const r = Math.floor(60 + 40 * t);
    const g = Math.floor(200 + 30 * t);
    const b = 255;
    ctx.strokeStyle = `rgba(${r},${g},${b},${0.85})`;
    ctx.lineWidth = 1.25;

    ctx.beginPath();
    ctx.moveTo(x1, y1);
    ctx.lineTo(x2, y2);
    ctx.stroke();

    drawn++;
  }
}
307
+
308
/**
 * Extract per-patch tokens as a freshly allocated flat Float32Array [L', D].
 *
 * Accepts dims [B, L, D] or [L, D]. For a batched tensor only the first batch
 * element is read. If L is not a perfect square but L - 1 is, the first token
 * is assumed to be an extra (e.g. CLS) token and is dropped so the remaining
 * tokens form a square grid.
 *
 * @param {{dims: number[], data: Float32Array}} tensor - Tensor-like object
 *   with a flat row-major `data` buffer.
 * @returns {{data: Float32Array, L: number, D: number}} Copy of the kept
 *   tokens, the number of kept tokens, and the feature width.
 * @throws {Error} If the rank is not 2 or 3, or `data` is shorter than L*D.
 */
function tokens2D(tensor) {
  const { dims, data } = tensor;
  let L;
  let D;
  if (dims.length === 3) {
    [, L, D] = dims;
  } else if (dims.length === 2) {
    [L, D] = dims;
  } else {
    throw new Error(`Unexpected tensor shape: [${dims.join(',')}]`);
  }

  if (data.length < L * D) {
    throw new Error(`Tensor data too short: got ${data.length}, need ${L * D}`);
  }

  // If an extra leading token is present, skip it to get a perfect square.
  const isSquare = (n) => Number.isInteger(Math.sqrt(n));
  const skip = !isSquare(L) && isSquare(L - 1) ? 1 : 0;

  // Copy exactly the kept rows of the first batch element, nothing beyond
  // L*D, so the returned buffer always matches the reported [L', D] shape.
  const out = Float32Array.from(data.slice(skip * D, L * D));
  return { data: out, L: L - skip, D };
}
337
+
338
// Cached feature-extraction pipeline and the "model id + dtype" it was built for.
let extractor = null;
let extractorKey = null;

/**
 * Return the image feature-extraction pipeline for the model id and dtype
 * currently selected in the UI, creating it on first use.
 *
 * The cached pipeline is reused only while both settings are unchanged;
 * editing either control causes the next call to load a new pipeline instead
 * of silently reusing the old one.
 *
 * @returns {Promise<Function>} The ready pipeline (also stored in `extractor`).
 */
async function ensureExtractor() {
  const modelId = modelIdInput.value.trim();
  const dtype = dtypeSel.value; // "q8" or "fp32"
  const key = `${modelId}::${dtype}`;
  if (extractor && extractorKey === key) return extractor;

  status(`Loading model: ${modelId} (${dtype}) ...`);
  const t0 = performance.now();
  // Assign only after a successful load so a failed load leaves the previous
  // pipeline/key pair consistent.
  const loaded = await pipeline(
    "image-feature-extraction",
    modelId,
    { dtype }
  );
  extractor = loaded;
  extractorKey = key;
  const t1 = performance.now();
  status(`Model ready in ${(t1 - t0).toFixed(0)} ms. Awaiting images...`);
  return extractor;
}
355
+
356
/**
 * Full matching pass: load both images, extract per-patch features, compute
 * the cosine-similarity matrix, and draw the best A->B correspondences.
 *
 * Errors are reported through `status` (and the console) rather than thrown.
 * The run button is disabled for the duration of the pass and re-enabled
 * afterwards, and the temporary object URLs created for the two files are
 * always revoked.
 *
 * @returns {Promise<void>}
 */
async function run() {
  // Object URLs handed out by loadImageURL; released in `finally`.
  const objectUrls = [];
  try {
    runBtn.disabled = true;

    // allSettled (not all) so that a URL created for one file is still
    // tracked for cleanup when the other file fails to load.
    const settled = await Promise.allSettled([
      loadImageURL(fileA.files[0]),
      loadImageURL(fileB.files[0]),
    ]);
    for (const outcome of settled) {
      if (outcome.status === 'fulfilled') objectUrls.push(outcome.value.url);
    }
    const failure = settled.find((outcome) => outcome.status === 'rejected');
    if (failure) throw failure.reason;
    const [{ value: loadedA }, { value: loadedB }] = settled;
    const imgA = loadedA.img;
    const imgB = loadedB.img;

    // Show the images right away; model loading may take a while.
    drawSideBySide(imgA, imgB);

    // Load extractor (cached between runs)
    const featureExtractor = await ensureExtractor();

    status('Extracting per-patch features ...');
    const t0 = performance.now();

    // Pass the blob URLs rather than the <img> elements: URL strings are an
    // input type the Transformers.js image reader accepts and preprocesses.
    const out = await featureExtractor([loadedA.url, loadedB.url]);
    const dims = out.dims; // expect [2, L, D]
    if (!(dims.length === 3 && dims[0] === 2)) {
      throw new Error(`Unexpected output dims: [${dims.join(',')}]`);
    }
    const [, L, D] = dims;

    // Views onto batch element 0 and 1 (tokens2D copies before we mutate)
    const stride = L * D;
    const dataA = out.data.subarray(0, stride);
    const dataB = out.data.subarray(stride, 2 * stride);

    // Convert to [L',D], dropping a leading extra token if present
    const Atd = tokens2D({ data: dataA, dims: [L, D] });
    const Btd = tokens2D({ data: dataB, dims: [L, D] });

    if (Atd.L !== GRID * GRID || Btd.L !== GRID * GRID) {
      console.warn('Token count not 16x16; continuing anyway.', Atd.L, Btd.L);
    }

    // Unit-length rows turn the dot products below into cosine similarities
    rowNormalize(Atd.data, Atd.L, Atd.D);
    rowNormalize(Btd.data, Btd.L, Btd.D);

    status('Computing similarity matrix ... (this is O(L^2·D))');
    const S = simMatrix(Atd.data, Atd.L, Btd.data, Btd.L, Atd.D);

    // Best match in each direction: A->B per row, B->A per column
    const A2B = argmaxPerRow(S, Atd.L, Btd.L);
    const B2A = argmaxPerCol(S, Atd.L, Btd.L);

    // One candidate per A patch; "mutual" when B's best match points back to it
    const thr = Number(thrSlider.value) / 100.0;
    const pairs = [];
    let maxSim = -Infinity;
    for (let i = 0; i < Atd.L; i++) {
      const j = A2B.idx[i];
      const sim = A2B.val[i];
      const mutual = (B2A.idx[j] === i);
      pairs.push({ i, j, sim, mutual });
      if (sim > maxSim) maxSim = sim;
    }

    // Redraw base (so the grid toggle applies immediately) and use the
    // origins from this draw for the match lines.
    const base = drawSideBySide(imgA, imgB);
    drawMatches(base, pairs, Btd.L, Number(kSlider.value), thr, mutualChk.checked);

    const t1 = performance.now();
    status(`Done. Tokens: ${Atd.L}×${Atd.D}. Max sim: ${maxSim.toFixed(3)}. Total ${(t1 - t0).toFixed(0)} ms.`);
  } catch (err) {
    console.error(err);
    status('Error: ' + (err && err.message ? err.message : String(err)));
  } finally {
    // The decoded <img> elements stay usable after their blob URLs are released.
    for (const url of objectUrls) URL.revokeObjectURL(url);
    runBtn.disabled = false;
  }
}
431
+ runBtn.onclick = run;
432
+
433
+ status('Ready. Load two images, then click “Run patch matching”.');
434
+ })();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
435
  </script>
 
 
436
  </body>
437
  </html>