<!--
lorien-danger's picture
Update index.html
768e90f verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>I-JEPA Patch Correspondence Analyzer</title>
<style>
body {
font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
margin: 0;
padding: 20px;
background: linear-gradient(135deg, #1a202c 0%, #2d3748 100%);
min-height: 100vh;
color: #e2e8f0;
}
/* Main layout container */
.container {
max-width: 1400px;
margin: 0 auto;
background: rgba(45, 55, 72, 0.8);
backdrop-filter: blur(10px);
border-radius: 20px;
padding: 30px;
box-shadow: 0 20px 40px rgba(0, 0, 0, 0.3);
border: 1px solid #4a5568;
}
h1 {
text-align: center;
background: linear-gradient(135deg, #60a5fa 0%, #a78bfa 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
margin-bottom: 10px;
font-size: 2.5em;
font-weight: 700;
}
.subtitle {
text-align: center;
color: #a0aec0;
margin-bottom: 30px;
font-size: 1.1em;
}
.upload-section {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 30px;
margin-bottom: 30px;
}
.upload-box {
border: 2px dashed #4a5568;
border-radius: 15px;
padding: 40px;
text-align: center;
transition: all 0.3s ease;
background: rgba(26, 32, 44, 0.6);
position: relative;
overflow: hidden;
}
.upload-box:hover {
border-color: #60a5fa;
background: rgba(26, 32, 44, 0.8);
}
.upload-box.has-image {
border-color: #48bb78;
background: rgba(26, 32, 44, 0.9);
}
.upload-input {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
opacity: 0;
cursor: pointer;
}
.upload-content {
pointer-events: none;
}
.upload-icon {
font-size: 3em;
margin-bottom: 15px;
color: #718096;
}
.upload-text {
font-size: 1.1em;
color: #e2e8f0;
margin-bottom: 10px;
font-weight: 600;
}
.upload-hint {
font-size: 0.9em;
color: #a0aec0;
}
.preview-image {
max-width: 100%;
max-height: 200px;
border-radius: 10px;
margin-top: 15px;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.3);
}
.controls {
display: flex;
justify-content: center;
gap: 20px;
margin-bottom: 30px;
flex-wrap: wrap;
}
.btn {
padding: 12px 30px;
border: none;
border-radius: 12px;
cursor: pointer;
font-size: 1em;
font-weight: 600;
transition: all 0.3s ease;
text-transform: uppercase;
letter-spacing: 1px;
}
.btn-primary {
background: linear-gradient(135deg, #60a5fa 0%, #a78bfa 100%);
color: white;
}
.btn-primary:hover:not(:disabled) {
transform: translateY(-2px);
box-shadow: 0 8px 20px rgba(96, 165, 250, 0.4);
}
.btn-secondary {
background: #4a5568;
color: #e2e8f0;
}
.btn-secondary:hover {
background: #2d3748;
transform: translateY(-2px);
}
.btn:disabled {
background: #2d3748;
color: #718096;
cursor: not-allowed;
transform: none;
}
.loading {
text-align: center;
padding: 40px;
display: none;
}
.spinner {
width: 50px;
height: 50px;
border: 4px solid #2d3748;
border-top: 4px solid #60a5fa;
border-radius: 50%;
animation: spin 1s linear infinite;
margin: 0 auto 20px;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
.results {
display: none;
}
.visualization {
background: rgba(26, 32, 44, 0.6);
border-radius: 15px;
padding: 20px;
margin-bottom: 20px;
border: 1px solid #4a5568;
}
.images-container {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 30px;
margin-bottom: 30px;
}
.image-analysis {
text-align: center;
}
.image-analysis h3 {
color: #e2e8f0;
margin-bottom: 15px;
}
.canvas-container {
position: relative;
display: inline-block;
border-radius: 10px;
overflow: hidden;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
}
.analysis-canvas {
display: block;
max-width: 100%;
height: auto;
cursor: crosshair;
}
.stats {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
gap: 15px;
margin-top: 20px;
}
.stat-card {
background: rgba(26, 32, 44, 0.8);
padding: 20px;
border-radius: 10px;
text-align: center;
border-left: 4px solid #60a5fa;
}
.stat-value {
font-size: 2em;
font-weight: bold;
color: #e2e8f0;
}
.stat-label {
color: #a0aec0;
margin-top: 5px;
}
.similarity-threshold {
margin: 20px 0;
text-align: center;
color: #e2e8f0;
}
.threshold-slider {
width: 300px;
margin: 0 10px;
-webkit-appearance: none;
appearance: none;
height: 8px;
background: #4a5568;
border-radius: 4px;
outline: none;
}
.threshold-slider::-webkit-slider-thumb {
-webkit-appearance: none;
appearance: none;
width: 20px;
height: 20px;
background: #60a5fa;
cursor: pointer;
border-radius: 50%;
}
.threshold-slider::-moz-range-thumb {
width: 20px;
height: 20px;
background: #60a5fa;
cursor: pointer;
border-radius: 50%;
border: none;
}
.error {
background: rgba(245, 101, 101, 0.2);
color: #fc8181;
padding: 15px;
border-radius: 10px;
margin: 20px 0;
text-align: center;
display: none;
border: 1px solid rgba(245, 101, 101, 0.3);
}
.info-panel {
background: rgba(26, 32, 44, 0.6);
border-radius: 10px;
padding: 20px;
margin-bottom: 20px;
border: 1px solid #4a5568;
}
.info-panel h4 {
color: #60a5fa;
margin-bottom: 10px;
}
.info-panel p {
color: #a0aec0;
margin: 5px 0;
font-size: 0.9em;
}
@media (max-width: 768px) {
.upload-section {
grid-template-columns: 1fr;
}
.images-container {
grid-template-columns: 1fr;
}
.controls {
flex-direction: column;
align-items: center;
}
.threshold-slider {
width: 200px;
}
}
</style>
<!-- End of page styles -->
</head>
<body>
<div class="container">
  <h1>I-JEPA Patch Correspondence Analyzer</h1>
  <p class="subtitle">Upload two images to analyze cross-patch correspondences using I-JEPA embeddings</p>

  <!-- Image pickers. Each file input is transparent and stretched over its box by the
       .upload-input rule, so it needs its own accessible name: the visible text lives
       in a sibling element that is not programmatically tied to the control. -->
  <div class="upload-section">
    <div class="upload-box" id="upload1">
      <input class="upload-input" id="file1" type="file" accept="image/*" aria-label="Upload image 1">
      <div class="upload-content">
        <div class="upload-icon">🖼️</div>
        <div class="upload-text">Upload Image 1</div>
        <div class="upload-hint">Click or drag image here</div>
      </div>
    </div>
    <div class="upload-box" id="upload2">
      <input class="upload-input" id="file2" type="file" accept="image/*" aria-label="Upload image 2">
      <div class="upload-content">
        <div class="upload-icon">🖼️</div>
        <div class="upload-text">Upload Image 2</div>
        <div class="upload-hint">Click or drag image here</div>
      </div>
    </div>
  </div>

  <!-- Actions. type="button" keeps these from acting as submit buttons if the markup is
       ever wrapped in a form. -->
  <div class="controls">
    <button class="btn btn-primary" id="analyzeBtn" type="button" disabled>
      🔍 Analyze Cross-Patch Correspondences
    </button>
    <button class="btn btn-secondary" id="clearBtn" type="button">
      🗑️ Clear Images
    </button>
  </div>

  <!-- Status regions toggled from script; the roles let assistive technology announce them. -->
  <div class="error" id="errorMsg" role="alert"></div>
  <div class="loading" id="loading" role="status">
    <div class="spinner"></div>
    <p>Loading I-JEPA model and analyzing images...</p>
    <p><small>Using onnx-community/ijepa_vith14_1k for optimal browser performance</small></p>
  </div>

  <!-- Results panel, hidden until an analysis completes. -->
  <div class="results" id="results">
    <div class="info-panel">
      <h4>How to Use:</h4>
      <p>• Hover over any patch in either image to see its corresponding patches in the other image</p>
      <p>• Adjust the similarity threshold to show more or fewer correspondences</p>
      <p>• Blue outline shows the patch you're hovering over</p>
      <p>• Colored patches show corresponding regions based on I-JEPA embeddings</p>
    </div>
    <div class="visualization">
      <div class="similarity-threshold">
        <!-- "for" ties the label to the slider so the control has an accessible name. -->
        <label for="thresholdSlider">Similarity Threshold: </label>
        <input class="threshold-slider" id="thresholdSlider" type="range"
               min="0" max="1" step="0.01" value="0.7">
        <span id="thresholdValue">0.70</span>
      </div>
      <div class="images-container">
        <div class="image-analysis">
          <h3>Image 1</h3>
          <div class="canvas-container">
            <canvas id="canvas1" class="analysis-canvas"></canvas>
          </div>
        </div>
        <div class="image-analysis">
          <h3>Image 2</h3>
          <div class="canvas-container">
            <canvas id="canvas2" class="analysis-canvas"></canvas>
          </div>
        </div>
      </div>
      <div class="stats">
        <div class="stat-card">
          <div class="stat-value" id="totalPatches">0</div>
          <div class="stat-label">Patches per Image</div>
        </div>
        <div class="stat-card">
          <div class="stat-value" id="strongCorrespondences">0</div>
          <div class="stat-label">Strong Correspondences</div>
        </div>
        <div class="stat-card">
          <div class="stat-value" id="avgSimilarity">0.00</div>
          <div class="stat-label">Average Cross-Similarity</div>
        </div>
        <div class="stat-card">
          <div class="stat-value" id="maxSimilarity">0.00</div>
          <div class="stat-label">Maximum Similarity</div>
        </div>
      </div>
    </div>
  </div>
</div>
<script type="module">
import { pipeline, RawImage, matmul } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.7.2";
// ---- Configuration ----
// Model repository id handed to the feature-extraction pipeline.
const MODEL_ID = "onnx-community/ijepa_vith14_1k";
// Square canvas edge lengths (in pixels) that uploaded images are snapped to.
const SUPPORTED_RESOLUTIONS = [224, 336, 448];
// 2MP limit for performance (2 * 1024 * 1024 = 2097152 pixels).
const MAX_PIXELS = 2 * 1024 * 1024;

// ---- DOM elements ----
// Small lookup shorthand; every element below is resolved once at module start.
const byId = (id) => document.getElementById(id);
const file1Input = byId('file1');
const file2Input = byId('file2');
const upload1 = byId('upload1');
const upload2 = byId('upload2');
const analyzeBtn = byId('analyzeBtn');
const clearBtn = byId('clearBtn');
const loading = byId('loading');
const results = byId('results');
const errorMsg = byId('errorMsg');
const thresholdSlider = byId('thresholdSlider');
const thresholdValue = byId('thresholdValue');
const canvas1 = byId('canvas1');
const canvas2 = byId('canvas2');
const ctx1 = canvas1.getContext('2d');
const ctx2 = canvas2.getContext('2d');

// ---- State ----
// Feature-extraction pipeline; stays null until the model has loaded.
let extractor = null;
// The File objects chosen for each slot (null while a slot is empty).
let image1Data = null;
let image2Data = null;
// Patch feature tensors produced by the most recent analysis.
let features1 = null;
let features2 = null;
// Nested array of similarities: one row per image-1 patch, one column per image-2 patch.
let crossSimilarities = null;
// Edge length of the patch grid, taken from the image-1 extraction result.
let patchesPerRow = 0;
// Decoded source images and their centre-crop rectangles, kept for canvas redraws.
let originalImages = { img1: null, img2: null };
let imageCropParams = { img1: null, img2: null };
// Utility functions
// Handle of the pending auto-hide timer (null when none is scheduled).
let errorHideTimer = null;

// Shows `message` in the error banner and hides the banner again 5 seconds later.
//   message: text to display (assigned via textContent, so it is never parsed as HTML).
function showError(message) {
  errorMsg.textContent = message;
  errorMsg.style.display = 'block';
  // Restart the countdown. Without cancelling, the timer started by an earlier error
  // would hide a newer message before its own 5 seconds had elapsed.
  clearTimeout(errorHideTimer);
  errorHideTimer = setTimeout(() => {
    errorMsg.style.display = 'none';
    errorHideTimer = null;
  }, 5000);
}
// Toggles the loading panel and keeps the Analyze button consistent with it.
//   show: true to display the spinner and lock the button, false to hide the spinner.
function showLoading(show) {
  loading.style.display = show ? 'block' : 'none';
  if (show) {
    // Work in progress: never allow a second analysis to be started.
    analyzeBtn.disabled = true;
  } else {
    // Do not blindly re-enable: the button is only usable once both images and the
    // model are available, which updateAnalyzeButton() checks.
    updateAnalyzeButton();
  }
}
// Shows the results panel when `show` is truthy and hides it otherwise.
function showResults(show) {
  let displayMode = 'none';
  if (show) {
    displayMode = 'block';
  }
  results.style.display = displayMode;
}
// Enables the Analyze button only when both images are selected and the model is loaded.
function updateAnalyzeButton() {
  const readyToAnalyze = image1Data && image2Data && extractor;
  analyzeBtn.disabled = !readyToAnalyze;
}
// Returns the entry of SUPPORTED_RESOLUTIONS nearest to `targetDim`.
// On a tie the earlier entry wins, because a candidate only replaces the current
// best when it is strictly closer.
function findClosestSupportedResolution(targetDim) {
  const distanceTo = (resolution) => Math.abs(resolution - targetDim);
  return SUPPORTED_RESOLUTIONS.reduce((best, candidate) => {
    if (distanceTo(candidate) < distanceTo(best)) {
      return candidate;
    }
    return best;
  });
}
// Initialize model
// Loads the feature-extraction pipeline into `extractor`, preferring WebGPU (q4 weights)
// and falling back to WASM (q8 weights) when navigator.gpu is unavailable.
// Returns a promise resolving to true on success and false on failure; failures are
// logged and reported through showError() rather than thrown.
async function initializeModel() {
  try {
    showLoading(true);
    const isWebGpuSupported = !!navigator.gpu;
    const device = isWebGpuSupported ? "webgpu" : "wasm";
    const dtype = isWebGpuSupported ? "q4" : "q8";
    console.log(`Loading I-JEPA model with ${device.toUpperCase()}...`);
    extractor = await pipeline("image-feature-extraction", MODEL_ID, { device, dtype });
    // Disable automatic resizing - we'll handle it ourselves
    if (extractor?.processor?.image_processor) {
      extractor.processor.image_processor.do_resize = false;
    }
    console.log('Model loaded successfully');
    return true;
  } catch (error) {
    console.error('Error loading model:', error);
    showError('Failed to load I-JEPA model. Please refresh and try again.');
    return false;
  } finally {
    // Order matters: hide the spinner first, then derive the button state from the
    // actual prerequisites, so the button is not left enabled with no images selected.
    showLoading(false);
    updateAnalyzeButton();
  }
}
// Process image to canvas
// Decodes `file`, centre-crops it to a square, and draws it onto `canvas` at the
// supported resolution closest to the (pixel-budget-limited) crop size.
//   file:     image File/Blob to decode.
//   canvas:   target canvas; its width and height are overwritten.
//   ctx:      2D context of `canvas`.
//   imageKey: 'img1' or 'img2'; selects the slot in originalImages / imageCropParams.
// Returns a promise resolving to the chosen resolution. It rejects with the error event
// when decoding fails, or with the thrown error when drawing fails.
function processImageToCanvas(file, canvas, ctx, imageKey) {
  return new Promise((resolve, reject) => {
    const objectUrl = URL.createObjectURL(file);
    const img = new Image();
    img.onload = () => {
      // The decoded image no longer needs the blob URL; release it to avoid leaking
      // one URL per upload.
      URL.revokeObjectURL(objectUrl);
      try {
        const { naturalWidth: w, naturalHeight: h } = img;
        // Crop to square from center
        const cropSize = Math.min(w, h);
        const sx = (w - cropSize) / 2;
        const sy = (h - cropSize) / 2;
        imageCropParams[imageKey] = { sx, sy, sWidth: cropSize, sHeight: cropSize };
        // Find optimal resolution: cap the square at MAX_PIXELS before snapping
        let scaledCropSize = cropSize;
        if (scaledCropSize * scaledCropSize > MAX_PIXELS) {
          scaledCropSize = Math.sqrt(MAX_PIXELS);
        }
        const chosenResolution = findClosestSupportedResolution(scaledCropSize);
        // Set canvas size and draw
        canvas.width = chosenResolution;
        canvas.height = chosenResolution;
        ctx.drawImage(
          img,
          sx, sy, cropSize, cropSize,
          0, 0, chosenResolution, chosenResolution
        );
        originalImages[imageKey] = img;
        resolve(chosenResolution);
      } catch (error) {
        // Surface drawing problems to the caller instead of leaving the promise pending.
        reject(error);
      }
    };
    img.onerror = (event) => {
      URL.revokeObjectURL(objectUrl);
      reject(event);
    };
    img.src = objectUrl;
  });
}
// File upload handling
// Loads the first file of `fileInput` into the canvas identified by `canvasId`, records
// it as the image for `imageKey` ('img1' or 'img2') and swaps the upload box content
// for a preview. Does nothing when no file is selected; processing failures are logged
// and reported through showError().
function handleFileUpload(fileInput, uploadBox, imageKey, canvasId) {
  const file = fileInput.files[0];
  if (!file) return;
  const canvas = document.getElementById(canvasId);
  const ctx = canvas.getContext('2d');
  processImageToCanvas(file, canvas, ctx, imageKey)
    .then(() => {
      // Store image data
      if (imageKey === 'img1') {
        image1Data = file;
      } else {
        image2Data = file;
      }
      // The canvas now shows a different image, so any earlier analysis no longer
      // describes it. Drop it and hide the results until the user analyzes again;
      // otherwise hovering would paint correspondences computed for the old image.
      features1 = null;
      features2 = null;
      crossSimilarities = null;
      patchesPerRow = 0;
      showResults(false);
      // Update UI
      uploadBox.classList.add('has-image');
      const content = uploadBox.querySelector('.upload-content');
      const previewUrl = URL.createObjectURL(file);
      const releasePreviewUrl = () => URL.revokeObjectURL(previewUrl);
      const preview = document.createElement('img');
      preview.className = 'preview-image';
      preview.alt = 'Preview';
      // Release the blob URL once the preview has loaded (or failed to load).
      preview.addEventListener('load', releasePreviewUrl, { once: true });
      preview.addEventListener('error', releasePreviewUrl, { once: true });
      preview.src = previewUrl;
      const status = document.createElement('div');
      status.style.cssText = 'margin-top: 10px; color: #48bb78; font-weight: 600;';
      status.textContent = '✓ Image loaded';
      content.replaceChildren(preview, status);
      updateAnalyzeButton();
    })
    .catch(error => {
      console.error('Error processing image:', error);
      showError('Failed to process image. Please try a different file.');
    });
}
// Extract features from canvas
// Runs the extractor on the canvas pixels with pooling disabled and returns
// { features, patchesPerRow }, where `features` holds only the patch tokens and
// `patchesPerRow` is the edge length of the (assumed square) patch grid.
// Errors are logged and rethrown.
async function extractFeatures(canvas) {
  try {
    const imageData = await RawImage.fromCanvas(canvas);
    const features = await extractor(imageData, { pooling: "none" });
    const totalTokens = features.dims[1];
    const isPerfectSquare = (n) => {
      const root = Math.round(Math.sqrt(n));
      return root * root === n;
    };
    // Decide from the token count whether a leading non-patch token is present.
    // A square count means the sequence is already a full patch grid; otherwise keep
    // the original assumption of one leading token.
    // NOTE(review): I-JEPA is documented as having no [CLS] token - confirm for this export.
    const leadingTokens = isPerfectSquare(totalTokens) ? 0 : 1;
    const nPatches = totalTokens - leadingTokens;
    // Slice ranges are [start, end) with an exclusive end, so the end must be
    // totalTokens; the previous [1, nPatches] silently dropped the last patch too.
    const patchFeatures = leadingTokens === 0
      ? features
      : features.slice(null, [leadingTokens, totalTokens]);
    // Calculate patches per row
    const patchesPerRowCalc = Math.round(Math.sqrt(nPatches));
    if (patchesPerRowCalc * patchesPerRowCalc !== nPatches) {
      console.warn("Patch count is not a perfect square:", nPatches);
    }
    return { features: patchFeatures, patchesPerRow: patchesPerRowCalc };
  } catch (error) {
    console.error('Error extracting features:', error);
    throw error;
  }
}
// Calculate cross-similarities between two images
// L2-normalizes both feature tensors along their last axis and multiplies the first by
// the transpose of the second. Returns the first batch entry of the product as a nested
// array: rows follow `features1` patches, columns follow `features2` patches.
// Errors are logged and rethrown.
async function calculateCrossSimilarities(features1, features2) {
  try {
    const unitFirst = features1.normalize(2, -1);
    const unitSecond = features2.normalize(2, -1);
    // Swap the last two axes so the product pairs every patch of one image with every
    // patch of the other.
    const transposedSecond = unitSecond.permute(0, 2, 1);
    const similarityTensor = await matmul(unitFirst, transposedSecond);
    const nested = await similarityTensor.tolist();
    return nested[0];
  } catch (error) {
    console.error('Error calculating similarities:', error);
    throw error;
  }
}
// Redraw original image on canvas
// Paints the stored centre crop of the image for `imageKey` over the whole canvas,
// which also wipes any overlay drawn earlier. No-op when the slot holds no image or
// no crop rectangle.
function redrawOriginalImage(canvas, ctx, imageKey) {
  const sourceImage = originalImages[imageKey];
  const crop = imageCropParams[imageKey];
  if (!sourceImage || !crop) {
    return;
  }
  const { sx, sy, sWidth, sHeight } = crop;
  ctx.drawImage(sourceImage, sx, sy, sWidth, sHeight, 0, 0, canvas.width, canvas.height);
}
// Color mapping for similarity visualization
// Colour stops as [position, [r, g, b]] pairs, ordered by position from 0.0 to 1.0.
const INFERNO_COLORMAP = [
  [0.0, [0,0,4]], [0.1, [39,12,69]], [0.2, [84,15,104]], [0.3, [128,31,103]], [0.4, [170,48,88]],
  [0.5, [209,70,68]], [0.6, [240,97,47]], [0.7, [253,138,28]], [0.8, [252,185,26]], [0.9, [240,231,56]], [1.0, [252,255,160]]
];

// Maps `t` to a CSS "rgb(r, g, b)" string by linear interpolation between the two
// surrounding colour stops.
//   t: position on the colour map; values outside [0, 1] are clamped to the nearest end.
// A non-numeric `t` (NaN) yields the last colour of the map.
function getInfernoColor(t) {
  // Clamp first: an unclamped negative `t` would extrapolate below the first stop and
  // produce negative (invalid) channel values.
  const clamped = Math.min(1, Math.max(0, t));
  for (let i = 1; i < INFERNO_COLORMAP.length; i++) {
    const [tp, cp] = INFERNO_COLORMAP[i-1];
    const [tc, cc] = INFERNO_COLORMAP[i];
    if (clamped <= tc) {
      // Fraction of the way from the previous stop to the current one.
      const a = (clamped - tp) / (tc - tp);
      const r = cp[0] + a * (cc[0] - cp[0]);
      const g = cp[1] + a * (cc[1] - cp[1]);
      const b = cp[2] + a * (cc[2] - cp[2]);
      return `rgb(${Math.round(r)}, ${Math.round(g)}, ${Math.round(b)})`;
    }
  }
  // Only reachable when the comparison above never succeeds (NaN input).
  const last = INFERNO_COLORMAP[INFERNO_COLORMAP.length-1][1];
  return `rgb(${last.join(", ")})`;
}
// Draw highlights on canvas
// Repaints `canvas` from its stored source image and then overlays either
//   - the outline of the hovered patch (isQueryImage === true), or
//   - translucent fills on every patch whose similarity to the hovered patch reaches
//     the slider threshold (isQueryImage === false).
//   imageKey:        'img1' or 'img2', the image shown on `canvas`.
//   queryPatchIndex: row-major index of the hovered patch in the OTHER image when
//                    isQueryImage is false, or in this image when it is true.
// No-op until an analysis has produced crossSimilarities and patchesPerRow.
function drawHighlights(canvas, ctx, imageKey, queryPatchIndex, isQueryImage) {
  if (!crossSimilarities || !patchesPerRow) return;
  const patchSize = canvas.width / patchesPerRow;
  const threshold = parseFloat(thresholdSlider.value);
  // Redraw original image
  redrawOriginalImage(canvas, ctx, imageKey);
  if (isQueryImage) {
    // Draw query patch highlight
    const qy = Math.floor(queryPatchIndex / patchesPerRow);
    const qx = queryPatchIndex % patchesPerRow;
    ctx.strokeStyle = "#60a5fa";
    ctx.lineWidth = 3;
    ctx.strokeRect(qx * patchSize, qy * patchSize, patchSize, patchSize);
    return;
  }
  // crossSimilarities[i][j] compares image-1 patch i with image-2 patch j. When this
  // canvas shows image 2 the query lives in image 1, so read row `queryPatchIndex`.
  // When it shows image 1 the query lives in image 2, so read column `queryPatchIndex`
  // (reading the row here would visualize an unrelated image-1 patch).
  const similarities = imageKey === 'img2'
    ? (crossSimilarities[queryPatchIndex] || [])
    : crossSimilarities.map((row) => row[queryPatchIndex]);
  // Value range of this query's similarities, used to scale overlay opacity.
  let minSim = Infinity;
  let maxSim = -Infinity;
  for (const sim of similarities) {
    if (sim < minSim) minSim = sim;
    if (sim > maxSim) maxSim = sim;
  }
  const range = maxSim - minSim;
  for (let i = 0; i < similarities.length; i++) {
    const sim = similarities[i];
    if (sim >= threshold) {
      const py = Math.floor(i / patchesPerRow);
      const px = i % patchesPerRow;
      // Normalize similarity for color mapping; a flat range maps everything to 1.
      const normalizedSim = range > 1e-4 ? (sim - minSim) / range : 1;
      // Squaring emphasizes the strongest matches; opacity is capped at 0.8.
      const alpha = Math.pow(normalizedSim, 2) * 0.8;
      ctx.fillStyle = `rgba(96, 165, 250, ${alpha})`;
      ctx.fillRect(px * patchSize, py * patchSize, patchSize, patchSize);
    }
  }
}
// Clear highlights
// Removes every overlay by repainting both canvases from their stored source images.
function clearHighlights() {
  const targets = [
    [canvas1, ctx1, 'img1'],
    [canvas2, ctx2, 'img2'],
  ];
  for (const [canvas, ctx, key] of targets) {
    redrawOriginalImage(canvas, ctx, key);
  }
}
// Mouse event handlers
// Builds the mousemove listener for `canvas`. The listener converts the pointer position
// into a row-major patch index and redraws both canvases: the hovered image gets the
// query outline, the other one the correspondence overlay.
//   isImage1: true when `canvas` displays image 1.
// (`imageKey` is accepted but not read by the listener.)
function handleMouseMove(canvas, imageKey, isImage1) {
  return function onCanvasMouseMove(event) {
    if (!crossSimilarities || !patchesPerRow) return;
    // Map CSS pixels to canvas pixels; the canvas may be displayed scaled.
    const bounds = canvas.getBoundingClientRect();
    const x = (event.clientX - bounds.left) * (canvas.width / bounds.width);
    const y = (event.clientY - bounds.top) * (canvas.height / bounds.height);
    const outsideCanvas = x < 0 || x >= canvas.width || y < 0 || y >= canvas.height;
    if (outsideCanvas) return;
    const cellSize = canvas.width / patchesPerRow;
    const column = Math.floor(x / cellSize);
    const row = Math.floor(y / cellSize);
    const patchIndex = row * patchesPerRow + column;
    const patchCount = patchesPerRow * patchesPerRow;
    if (patchIndex < 0 || patchIndex >= patchCount) return;
    // Draw highlights on both canvases
    drawHighlights(canvas1, ctx1, 'img1', patchIndex, isImage1);
    drawHighlights(canvas2, ctx2, 'img2', patchIndex, !isImage1);
  };
}
// Update statistics
// Recomputes the four stat cards from crossSimilarities and the current threshold:
// patches per image, number of patch pairs at or above the threshold, mean similarity
// and maximum similarity. No-op when no analysis result is available.
function updateStatistics() {
  if (!crossSimilarities) return;
  const threshold = parseFloat(thresholdSlider.value);
  const totalPatches = patchesPerRow * patchesPerRow;
  let strongCorrespondences = 0;
  let totalSimilarity = 0;
  // Start below every possible value: cosine similarities can be negative, so seeding
  // with 0 would report a maximum that never occurred.
  let maxSim = -Infinity;
  let count = 0;
  for (const row of crossSimilarities) {
    for (const sim of row) {
      totalSimilarity += sim;
      if (sim > maxSim) maxSim = sim;
      count++;
      if (sim >= threshold) {
        strongCorrespondences++;
      }
    }
  }
  // With an empty matrix there is nothing to average; show zeros instead of NaN / -Infinity.
  const average = count > 0 ? totalSimilarity / count : 0;
  const maximum = count > 0 ? maxSim : 0;
  document.getElementById('totalPatches').textContent = totalPatches;
  document.getElementById('strongCorrespondences').textContent = strongCorrespondences;
  document.getElementById('avgSimilarity').textContent = average.toFixed(3);
  document.getElementById('maxSimilarity').textContent = maximum.toFixed(3);
}
// Event listeners
// Each file input feeds its own upload box, image slot and canvas.
const uploadSlots = [
  [file1Input, upload1, 'img1', 'canvas1'],
  [file2Input, upload2, 'img2', 'canvas2'],
];
for (const [input, box, key, canvasId] of uploadSlots) {
  input.addEventListener('change', () => handleFileUpload(input, box, key, canvasId));
}
// Clear button: drops every image and analysis result and restores the initial UI.
clearBtn.addEventListener('click', () => {
  // Placeholder markup for an empty upload box; only the label text varies.
  const placeholderMarkup = (label) => [
    '',
    '<div class="upload-icon">🖼️</div>',
    `<div class="upload-text">${label}</div>`,
    '<div class="upload-hint">Click or drag image here</div>',
    '',
  ].join('\n');

  // Reset all data
  image1Data = null;
  image2Data = null;
  features1 = null;
  features2 = null;
  crossSimilarities = null;
  patchesPerRow = 0;
  originalImages = { img1: null, img2: null };
  imageCropParams = { img1: null, img2: null };

  // Reset UI: empty the inputs so re-selecting the same file fires `change` again.
  file1Input.value = '';
  file2Input.value = '';
  const boxes = [
    [upload1, 'Upload Image 1'],
    [upload2, 'Upload Image 2'],
  ];
  for (const [box] of boxes) {
    box.classList.remove('has-image');
  }
  for (const [box, label] of boxes) {
    box.querySelector('.upload-content').innerHTML = placeholderMarkup(label);
  }

  // Clear canvases
  ctx1.clearRect(0, 0, canvas1.width, canvas1.height);
  ctx2.clearRect(0, 0, canvas2.width, canvas2.height);
  showResults(false);
  updateAnalyzeButton();
});
// Threshold slider: mirror the value (two decimals) next to the slider and refresh
// the statistics that depend on it.
thresholdSlider.addEventListener('input', () => {
  const currentValue = parseFloat(thresholdSlider.value);
  const formatted = currentValue.toFixed(2);
  thresholdValue.textContent = formatted;
  updateStatistics();
});
// Main analysis function
// True once the canvas hover listeners have been registered, so repeated analyses do
// not stack additional copies of them.
let hoverListenersAttached = false;

// Analyze button: extracts patch features from both canvases, computes the
// cross-similarity matrix, wires up hover interaction and shows the results panel.
// Failures are logged and reported through showError().
analyzeBtn.addEventListener('click', async () => {
  if (!image1Data || !image2Data || !extractor) return;
  showLoading(true);
  showResults(false);
  try {
    // Each upload is snapped to a resolution independently, so the two canvases can
    // end up with different sizes and therefore different patch grids. The hover code
    // uses a single patchesPerRow for both images, so bring both canvases to the
    // smaller of the two resolutions before extracting features.
    const commonResolution = Math.min(canvas1.width, canvas2.width);
    const surfaces = [
      [canvas1, ctx1, 'img1'],
      [canvas2, ctx2, 'img2'],
    ];
    for (const [canvas, ctx, key] of surfaces) {
      if (canvas.width !== commonResolution || canvas.height !== commonResolution) {
        // Resizing clears the canvas, so repaint it from the stored source image.
        canvas.width = commonResolution;
        canvas.height = commonResolution;
        redrawOriginalImage(canvas, ctx, key);
      }
    }
    console.log('Extracting features from both images...');
    // Extract features from both images
    const result1 = await extractFeatures(canvas1);
    const result2 = await extractFeatures(canvas2);
    if (result1.patchesPerRow !== result2.patchesPerRow) {
      // Fail loudly rather than visualizing with mismatched grids.
      throw new Error(
        `Patch grids differ: ${result1.patchesPerRow} vs ${result2.patchesPerRow} patches per row`
      );
    }
    features1 = result1.features;
    features2 = result2.features;
    patchesPerRow = result1.patchesPerRow;
    console.log(`Patch grid: ${patchesPerRow}x${patchesPerRow} patches per image`);
    // Calculate cross-similarities
    console.log('Calculating cross-similarities...');
    crossSimilarities = await calculateCrossSimilarities(features1, features2);
    // Set up mouse event listeners (once; the handlers read the current module state)
    if (!hoverListenersAttached) {
      canvas1.addEventListener('mousemove', handleMouseMove(canvas1, 'img1', true));
      canvas1.addEventListener('mouseleave', clearHighlights);
      canvas2.addEventListener('mousemove', handleMouseMove(canvas2, 'img2', false));
      canvas2.addEventListener('mouseleave', clearHighlights);
      hoverListenersAttached = true;
    }
    // Update statistics
    updateStatistics();
    // Show results
    showResults(true);
    console.log('Analysis complete!');
  } catch (error) {
    console.error('Analysis error:', error);
    showError('Failed to analyze images. Please try again with different images.');
  } finally {
    showLoading(false);
  }
});
// Drag and drop support
// Wires dragover / dragleave / drop on both upload boxes. Dropping an image assigns the
// dropped files to the matching file input and runs the regular upload path; drops whose
// first file is not an image are ignored.
['upload1', 'upload2'].forEach((id, index) => {
  const uploadBox = document.getElementById(id);
  const fileInput = document.getElementById(`file${index + 1}`);
  // Clearing the inline value hands control back to the stylesheet. Writing a fixed
  // colour here would permanently override the .has-image and :hover border colours,
  // because inline styles outrank class rules.
  const resetBorder = () => {
    uploadBox.style.borderColor = '';
  };
  uploadBox.addEventListener('dragover', (e) => {
    // preventDefault marks the box as a valid drop target.
    e.preventDefault();
    uploadBox.style.borderColor = '#60a5fa';
  });
  uploadBox.addEventListener('dragleave', (e) => {
    e.preventDefault();
    resetBorder();
  });
  uploadBox.addEventListener('drop', (e) => {
    e.preventDefault();
    resetBorder();
    const files = e.dataTransfer.files;
    if (files.length > 0 && files[0].type.startsWith('image/')) {
      // Assigning `files` does not fire `change`, so the upload handler is called directly.
      fileInput.files = files;
      const imageKey = index === 0 ? 'img1' : 'img2';
      const canvasId = index === 0 ? 'canvas1' : 'canvas2';
      handleFileUpload(fileInput, uploadBox, imageKey, canvasId);
    }
  });
});
// Initialize on load
// Starts loading the model as soon as the window's load event fires.
function onWindowLoad() {
  console.log('Initializing I-JEPA Patch Correspondence Analyzer...');
  initializeModel();
}
window.addEventListener('load', () => onWindowLoad());
</script>
<!-- End of application script -->
</body>
</html>