// File: background-removal/js/upload.js
// Hosting-page residue kept for reference (not part of the program):
//   "sdragly's picture" / "Segmentation model" / f952998
import { CATEGORIES, getCategoryInfo } from './models.js';
import { saveItem, savePhoto, loadPhotos, updatePhoto, deletePhoto, blobToURL } from './store.js';
import { removeBackground, autoSegment } from './ml-client.js';
import { showSegmentReview } from './segment-review.js';
import { showPolygonSelector } from './polygon-select.js';
import { normalizeColors } from './color-normalize.js';
import { tagOp } from './diag.js';
// ---------------------------------------------------------------------------
// Gallery tuning constants
// ---------------------------------------------------------------------------

// Only the N most recent photos are rendered in the gallery. Every thumbnail
// is an <img> that iOS may decode when it scrolls into view; even downscaled
// photos add up, and showing every photo ever taken is not needed.
const GALLERY_MAX = 10;

// A stored photo larger than this is almost certainly a legacy full-size
// capture that predates downscalePhoto. Such photos are rewritten in place
// on load so later sessions do not pay the cost again.
const LEGACY_MIGRATE_BYTES = 800 * 1024;

// ---------------------------------------------------------------------------
// Mutable state for the upload currently in progress
// ---------------------------------------------------------------------------

// Category id the saved item will be filed under.
let selectedCategory = 'princess';

// Blobs, in pipeline order.
let imageBlob = null; // the original photo
let croppedBlob = null; // result of the polygon crop (or same as imageBlob if skipped)
let segmentedBlob = null; // result of background removal

// Normalized polygon coordinates returned by the area selector.
let polygon = null;

// Object URLs owned by this module; they must be revoked when replaced.
let previewURL = null;
let croppedURL = null;
let photoURLs = []; // one object URL per gallery thumbnail

// NOTE(review): not referenced by the functions visible in this section of
// the file — confirm whether anything still uses it.
let fullDrawingMode = false;
/**
 * Shrink oversized stored photos in place, one at a time.
 *
 * A photo qualifies when it has an `imageBlob` whose size is not within
 * LEGACY_MIGRATE_BYTES. The smaller blob replaces the original (and is
 * persisted via `updatePhoto`) only when downscaling actually reduced the
 * size. Failures are logged and the loop moves on to the next photo.
 *
 * @param {Array<{imageBlob?: Blob}>} photos - Photo records; mutated in place.
 * @returns {Promise<void>}
 */
async function migrateLegacyPhotos(photos) {
  const pause = (ms) => new Promise((resolve) => {
    setTimeout(resolve, ms);
  });

  for (const photo of photos) {
    const original = photo.imageBlob;
    const needsMigration = Boolean(original) && !(original.size <= LEGACY_MIGRATE_BYTES);

    if (needsMigration) {
      tagOp(`migrate: ${Math.round(original.size / 1024)}KB photo`);

      try {
        const shrunk = await downscalePhoto(original);
        const isSmaller = shrunk && shrunk.size < photo.imageBlob.size;
        if (isSmaller) {
          photo.imageBlob = shrunk;
          await updatePhoto(photo);
        }
      } catch (err) {
        console.warn('[upload] photo migration failed', err);
      }

      // Give iOS a chance to reclaim the full-resolution decode buffer
      // before the next photo is touched.
      await pause(50);
    }
  }
}
/**
 * Render the upload screen into `#upload-screen` and wire up its controls.
 *
 * Steps: revoke the previous render's thumbnail URLs, load the stored photos,
 * shrink oversized legacy photos, render the GALLERY_MAX most recent photos,
 * reset the per-upload state, then build the category grid and bind events.
 *
 * @returns {Promise<void>}
 */
export async function initUpload() {
  tagOp('upload: init');
  const screen = document.getElementById('upload-screen');
  // The thumbnails these URLs belong to are about to be replaced.
  photoURLs.forEach(u => URL.revokeObjectURL(u));
  photoURLs = [];
  let photos = await loadPhotos();
  await migrateLegacyPhotos(photos);
  // Show only the most recent few; older photos stay in IDB.
  photos = photos
    .slice()
    .sort((a, b) => (b.createdAt || 0) - (a.createdAt || 0))
    .slice(0, GALLERY_MAX);
  tagOp(`upload: ${photos.length} gallery photos`);
  screen.innerHTML = `
<div class="upload-header">
<a href="#/" class="back-btn">\u2190 Back</a>
<h2>Add something new!</h2>
</div>
<div class="camera-zone">
<input type="file" accept="image/*" capture="environment" id="camera-input" class="camera-input"/>
<label for="camera-input" id="camera-label" class="camera-label">
<span class="camera-icon">\u{1F4F8}</span>
<span class="camera-text">Tap to photograph!</span>
</label>
</div>
${photos.length > 0 ? `
<div class="photo-gallery">
<p class="picker-label">Or use a previous photo</p>
<div class="photo-gallery-scroll" id="photo-gallery">
${photos.map(p => {
    const url = blobToURL(p.imageBlob);
    // Tracked so the next render can revoke it.
    photoURLs.push(url);
    return `
<div class="photo-gallery-item" data-photo-id="${p.id}">
<img src="${url}" class="photo-gallery-thumb" draggable="false"
loading="lazy" decoding="async"/>
<button class="photo-gallery-delete" data-photo-id="${p.id}">\u00D7</button>
</div>
`;
  }).join('')}
</div>
</div>
` : ''}
<button id="select-area-btn" class="pill select-area-btn" style="display:none">
\u2702\uFE0F Select area
</button>
<div id="category-section" class="category-picker">
<p class="picker-label">What kind?</p>
<div class="category-grid" id="category-grid"></div>
</div>
<div class="name-input-wrap" id="name-wrap">
<label for="asset-name">Name (optional)</label>
<input id="asset-name" type="text" placeholder="Give it a name..." class="name-input"/>
</div>
<div class="upload-actions" id="upload-actions" style="display:none">
<button id="submit-btn" class="big-button magic-btn">
\u2728 Magic Time! \u2728
</button>
<button id="full-drawing-btn" class="pill">
\u2728 Full Drawing (auto-split)
</button>
</div>
<div id="upload-status"></div>
`;
  // Reset BEFORE building the category grid. The grid highlights whichever
  // category `selectedCategory` holds; resetting afterwards left the old
  // category highlighted while the state had already snapped back to the
  // default, so the next save used a category the user never saw selected.
  resetState();
  buildCategoryGrid();
  bindEvents(photos);
}
/**
 * Fill `#category-grid` with one button per entry in CATEGORIES, marking the
 * entry whose id equals `selectedCategory` with the `selected` class.
 */
function buildCategoryGrid() {
  const renderButton = (category) => {
    const selectedClass = category.id === selectedCategory ? 'selected' : '';
    return `
<button class="category-btn ${selectedClass}" data-cat="${category.id}">
<span class="cat-emoji">${category.emoji}</span>
<span class="cat-label">${category.label}</span>
</button>
`;
  };

  const container = document.getElementById('category-grid');
  const buttons = CATEGORIES.map((category) => renderButton(category));
  container.innerHTML = buttons.join('');
}
// Modern phone cameras produce ~3000x4000 JPEGs. Fully decoded that's
// ~48 MB of RGBA pixels, and iOS holds it pinned for the lifetime of the
// file reference. We only need ~1280 on the long edge for everything
// downstream (OUTPUT_DIM=768 for save, MODNet preprocesses to short-edge
// 512 for inference, polygon selector and preview render well below this).
// Re-encoding as JPEG at 85% keeps the on-disk size tiny too.
/**
 * Return a copy of `file` whose long edge is at most `maxLongEdge` pixels.
 *
 * @param {Blob} file - Image to shrink.
 * @param {number} [maxLongEdge=1280] - Maximum size of the longer edge.
 * @returns {Promise<Blob>} `file` itself when it is already small enough,
 *   otherwise a re-encoded JPEG blob.
 */
async function downscalePhoto(file, maxLongEdge = 1280) {
  // We have to decode once to read dimensions. Close it immediately.
  const probe = await createImageBitmap(file);
  const { width: origW, height: origH } = probe;
  probe.close();
  const longEdge = Math.max(origW, origH);
  if (longEdge <= maxLongEdge) {
    return file; // already small enough, keep original
  }
  const ratio = maxLongEdge / longEdge;
  // Clamp to 1px: an extreme aspect ratio would otherwise round the short
  // edge down to 0, which is not a valid resize/canvas dimension.
  const w = Math.max(1, Math.round(origW * ratio));
  const h = Math.max(1, Math.round(origH * ratio));
  // Second decode directly at the target size — the big one is short-lived.
  const bitmap = await createImageBitmap(file, {
    resizeWidth: w,
    resizeHeight: h,
    resizeQuality: 'medium',
  });
  let canvas;
  try {
    canvas = new OffscreenCanvas(w, h);
    canvas.getContext('2d').drawImage(bitmap, 0, 0);
  } finally {
    // Release the decoded pixels even when drawing fails, and before the
    // JPEG encode starts.
    bitmap.close();
  }
  return canvas.convertToBlob({ type: 'image/jpeg', quality: 0.85 });
}
/**
 * Make `blob` the photo being worked on.
 *
 * Clears any crop, polygon and segmentation left from a previous photo,
 * swaps the preview object URL, and reveals the select-area button and the
 * action buttons.
 *
 * @param {Blob} blob - The photo to work on.
 */
function selectImage(blob) {
  const byId = (id) => document.getElementById(id);

  // Start from a clean slate for the new photo.
  imageBlob = blob;
  croppedBlob = null;
  polygon = null;
  segmentedBlob = null;

  // Release URLs that pointed at the previous photo before minting a new one.
  for (const staleURL of [previewURL, croppedURL]) {
    if (staleURL) {
      URL.revokeObjectURL(staleURL);
    }
  }
  previewURL = URL.createObjectURL(blob);
  croppedURL = null;

  showPhotoPreview(previewURL);

  byId('select-area-btn').style.display = '';
  byId('upload-status').innerHTML = '';
  byId('upload-actions').style.display = '';
  updateSubmitState();
}
/**
 * Attach every event listener for the freshly rendered upload screen.
 *
 * @param {Array<{id: *, imageBlob: Blob}>} photos - The photo records that
 *   were rendered into the gallery; used to resolve a tapped thumbnail back
 *   to its record.
 */
function bindEvents(photos) {
  // Log a failure and surface it in #upload-status. The detail text is set
  // through textContent so the error message is never parsed as HTML.
  const reportError = (headline, err) => {
    console.error(`[upload] ${headline}`, err);
    const status = document.getElementById('upload-status');
    if (!status) return;
    status.innerHTML = `
<div class="error-state">
<p>${headline}</p>
<p class="error-detail"></p>
</div>
`;
    status.querySelector('.error-detail').textContent = err && err.message ? err.message : String(err);
  };

  // `dataset` values are always strings, while a stored id may be of another
  // type (e.g. a number), so compare on the string form.
  const findPhoto = (rawId) => photos.find((p) => String(p.id) === rawId);

  // Category picker
  document.getElementById('category-grid').addEventListener('click', (e) => {
    const btn = e.target.closest('.category-btn');
    if (!btn) return;
    selectedCategory = btn.dataset.cat;
    document.querySelectorAll('.category-btn').forEach(b => b.classList.remove('selected'));
    btn.classList.add('selected');
  });

  // Camera input — downscale immediately so we never hold the full-res
  // camera JPEG, then save to gallery.
  document.getElementById('camera-input').addEventListener('change', async (e) => {
    const file = e.target.files[0];
    if (!file) return;
    try {
      tagOp(`capture: downscaling ${Math.round(file.size / 1024)}KB`);
      const resized = await downscalePhoto(file);
      tagOp(`capture: downscaled -> ${Math.round(resized.size / 1024)}KB`);
      // White-balance before saving so the gallery thumb, polygon preview
      // and eventual background-removal input all see the same image.
      const normalized = await normalizeColors(resized);
      await savePhoto(normalized);
      selectImage(normalized);
    } catch (err) {
      reportError("Couldn't load that photo.", err);
    }
  });

  // Photo gallery — tap to reuse, X to delete
  const gallery = document.getElementById('photo-gallery');
  if (gallery) {
    gallery.addEventListener('click', async (e) => {
      const delBtn = e.target.closest('.photo-gallery-delete');
      if (delBtn) {
        e.stopPropagation();
        const rawId = delBtn.dataset.photoId;
        const doomed = findPhoto(rawId);
        try {
          // Hand the store the id in its original type when we know it.
          await deletePhoto(doomed ? doomed.id : rawId);
          await initUpload();
        } catch (err) {
          reportError("Couldn't delete that photo.", err);
        }
        return;
      }
      const item = e.target.closest('.photo-gallery-item');
      if (!item) return;
      const photo = findPhoto(item.dataset.photoId);
      if (photo) {
        try {
          // Downscale is a no-op for photos that are already small enough;
          // it catches any full-res photos left over from older versions.
          const resized = await downscalePhoto(photo.imageBlob);
          selectImage(resized);
        } catch (err) {
          reportError("Couldn't load that photo.", err);
        }
      }
    });
  }

  // Select area
  document.getElementById('select-area-btn').addEventListener('click', handleSelectArea);
  // Name input
  document.getElementById('asset-name').addEventListener('input', updateSubmitState);
  // Submit -> segmentation
  document.getElementById('submit-btn').addEventListener('click', handleMagicTime);
  // Full drawing mode
  document.getElementById('full-drawing-btn').addEventListener('click', handleFullDrawing);
}
/**
 * Replace the camera label's contents with a preview image and a caption.
 *
 * @param {string} url - Image URL to preview.
 * @param {string} [caption] - Caption text; 'Tap to retake' when falsy.
 */
function showPhotoPreview(url, caption) {
  const captionText = caption ? caption : 'Tap to retake';
  const markup = [
    '',
    `<img src="${url}" class="camera-preview"/>`,
    `<span class="camera-retake">${captionText}</span>`,
    '',
  ].join('\n');

  const cameraLabel = document.getElementById('camera-label');
  cameraLabel.innerHTML = markup;
  cameraLabel.className = 'camera-preview-label';
}
/**
 * Open the polygon selector on the current preview and store the outcome.
 *
 * Does nothing when there is no preview or when the selector resolves to a
 * falsy value. Otherwise records the cropped blob and polygon, replaces the
 * cropped object URL, and updates the caption and button label.
 *
 * @returns {Promise<void>}
 */
async function handleSelectArea() {
  if (!previewURL) {
    return;
  }

  const selection = await showPolygonSelector(previewURL);
  if (!selection) {
    return;
  }

  croppedBlob = selection.imageBlob;
  polygon = selection.polygon;

  // Swap the cropped URL, releasing the one from any earlier selection.
  if (croppedURL) {
    URL.revokeObjectURL(croppedURL);
  }
  croppedURL = URL.createObjectURL(croppedBlob);

  showPhotoPreview(previewURL, '\u2702\uFE0F Area selected \u2014 tap to retake photo');
  const selectButton = document.getElementById('select-area-btn');
  selectButton.textContent = '\u2702\uFE0F Reselect area';
  updateSubmitState();
}
/**
 * Enable the submit button only while a photo is selected. Does nothing when
 * the button is not in the document.
 */
function updateSubmitState() {
  const submitButton = document.getElementById('submit-btn');
  if (!submitButton) {
    return;
  }
  const hasPhoto = Boolean(imageBlob);
  submitButton.disabled = !hasPhoto;
}
/**
 * Run background removal on the selected photo and show a before/after
 * preview with approve / skip / retake actions.
 *
 * When a polygon of at least three points was selected, the segmentation
 * result is additionally restricted to that polygon. On failure an error
 * panel offers saving the unsegmented image or trying again.
 *
 * @returns {Promise<void>}
 */
async function handleMagicTime() {
  const name = document.getElementById('asset-name').value.trim();
  if (!imageBlob) return;
  const btn = document.getElementById('submit-btn');
  const status = document.getElementById('upload-status');
  btn.disabled = true;
  document.getElementById('upload-actions').style.display = 'none';
  status.innerHTML = `
<div class="processing">
<span class="crown-spin">\u{1F451}</span>
<p id="seg-message">Removing the background!</p>
<div id="seg-progress-bar" class="progress-bar"><div id="seg-progress-fill" class="progress-fill"></div></div>
<p class="progress-hint">This might take a moment.</p>
</div>
`;
  // Re-normalize as a safety net for legacy gallery photos that were
  // captured before normalizeColors existed.
  // What "Save without removing" stores: the crop if there is one, else the
  // (normalized, once available) full photo.
  let skipBlob = croppedBlob || imageBlob;
  try {
    const normalized = await normalizeColors(imageBlob);
    if (!croppedBlob) skipBlob = normalized;
    let segResult = await removeBackground(normalized, (info) => {
      const msg = document.getElementById('seg-message');
      const fill = document.getElementById('seg-progress-fill');
      if (msg) msg.textContent = info.message;
      if (fill && info.progress != null) fill.style.width = `${info.progress}%`;
    });
    if (polygon && polygon.length >= 3) {
      segResult = await intersectSegmentationWithPolygon(segResult, polygon);
    }
    segmentedBlob = segResult;
    const segURL = URL.createObjectURL(segmentedBlob);
    const beforeURL = previewURL;
    status.innerHTML = `
<div class="seg-preview">
<p class="seg-preview-label">How does it look?</p>
<div class="seg-comparison">
<div class="seg-compare-item">
<img src="${beforeURL}" class="seg-compare-img" alt="Before"/>
<span>Before</span>
</div>
<div class="seg-compare-item">
<img src="${segURL}" class="seg-compare-img checkerboard-bg" alt="After"/>
<span>After</span>
</div>
</div>
<div class="seg-actions">
<button id="seg-approve" class="big-button">\u2705 Looks good!</button>
<button id="seg-skip" class="pill">Save without removing</button>
<button id="seg-retry" class="pill">Retake photo</button>
</div>
</div>
`;
    // The "After" object URL is only needed while this preview is shown.
    // Every way out of the preview releases it, otherwise each run would
    // leak one URL (and keep its blob alive).
    const releaseSegURL = () => URL.revokeObjectURL(segURL);
    document.getElementById('seg-approve').addEventListener('click', () => {
      tagOp('approve: clicked');
      saveAsset(name, segmentedBlob);
      releaseSegURL();
    });
    document.getElementById('seg-skip').addEventListener('click', () => {
      tagOp('skip: clicked');
      saveAsset(name, skipBlob);
      releaseSegURL();
    });
    document.getElementById('seg-retry').addEventListener('click', () => {
      resetToCamera();
      releaseSegURL();
    });
  } catch (err) {
    console.error('Segmentation failed:', err);
    status.innerHTML = `
<div class="error-state">
<p>Background removal didn't work this time.</p>
<p class="error-detail"></p>
<div class="seg-actions">
<button id="seg-skip-err" class="big-button">Save without removing</button>
<button id="seg-retry-err" class="pill">Try again</button>
</div>
</div>
`;
    // Set as text so the error message is never parsed as HTML.
    status.querySelector('.error-detail').textContent = err && err.message ? err.message : String(err);
    document.getElementById('seg-skip-err').addEventListener('click', () => saveAsset(name, skipBlob));
    document.getElementById('seg-retry-err').addEventListener('click', () => {
      document.getElementById('upload-actions').style.display = '';
      btn.disabled = false;
      status.innerHTML = '';
    });
  }
}
/**
 * "Full drawing" pipeline: remove the background from the selected photo,
 * auto-segment the result into parts, let the user review them, then save
 * every accepted part as its own item.
 *
 * Progress is mapped onto one bar: background removal fills 0-40%,
 * segmentation 40-80%, and 100% is shown once the review is ready.
 *
 * @returns {Promise<void>}
 */
async function handleFullDrawing() {
  if (!imageBlob) return;
  const status = document.getElementById('upload-status');
  document.getElementById('upload-actions').style.display = 'none';
  // Update the progress message and, when `progress` is a number, the bar.
  function showProgress(message, progress) {
    const msg = document.getElementById('seg-message');
    const fill = document.getElementById('seg-progress-fill');
    if (msg) msg.textContent = message;
    if (fill && progress != null) fill.style.width = `${progress}%`;
  }
  status.innerHTML = `
<div class="processing">
<span class="crown-spin">\u{1F451}</span>
<p id="seg-message">Removing the background...</p>
<div id="seg-progress-bar" class="progress-bar"><div id="seg-progress-fill" class="progress-fill"></div></div>
<p class="progress-hint">This might take a moment.</p>
</div>
`;
  try {
    const bgRemoved = await removeBackground(imageBlob, (info) => {
      // A progress event without a number must leave the bar alone;
      // multiplying a missing value would set the width to "NaN%".
      showProgress(info.message, info.progress != null ? info.progress * 0.4 : null);
    });
    await new Promise(r => setTimeout(r, 200));
    showProgress('Finding all the parts...', 40);
    const segments = await autoSegment(bgRemoved, (info) => {
      const overall = 40 + (info.progress || 0) * 0.4;
      showProgress(info.message, overall);
    });
    if (segments.length === 0) {
      status.innerHTML = `
<div class="error-state">
<p>Couldn't find any distinct parts in this drawing.</p>
<div class="seg-actions">
<button id="fd-retry" class="pill">Try again</button>
</div>
</div>
`;
      document.getElementById('fd-retry').addEventListener('click', () => {
        document.getElementById('upload-actions').style.display = '';
        status.innerHTML = '';
      });
      return;
    }
    showProgress('Ready for review!', 100);
    const bgRemovedURL = URL.createObjectURL(bgRemoved);
    let reviewResult;
    try {
      reviewResult = await showSegmentReview(bgRemovedURL, segments);
    } finally {
      // Release the URL even when the review itself fails.
      URL.revokeObjectURL(bgRemovedURL);
    }
    if (!reviewResult) {
      // Review dismissed without a result: return to the action buttons.
      document.getElementById('upload-actions').style.display = '';
      status.innerHTML = '';
      return;
    }
    status.innerHTML = `
<div class="processing">
<span class="crown-spin">\u{1F451}</span>
<p>Saving everything...</p>
</div>
`;
    let savedCount = 0;
    if (reviewResult.princess) {
      await saveItem({
        name: '',
        category: 'princess',
        imageBlob: reviewResult.princess.blob,
        scale: 0.8,
        zIndex: 2,
      });
      savedCount++;
    }
    for (const item of reviewResult.clothing) {
      const catInfo = getCategoryInfo(item.category);
      await saveItem({
        name: '',
        category: item.category,
        imageBlob: item.blob,
        scale: catInfo.scale || 0.5,
        zIndex: catInfo.zIndex,
      });
      savedCount++;
    }
    status.innerHTML = `
<div class="success">
<p class="success-text">\u2705 Saved ${savedCount} item${savedCount !== 1 ? 's' : ''}!</p>
<a href="#/" class="big-button">Back to game</a>
<button id="fd-another" class="pill">Segment another drawing</button>
</div>
`;
    document.getElementById('fd-another').addEventListener('click', resetToCamera);
  } catch (err) {
    console.error('Full drawing pipeline failed:', err);
    status.innerHTML = `
<div class="error-state">
<p>Something went wrong during segmentation.</p>
<p class="error-detail"></p>
<div class="seg-actions">
<button id="fd-retry-err" class="pill">Try again</button>
</div>
</div>
`;
    // Set as text so the error message is never parsed as HTML.
    status.querySelector('.error-detail').textContent = err && err.message ? err.message : String(err);
    document.getElementById('fd-retry-err').addEventListener('click', () => {
      document.getElementById('upload-actions').style.display = '';
      status.innerHTML = '';
    });
  }
}
/**
 * Drop the upload in progress and re-render the upload screen from scratch.
 * Used directly as a click handler, so it deliberately returns nothing.
 */
function resetToCamera() {
  resetState();
  // initUpload is async; handle a failed re-render here instead of leaving
  // the promise floating as an unhandled rejection.
  initUpload().catch((err) => {
    console.error('[upload] failed to re-render the upload screen', err);
  });
}
/**
 * Restrict a segmented image to a polygon and crop it to the polygon's
 * bounding box (plus a small padding, clamped to the image).
 *
 * @param {Blob} segBlob - Image to clip.
 * @param {Array<{x: number, y: number}>} poly - Polygon vertices; each
 *   coordinate is multiplied by the image width/height, i.e. treated as a
 *   fraction of the image size. Must contain at least one point.
 * @returns {Promise<Blob>} PNG blob of the clipped, cropped region.
 */
async function intersectSegmentationWithPolygon(segBlob, poly) {
  const bitmap = await createImageBitmap(segBlob);
  let canvas;
  try {
    const fullW = bitmap.width;
    const fullH = bitmap.height;
    const pxPoly = poly.map(p => ({ x: p.x * fullW, y: p.y * fullH }));

    // Bounding box of the polygon, seeded with the image extremes.
    const xs = pxPoly.map(p => p.x);
    const ys = pxPoly.map(p => p.y);
    const pad = 4;
    const minX = Math.max(0, Math.floor(Math.min(fullW, ...xs)) - pad);
    const minY = Math.max(0, Math.floor(Math.min(fullH, ...ys)) - pad);
    const maxX = Math.min(fullW, Math.ceil(Math.max(0, ...xs)) + pad);
    const maxY = Math.min(fullH, Math.ceil(Math.max(0, ...ys)) + pad);
    const cropW = maxX - minX;
    const cropH = maxY - minY;

    canvas = new OffscreenCanvas(cropW, cropH);
    const ctx = canvas.getContext('2d');
    // Trace the polygon in crop-local coordinates and use it as the clip
    // path, so only pixels inside it are drawn.
    ctx.beginPath();
    ctx.moveTo(pxPoly[0].x - minX, pxPoly[0].y - minY);
    for (let i = 1; i < pxPoly.length; i++) {
      ctx.lineTo(pxPoly[i].x - minX, pxPoly[i].y - minY);
    }
    ctx.closePath();
    ctx.clip();
    ctx.drawImage(bitmap, minX, minY, cropW, cropH, 0, 0, cropW, cropH);
  } finally {
    // The decoded bitmap was previously never released; close it as soon as
    // the pixels are on the canvas, or when anything above throws.
    bitmap.close();
  }
  return canvas.convertToBlob({ type: 'image/png' });
}
/**
 * Persist `blob` as a new item in the currently selected category and show
 * a success or error panel in `#upload-status`.
 *
 * @param {string} name - Item name (may be empty).
 * @param {Blob} blob - Image to store.
 * @returns {Promise<void>}
 */
async function saveAsset(name, blob) {
  tagOp('save: clicked');
  const status = document.getElementById('upload-status');
  status.innerHTML = `
<div class="processing">
<span class="crown-spin">\u{1F451}</span>
<p>Saving!</p>
</div>
`;
  // iOS is likely still near the high-water mark from MODNet inference.
  // Drop the cropped intermediate (keep imageBlob/previewURL — the
  // "Select another from this photo" flow re-uses them) and give the
  // GC a moment before IndexedDB clones `blob` into a transaction.
  tagOp('save: freeing intermediates');
  croppedBlob = null;
  if (croppedURL) { URL.revokeObjectURL(croppedURL); croppedURL = null; }
  await new Promise(r => setTimeout(r, 100));
  try {
    const catInfo = getCategoryInfo(selectedCategory);
    tagOp(`save: writing to IDB (${Math.round(blob.size / 1024)}KB)`);
    await saveItem({
      name,
      category: selectedCategory,
      imageBlob: blob,
      scale: catInfo.scale || 0.5,
      zIndex: catInfo.zIndex,
    });
    tagOp('save: IDB done');
    status.innerHTML = `
<div class="success">
<p class="success-text">\u2705 All done!</p>
<button id="another-from-photo" class="big-button">Select another from this photo</button>
<a href="#/" class="pill">Back to game</a>
</div>
`;
    tagOp('save: success shown');
    document.getElementById('another-from-photo').addEventListener('click', () => {
      resetForAnotherSelection();
    });
  } catch (err) {
    const detail = err && err.message ? err.message : String(err);
    tagOp(`save: error ${detail}`);
    status.innerHTML = `
<div class="error-state">
<p>Oops! Something went wrong.</p>
<p class="error-detail"></p>
</div>
`;
    // Set as text so the error message is never parsed as HTML.
    status.querySelector('.error-detail').textContent = detail;
  }
}
/**
 * Prepare the screen for cutting another item out of the same photo: keep
 * the photo and its preview, discard the crop, polygon and segmentation,
 * clear the name field and restore the action buttons.
 */
function resetForAnotherSelection() {
  const byId = (id) => document.getElementById(id);

  // Back to the "ready to submit" layout.
  byId('upload-status').innerHTML = '';
  byId('upload-actions').style.display = '';
  byId('submit-btn').disabled = false;

  // Forget everything derived from the previous selection.
  croppedBlob = null;
  polygon = null;
  segmentedBlob = null;
  if (croppedURL) {
    URL.revokeObjectURL(croppedURL);
  }
  croppedURL = null;

  byId('asset-name').value = '';
  showPhotoPreview(previewURL);

  const selectButton = byId('select-area-btn');
  selectButton.style.display = '';
  selectButton.textContent = '\u2702\uFE0F Select area';

  updateSubmitState();
}
/**
 * Return the per-upload module state to its defaults: category back to
 * 'princess', every blob and the polygon cleared, and both object URLs
 * revoked and cleared.
 */
function resetState() {
  // Release the object URLs first, preview before crop.
  for (const ownedURL of [previewURL, croppedURL]) {
    if (ownedURL) {
      URL.revokeObjectURL(ownedURL);
    }
  }
  previewURL = null;
  croppedURL = null;

  selectedCategory = 'princess';
  imageBlob = null;
  croppedBlob = null;
  polygon = null;
  segmentedBlob = null;
}