Spaces:

Lightricks
/

ltmarx

Running

App Files Files Community

harelcain committed on Feb 25

Commit

53bf5b7

verified ·

1 Parent(s): 1671d28

Upload 13 files

Browse files

Files changed (2) hide show

README.md +75 -26
core/detector.ts +339 -23

README.md CHANGED Viewed

@@ -10,7 +10,9 @@ pinned: false
 # LTMarX — Video Watermarking
-Imperceptible 32-bit watermarking for video. Embeds a payload into the luminance channel using DWT/DCT transform-domain quantization (DM-QIM) with BCH error correction. Survives re-encoding, rescaling, brightness/contrast/saturation changes.
 All processing runs in the browser — no server round-trips needed.
@@ -44,13 +46,13 @@ core/           Pure TypeScript watermark engine (isomorphic, zero platform deps
 ├── dwt.ts          Haar DWT (forward/inverse, multi-level)
 ├── dct.ts          8×8 DCT with zigzag scan
 ├── dmqim.ts        Dither-Modulated QIM (embed/extract with soft decisions)
-├── bch.ts          BCH codec (GF(2^m), Berlekamp-Massey decoding)
-├── crc.ts          CRC-4 / CRC-8 / CRC-16
-├── tiling.ts       Periodic tile layout for redundant embedding
-├── masking.ts      Perceptual masking (variance-adaptive delta)
 ├── keygen.ts       Seeded PRNG for dithers and permutations
 ├── embedder.ts     Y-plane → watermarked Y-plane
-├── detector.ts     Y-plane → payload + confidence
 ├── presets.ts      Named configurations (light → fortress)
 └── types.ts        Shared types
@@ -58,15 +60,16 @@ web/            Frontend (Vite + React + Tailwind)
 ├── src/
 │   ├── App.tsx
 │   ├── components/
-│   │   ├── EmbedPanel.tsx          Upload, configure, embed
 │   │   ├── DetectPanel.tsx         Upload, detect, display results
-│   │   ├── ApiDocs.tsx             Inline API reference
 │   │   ├── ComparisonView.tsx      Side-by-side / difference viewer
-│   │   ├── RobustnessTest.tsx      Automated attack testing
 │   │   ├── StrengthSlider.tsx      Preset selector with snap points
-│   │   └── ResultCard.tsx          Detection result display
 │   ├── lib/
-│   │   └── video-io.ts            Frame extraction, encoding, attack utilities
 │   └── workers/
 │       └── watermark.worker.ts
 └── index.html
@@ -86,33 +89,70 @@ tests/          Vitest test suite
 ### Embedding
 ```
-Y plane → 2-level Haar DWT → HL subband → tile grid →
-  per tile: 8×8 DCT blocks → select mid-freq coefficients →
-  DM-QIM embed coded bits → inverse DCT → inverse DWT → modified Y plane
 ```
 ### Payload Encoding
 ```
-32-bit payload → CRC append → BCH encode → keyed interleave → map to coefficients
 ```
 ### Detection
 ```
-Y plane → DWT → HL subband → tile grid →
   per tile: DCT → DM-QIM soft extract →
-  soft-combine across tiles and frames → BCH decode → CRC verify → payload
 ```
 ## Presets
-| Preset | Delta | Tile Period | BCH Code | Masking | Use Case |
-|--------|-------|-------------|----------|---------|----------|
-| **Light** | 50 | 256px | (63,36,5) | No | Near-invisible, mild compression |
-| **Moderate** | 80 | 232px | (63,36,5) | Yes | Balanced with perceptual masking |
-| **Strong** | 110 | 208px | (63,36,5) | Yes | More frequencies, handles rescaling |
-| **Fortress** | 150 | 192px | (63,36,5) | Yes | Maximum robustness |
 ## API
@@ -124,7 +164,7 @@ import { getPreset } from './core/presets';
 const config = getPreset('moderate');
 const result = embedWatermark(yPlane, width, height, payload, key, config);
-// result.yPlane: watermarked Y plane
 // result.psnr: quality metric (dB)
 ```
@@ -140,6 +180,15 @@ const result = detectWatermarkMultiFrame(yPlanes, width, height, key, config);
 // result.confidence: 0–1
 ```
 ### Auto-Detection (tries all presets)
 ```typescript
@@ -164,8 +213,8 @@ npm test              # Run all tests
 npm run test:watch    # Watch mode
 ```
-Tests cover: DWT round-trip, DCT round-trip, DM-QIM embed/extract, BCH encode/decode with error correction, CRC append/verify, full embed→detect pipeline across presets, false positive rejection, wrong key rejection.
 ## Browser Encoding
-The web UI encodes watermarked video using ffmpeg.wasm (x264 in WebAssembly). To avoid memory pressure, frames are encoded in chunks of 100 and concatenated at the end. Peak memory stays at ~chunk size rather than scaling with video length.

 # LTMarX — Video Watermarking
+Imperceptible 32-bit watermarking for video. Embeds a payload into the luminance channel using DWT/DCT transform-domain quantization (DM-QIM) with BCH error correction.
+Survives re-encoding, rescaling, brightness/contrast/saturation adjustments, and cropping up to ~20%.
 All processing runs in the browser — no server round-trips needed.
 ├── dwt.ts          Haar DWT (forward/inverse, multi-level)
 ├── dct.ts          8×8 DCT with zigzag scan
 ├── dmqim.ts        Dither-Modulated QIM (embed/extract with soft decisions)
+├── bch.ts          BCH(63,36,5) over GF(2^6), Berlekamp-Massey decoding
+├── crc.ts          CRC-4 integrity check
+├── tiling.ts       Periodic tile layout + autocorrelation-based grid recovery
+├── masking.ts      Perceptual masking (variance-adaptive quantization step)
 ├── keygen.ts       Seeded PRNG for dithers and permutations
 ├── embedder.ts     Y-plane → watermarked Y-plane
+├── detector.ts     Y-plane(s) → payload + confidence
 ├── presets.ts      Named configurations (light → fortress)
 └── types.ts        Shared types
 ├── src/
 │   ├── App.tsx
 │   ├── components/
+│   │   ├── EmbedPanel.tsx          Upload, configure, embed, download
 │   │   ├── DetectPanel.tsx         Upload, detect, display results
 │   │   ├── ComparisonView.tsx      Side-by-side / difference viewer
+│   │   ├── RobustnessTest.tsx      Automated attack battery (re-encode, crop, etc.)
+│   │   ├── HowItWorks.tsx          Interactive explainer with D3 visualizations
 │   │   ├── StrengthSlider.tsx      Preset selector with snap points
+│   │   ├── ResultCard.tsx          Detection result display
+│   │   └── ApiDocs.tsx             Inline API reference
 │   ├── lib/
+│   │   └── video-io.ts            Frame extraction, encoding, attack simulations
 │   └── workers/
 │       └── watermark.worker.ts
 └── index.html
 ### Embedding
 ```
+Y plane → 2-level Haar DWT → HL subband → periodic tile grid →
+  per tile: 8×8 DCT blocks → select mid-freq zigzag coefficients →
+  DM-QIM embed coded bits (with per-block dithering and perceptual masking) →
+  inverse DCT → inverse DWT → modified Y plane
 ```
 ### Payload Encoding
 ```
+32-bit payload → CRC-4 append → BCH(63,36,5) encode → keyed interleave →
+  map to DCT coefficients across tiles (with wraparound redundancy)
 ```
 ### Detection
 ```
+Y plane(s) → DWT → HL subband → tile grid →
   per tile: DCT → DM-QIM soft extract →
+  soft-combine across tiles and frames → keyed de-interleave →
+  BCH soft decode → CRC verify → payload
 ```
+### Crop-Resilient Detection
+When the frame has been cropped, the detector doesn't know the original tile grid alignment. It searches over three alignment parameters:
+1. **DWT padding** (0–3 per axis) — the crop may break DWT pixel pairing
+2. **DCT block shift** (0–7 per axis) — the crop may misalign 8×8 block boundaries within the subband
+3. **Tile dither offset** (0–N−1 per axis, where N is the number of 8×8 blocks per tile side in the subband) — the crop shifts which tile-phase position each block maps to
+The total search space is 16 × 64 × N² candidates (~37K for the strong preset). To make this fast:
+- DCT coefficients are precomputed once per (pad, shift) combination using only tile 0
+- Dither offsets are swept cheaply using just DM-QIM re-extraction on cached coefficients
+- Candidates are ranked by signal magnitude (sum of squared averaged soft bits)
+- Only the top 50 candidates are fully decoded with all frames
+This runs in ~1 second for 32 frames on a 512×512 video.
 ## Presets
+| Preset | Delta | Tile Period | Zigzag Positions | Masking | Use Case |
+|--------|-------|-------------|------------------|---------|----------|
+| **Light** | 50 | 256px | 3–14 (mid-freq) | No | Near-invisible, mild compression |
+| **Moderate** | 62 | 240px | 3–14 (mid-freq) | Yes | Balanced with perceptual masking |
+| **Strong** | 110 | 208px | 1–20 (low+mid) | Yes | Heavy re-encoding, rescaling, cropping |
+| **Fortress** | 150 | 192px | 1–20 (low+mid) | Yes | Maximum robustness |
+All presets use BCH(63,36,5) with CRC-4 and 2-level DWT.
+A higher delta embeds the watermark more strongly: it survives attacks better, at the cost of more visible artifacts. The "strong" and "fortress" presets use more DCT coefficients (zigzag positions 1–20 vs 3–14) for additional redundancy.
+## Robustness
+The web UI includes an automated robustness test battery. Each test applies an attack to the watermarked video and attempts detection:
+| Attack | Variants Tested |
+|--------|----------------|
+| **Re-encode** | CRF 23, 28, 33, 38, 43 |
+| **Downscale** | 25%, 50%, 75%, 90% |
+| **Brightness** | -0.2, +0.2, +0.4 |
+| **Contrast** | 0.5×, 1.5×, 2.0× |
+| **Saturation** | 0×, 0.5×, 2.0× |
+| **Crop** | 5%, 10%, 15%, 20% (per side) |
 ## API
 const config = getPreset('moderate');
 const result = embedWatermark(yPlane, width, height, payload, key, config);
+// result.yPlane: watermarked Y plane (Uint8Array)
 // result.psnr: quality metric (dB)
 ```
 // result.confidence: 0–1
 ```
+### Crop-Resilient Detection
+```typescript
+const result = detectWatermarkMultiFrame(
+  yPlanes, width, height, key, config,
+  { cropResilient: true }
+);
+```
 ### Auto-Detection (tries all presets)
 ```typescript
 npm run test:watch    # Watch mode
 ```
+25 tests across 6 files covering: DWT round-trip, DCT round-trip, DM-QIM embed/extract, BCH encode/decode with error correction, CRC append/verify, full embed-detect pipeline across presets, false positive rejection (wrong key, unwatermarked frame), crop-resilient detection (arbitrary offset and ~20% crop).
 ## Browser Encoding
+The web UI encodes watermarked video using ffmpeg.wasm (x264 in WebAssembly). To avoid memory pressure, frames are encoded in chunks of 100 and concatenated at the end. Peak memory stays proportional to chunk size rather than scaling with video length.

core/detector.ts CHANGED Viewed

@@ -5,13 +5,13 @@
  */
 import type { WatermarkConfig, DetectionResult, Buffer2D } from './types.js';
-import { yPlaneToBuffer, dwtForward, extractSubband } from './dwt.js';
 import { dctForward8x8, extractBlock, ZIGZAG_ORDER } from './dct.js';
 import { dmqimExtractSoft } from './dmqim.js';
 import { crcVerify } from './crc.js';
 import { BchCodec } from './bch.js';
 import { generateDithers, generatePermutation } from './keygen.js';
-import { computeTileGrid, getTileOrigin, getTileBlocks, type TileGrid } from './tiling.js';
 import { blockAcEnergy, computeMaskingFactors } from './masking.js';
 import { bitsToPayload } from './embedder.js';
 import { PRESETS } from './presets.js';
@@ -57,7 +57,10 @@ function extractSoftBitsFromSubband(
   hlSubband: Buffer2D,
   tileGrid: TileGrid,
   key: string,
-  config: WatermarkConfig
 ): { tileSoftBits: Float64Array[]; totalTiles: number } | null {
   if (tileGrid.totalTiles === 0) return null;
@@ -75,13 +78,18 @@ function extractSoftBitsFromSubband(
     zigCoeffIdx[z] = r * 8 + c;
   }
   for (let tileIdx = 0; tileIdx < tileGrid.totalTiles; tileIdx++) {
     let ditherIdx = 0; // Reset per tile — matches embedder
     const origin = getTileOrigin(tileGrid, tileIdx);
     const blocks = getTileBlocks(origin.x, origin.y, tileGrid.tilePeriod, hlSubband.width, hlSubband.height);
     const softBits = new Float64Array(codedLength);
-    const bitWeights = new Float64Array(codedLength);
     let maskingFactors: Float64Array | null = null;
     if (config.perceptualMasking && blocks.length > 0) {
@@ -103,27 +111,44 @@ function extractSoftBitsFromSubband(
       const maskFactor = maskingFactors ? maskingFactors[bi] : 1.0;
       const effectiveDelta = config.delta * maskFactor;
-      // Weight by masking factor: blocks with tiny effective delta produce
-      // unreliable soft bits (noise magnitude scales as 1/delta), so their
-      // contribution should be proportionally smaller.
-      const weight = maskFactor;
       for (let z = 0; z < zigCoeffIdx.length; z++) {
         if (bitIdx >= codedLength) bitIdx = 0;
         const coeffIdx = zigCoeffIdx[z];
-        const dither = dithers[ditherIdx++];
         const soft = dmqimExtractSoft(blockBuf[coeffIdx], effectiveDelta, dither);
-        softBits[bitIdx] += soft * weight;
-        bitWeights[bitIdx] += weight;
         bitIdx++;
       }
     }
     for (let i = 0; i < codedLength; i++) {
-      if (bitWeights[i] > 0) softBits[i] /= bitWeights[i];
     }
     tileSoftBits.push(softBits);
@@ -132,10 +157,22 @@ function extractSoftBitsFromSubband(
   return { tileSoftBits, totalTiles: tileGrid.totalTiles };
 }
 /**
  * Detect watermark from multiple Y planes.
  * Extracts soft decisions from each frame independently, then combines
  * across frames and tiles (never averages raw pixels).
  */
 export function detectWatermarkMultiFrame(
   yPlanes: Uint8Array[],
@@ -143,6 +180,7 @@ export function detectWatermarkMultiFrame(
   height: number,
   key: string,
   config: WatermarkConfig,
 ): DetectionResult {
   const noResult: DetectionResult = {
     detected: false,
@@ -158,15 +196,23 @@ export function detectWatermarkMultiFrame(
   const bch = new BchCodec(config.bch);
   const perm = generatePermutation(key, codedLength);
-  // Helper: try to detect with given frames and explicit tile grid
-  const tryWithGrid = (
     frames: FrameDWT[],
-    makeGrid: (hlSubband: Buffer2D, stp: number) => TileGrid,
   ): DetectionResult | null => {
     const softBits: Float64Array[] = [];
     for (const { hlSubband, subbandTilePeriod } of frames) {
-      const tileGrid = makeGrid(hlSubband, subbandTilePeriod);
-      const frameResult = extractSoftBitsFromSubband(hlSubband, tileGrid, key, config);
       if (frameResult) softBits.push(...frameResult.tileSoftBits);
     }
     if (softBits.length === 0) return null;
@@ -175,11 +221,279 @@ export function detectWatermarkMultiFrame(
   // Fast path: zero-phase grid (uncropped frames)
   const frameDWTs = yPlanes.map((yp) => computeFrameDWT(yp, width, height, config));
-  const fast = tryWithGrid(frameDWTs, (hl, stp) =>
-    computeTileGrid(hl.width, hl.height, stp));
   if (fast) return fast;
-  return noResult;
 }
 /**
@@ -247,7 +561,7 @@ function decodeFromSoftBits(
 /** Minimum confidence to report a detection (low threshold is fine —
  *  the statistical model already ensures noise scores near 0%) */
-const MIN_CONFIDENCE = 0.10;
 /**
  * Try to decode soft bits into a payload
@@ -312,8 +626,9 @@ export function autoDetect(
   width: number,
   height: number,
   key: string,
 ): AutoDetectResult {
-  return autoDetectMultiFrame([yPlane], width, height, key);
 }
 /**
@@ -324,6 +639,7 @@ export function autoDetectMultiFrame(
   width: number,
   height: number,
   key: string,
 ): AutoDetectResult {
   let best: AutoDetectResult = {
     detected: false,
@@ -335,7 +651,7 @@ export function autoDetectMultiFrame(
   };
   for (const [name, config] of Object.entries(PRESETS)) {
-    const result = detectWatermarkMultiFrame(yPlanes, width, height, key, config);
     if (result.detected && result.confidence > best.confidence) {
       best = { ...result, presetUsed: name as PresetName };
     }

  */
 import type { WatermarkConfig, DetectionResult, Buffer2D } from './types.js';
+import { createBuffer2D, yPlaneToBuffer, dwtForward, extractSubband } from './dwt.js';
 import { dctForward8x8, extractBlock, ZIGZAG_ORDER } from './dct.js';
 import { dmqimExtractSoft } from './dmqim.js';
 import { crcVerify } from './crc.js';
 import { BchCodec } from './bch.js';
 import { generateDithers, generatePermutation } from './keygen.js';
+import { computeTileGrid, recoverTileGrid, getTileOrigin, getTileBlocks, type TileGrid } from './tiling.js';
 import { blockAcEnergy, computeMaskingFactors } from './masking.js';
 import { bitsToPayload } from './embedder.js';
 import { PRESETS } from './presets.js';
   hlSubband: Buffer2D,
   tileGrid: TileGrid,
   key: string,
+  config: WatermarkConfig,
+  ditherOffX: number = 0,
+  ditherOffY: number = 0,
+  blocksPerSide: number = 0,
 ): { tileSoftBits: Float64Array[]; totalTiles: number } | null {
   if (tileGrid.totalTiles === 0) return null;
     zigCoeffIdx[z] = r * 8 + c;
   }
+  const hasDitherOffset = ditherOffX !== 0 || ditherOffY !== 0;
+  const numZig = config.zigzagPositions.length;
   for (let tileIdx = 0; tileIdx < tileGrid.totalTiles; tileIdx++) {
     let ditherIdx = 0; // Reset per tile — matches embedder
     const origin = getTileOrigin(tileGrid, tileIdx);
     const blocks = getTileBlocks(origin.x, origin.y, tileGrid.tilePeriod, hlSubband.width, hlSubband.height);
+    const tileOriginBlockRow = Math.floor(origin.y / 8);
+    const tileOriginBlockCol = Math.floor(origin.x / 8);
     const softBits = new Float64Array(codedLength);
+    const bitCounts = new Float64Array(codedLength);
     let maskingFactors: Float64Array | null = null;
     if (config.perceptualMasking && blocks.length > 0) {
       const maskFactor = maskingFactors ? maskingFactors[bi] : 1.0;
       const effectiveDelta = config.delta * maskFactor;
+      // Compute dither index and bit index: when dither offset is active,
+      // remap block position to find the embedder's dither and bit assignment
+      // for this spatial location within the periodic tile structure.
+      let blockDitherBase: number;
+      if (hasDitherOffset && blocksPerSide > 0) {
+        const relBr = row - tileOriginBlockRow;
+        const relBc = col - tileOriginBlockCol;
+        const origR = (relBr + ditherOffY) % blocksPerSide;
+        const origC = (relBc + ditherOffX) % blocksPerSide;
+        blockDitherBase = (origR * blocksPerSide + origC) * numZig;
+        // Remap bitIdx to match the embedder's bit assignment at the original position
+        bitIdx = ((origR * blocksPerSide + origC) * numZig) % codedLength;
+      } else {
+        blockDitherBase = ditherIdx;
+      }
       for (let z = 0; z < zigCoeffIdx.length; z++) {
         if (bitIdx >= codedLength) bitIdx = 0;
         const coeffIdx = zigCoeffIdx[z];
+        const dither = hasDitherOffset ? dithers[blockDitherBase + z] : dithers[ditherIdx++];
         const soft = dmqimExtractSoft(blockBuf[coeffIdx], effectiveDelta, dither);
+        softBits[bitIdx] += soft;
+        bitCounts[bitIdx]++;
         bitIdx++;
       }
+      if (!hasDitherOffset) {
+        // ditherIdx already incremented in the loop above
+      } else {
+        ditherIdx += numZig; // keep in sync
+      }
     }
     for (let i = 0; i < codedLength; i++) {
+      if (bitCounts[i] > 0) softBits[i] /= bitCounts[i];
     }
     tileSoftBits.push(softBits);
   return { tileSoftBits, totalTiles: tileGrid.totalTiles };
 }
+/**
+ * Options for crop-resilient detection, accepted as the optional trailing
+ * `options` argument of detectWatermarkMultiFrame, autoDetect and
+ * autoDetectMultiFrame.
+ */
+export interface DetectOptions {
+  /**
+   * Enable grid-phase search for cropped content. When omitted or false,
+   * detectWatermarkMultiFrame only tries the zero-phase tile grid (the fast
+   * path) and returns a "not detected" result if that fails.
+   */
+  cropResilient?: boolean;
+}
 /**
  * Detect watermark from multiple Y planes.
  * Extracts soft decisions from each frame independently, then combines
  * across frames and tiles (never averages raw pixels).
+ *
+ * When cropResilient is true, searches jointly over:
+ *   - 16 DWT-pad combinations (0..3 × 0..3 for dwtLevels=2)
+ *   - 64 DCT block shifts within the subband (0..7 × 0..7)
+ *   - N×N tile-phase offsets (block-aligned, N = subband tile period / 8)
+ * Signal magnitude from one frame ranks candidates cheaply, then the
+ * top candidates are decoded using all frames.
  */
 export function detectWatermarkMultiFrame(
   yPlanes: Uint8Array[],
   height: number,
   key: string,
   config: WatermarkConfig,
+  options?: DetectOptions,
 ): DetectionResult {
   const noResult: DetectionResult = {
     detected: false,
   const bch = new BchCodec(config.bch);
   const perm = generatePermutation(key, codedLength);
+  // Helper: try to detect with given frames and explicit tile grid.
+  // makeSubbandAndGrid can optionally transform the subband (e.g. shift it).
+  interface SubbandAndGrid {
+    subband: Buffer2D;
+    grid: TileGrid;
+    ditherOffX?: number;
+    ditherOffY?: number;
+    blocksPerSide?: number;
+  }
+  const tryDetect = (
     frames: FrameDWT[],
+    makeSubbandAndGrid: (hlSubband: Buffer2D, stp: number) => SubbandAndGrid,
   ): DetectionResult | null => {
     const softBits: Float64Array[] = [];
     for (const { hlSubband, subbandTilePeriod } of frames) {
+      const { subband, grid, ditherOffX, ditherOffY, blocksPerSide: bps } = makeSubbandAndGrid(hlSubband, subbandTilePeriod);
+      const frameResult = extractSoftBitsFromSubband(subband, grid, key, config, ditherOffX ?? 0, ditherOffY ?? 0, bps ?? 0);
       if (frameResult) softBits.push(...frameResult.tileSoftBits);
     }
     if (softBits.length === 0) return null;
   // Fast path: zero-phase grid (uncropped frames)
   const frameDWTs = yPlanes.map((yp) => computeFrameDWT(yp, width, height, config));
+  const fast = tryDetect(frameDWTs, (hl, stp) => ({
+    subband: hl,
+    grid: computeTileGrid(hl.width, hl.height, stp),
+  }));
   if (fast) return fast;
+  if (!options?.cropResilient) return noResult;
+  // ── Crop-resilient: joint search over DWT-pad × pixel-shift × dither-offset ──
+  //
+  // A crop of C pixels causes three alignment problems:
+  //   1. DWT pixel pairing: pad by C%4 → search 0..3 per axis (16 combos)
+  //   2. DCT block alignment: subband shift % 8 → search 0..7 per axis (64)
+  //   3. Tile dither offset: which block within the tile period does the
+  //      detector's block 0 correspond to? Search 0..blocksPerTileSide-1
+  //      per axis.
+  //
+  // All three must be correct simultaneously for signal to emerge, so we
+  // search them jointly. For each (pad, shift), we compute DCT blocks once
+  // per scoring frame, then sweep dither offsets cheaply (DMQIM re-indexing
+  // only, no DCT recomputation).
+  //
+  // Scoring uses frame 0 only (nScoringFrames = 1) for fast candidate ranking.
+  // Top candidates are decoded with ALL frames.
+  const subbandTilePeriod = Math.floor(config.tilePeriod / (1 << config.dwtLevels));
+  const effectiveTP = Math.max(8, Math.floor(subbandTilePeriod / 8) * 8);
+  const blocksPerSide = effectiveTP / 8;
+  const dwtPads = 1 << config.dwtLevels; // 4 for dwtLevels=2
+  // Scoring: use frame 0 only for fast candidate ranking (36K candidates)
+  const nScoringFrames = 1;
+  interface Candidate {
+    padTop: number;
+    padLeft: number;
+    shiftX: number;
+    shiftY: number;
+    ditherOffX: number;
+    ditherOffY: number;
+    signalMag: number;
+  }
+  const candidates: Candidate[] = [];
+  // Precompute DWTs for scoring frames, cached by pad
+  const scoringDWTCache = new Map<string, FrameDWT[]>();
+  const getScoringDWTs = (padTop: number, padLeft: number): FrameDWT[] => {
+    const cacheKey = `${padTop},${padLeft}`;
+    let cached = scoringDWTCache.get(cacheKey);
+    if (!cached) {
+      cached = [];
+      for (let fi = 0; fi < nScoringFrames; fi++) {
+        if (padTop === 0 && padLeft === 0) {
+          cached.push(frameDWTs[fi]);
+        } else {
+          const { padded, paddedW, paddedH } = padYPlane(yPlanes[fi], width, height, padLeft, padTop);
+          cached.push(computeFrameDWT(padded, paddedW, paddedH, config));
+        }
+      }
+      scoringDWTCache.set(cacheKey, cached);
+    }
+    return cached;
+  };
+  // Precompute zigzag → coefficient index mapping for scoring
+  const numZig = config.zigzagPositions.length;
+  const zigCoeffIdx = new Int32Array(numZig);
+  for (let z = 0; z < numZig; z++) {
+    const [r, c] = ZIGZAG_ORDER[config.zigzagPositions[z]];
+    zigCoeffIdx[z] = r * 8 + c;
+  }
+  const scoreDithers = generateDithers(key, 1024, config.delta);
+  const blockBuf = new Float64Array(64);
+  // Phase 1: score all candidates with DCT caching.
+  // For each (pad, shift), compute DCT once per scoring frame, then sweep
+  // all dither offsets using only DMQIM re-indexing (no DCT recomputation).
+  for (let padTop = 0; padTop < dwtPads; padTop++) {
+    for (let padLeft = 0; padLeft < dwtPads; padLeft++) {
+      const scoreDWTs = getScoringDWTs(padTop, padLeft);
+      for (let shiftY = 0; shiftY < 8; shiftY++) {
+        for (let shiftX = 0; shiftX < 8; shiftX++) {
+          const hl0 = scoreDWTs[0].hlSubband;
+          const newW = hl0.width - shiftX;
+          const newH = hl0.height - shiftY;
+          if (newW < effectiveTP || newH < effectiveTP) continue;
+          const grid = computeTileGrid(newW, newH, subbandTilePeriod);
+          if (grid.totalTiles === 0) continue;
+          // Use tile 0 for scoring (fast; sufficient for ranking)
+          const tile0Origin = getTileOrigin(grid, 0);
+          const tile0Blocks = getTileBlocks(
+            tile0Origin.x, tile0Origin.y, grid.tilePeriod, newW, newH
+          );
+          const nBlocks = tile0Blocks.length;
+          if (nBlocks === 0) continue;
+          const tile0OriginBR = Math.floor(tile0Origin.y / 8);
+          const tile0OriginBC = Math.floor(tile0Origin.x / 8);
+          const relBR = new Int32Array(nBlocks);
+          const relBC = new Int32Array(nBlocks);
+          for (let bi = 0; bi < nBlocks; bi++) {
+            relBR[bi] = tile0Blocks[bi].row - tile0OriginBR;
+            relBC[bi] = tile0Blocks[bi].col - tile0OriginBC;
+          }
+          // Precompute DCT coefficients + effective deltas per scoring frame
+          const frameCoeffs: Float64Array[] = [];
+          const frameDeltas: Float64Array[] = [];
+          for (let fi = 0; fi < scoreDWTs.length; fi++) {
+            const shifted = createShiftedSubband(scoreDWTs[fi].hlSubband, shiftX, shiftY);
+            const coeffs = new Float64Array(nBlocks * numZig);
+            const deltas = new Float64Array(nBlocks);
+            if (config.perceptualMasking) {
+              const energies = new Float64Array(nBlocks);
+              for (let bi = 0; bi < nBlocks; bi++) {
+                extractBlock(shifted.data, newW, tile0Blocks[bi].row, tile0Blocks[bi].col, blockBuf);
+                dctForward8x8(blockBuf);
+                energies[bi] = blockAcEnergy(blockBuf);
+                for (let z = 0; z < numZig; z++) {
+                  coeffs[bi * numZig + z] = blockBuf[zigCoeffIdx[z]];
+                }
+              }
+              const factors = computeMaskingFactors(energies);
+              for (let bi = 0; bi < nBlocks; bi++) {
+                deltas[bi] = config.delta * factors[bi];
+              }
+            } else {
+              for (let bi = 0; bi < nBlocks; bi++) {
+                extractBlock(shifted.data, newW, tile0Blocks[bi].row, tile0Blocks[bi].col, blockBuf);
+                dctForward8x8(blockBuf);
+                for (let z = 0; z < numZig; z++) {
+                  coeffs[bi * numZig + z] = blockBuf[zigCoeffIdx[z]];
+                }
+                deltas[bi] = config.delta;
+              }
+            }
+            frameCoeffs.push(coeffs);
+            frameDeltas.push(deltas);
+          }
+          // Sweep all dither offsets using cached coefficients (DMQIM only)
+          for (let ditherOffY = 0; ditherOffY < blocksPerSide; ditherOffY++) {
+            for (let ditherOffX = 0; ditherOffX < blocksPerSide; ditherOffX++) {
+              if (padTop === 0 && padLeft === 0 && shiftX === 0 && shiftY === 0
+                  && ditherOffX === 0 && ditherOffY === 0) {
+                continue; // Already tried in fast path
+              }
+              const avg = new Float64Array(codedLength);
+              let nSamples = 0;
+              for (let fi = 0; fi < frameCoeffs.length; fi++) {
+                const coeffs = frameCoeffs[fi];
+                const deltas = frameDeltas[fi];
+                const softBits = new Float64Array(codedLength);
+                const bitCounts = new Float64Array(codedLength);
+                for (let bi = 0; bi < nBlocks; bi++) {
+                  const origR = ((relBR[bi] + ditherOffY) % blocksPerSide + blocksPerSide) % blocksPerSide;
+                  const origC = ((relBC[bi] + ditherOffX) % blocksPerSide + blocksPerSide) % blocksPerSide;
+                  const blockDitherBase = (origR * blocksPerSide + origC) * numZig;
+                  const ed = deltas[bi];
+                  // Remap bitIdx to match embedder's bit assignment at original position
+                  let bitIdx = ((origR * blocksPerSide + origC) * numZig) % codedLength;
+                  for (let z = 0; z < numZig; z++) {
+                    if (bitIdx >= codedLength) bitIdx = 0;
+                    const soft = dmqimExtractSoft(coeffs[bi * numZig + z], ed, scoreDithers[blockDitherBase + z]);
+                    softBits[bitIdx] += soft;
+                    bitCounts[bitIdx]++;
+                    bitIdx++;
+                  }
+                }
+                for (let i = 0; i < codedLength; i++) {
+                  if (bitCounts[i] > 0) softBits[i] /= bitCounts[i];
+                  avg[i] += softBits[i];
+                }
+                nSamples++;
+              }
+              let mag = 0;
+              for (let i = 0; i < codedLength; i++) {
+                avg[i] /= nSamples;
+                mag += avg[i] * avg[i];
+              }
+              candidates.push({ padTop, padLeft, shiftX, shiftY, ditherOffX, ditherOffY, signalMag: mag });
+            }
+          }
+        }
+      }
+    }
+  }
+  // Sort by signal magnitude and decode top candidates with all frames
+  candidates.sort((a, b) => b.signalMag - a.signalMag);
+  const MAX_DECODE = 50;
+  let bestResult: DetectionResult | null = null;
+  for (let i = 0; i < Math.min(MAX_DECODE, candidates.length); i++) {
+    const { padTop, padLeft, shiftX, shiftY, ditherOffX, ditherOffY } = candidates[i];
+    const dwts = (padTop === 0 && padLeft === 0)
+      ? frameDWTs
+      : yPlanes.map((yp) => {
+          const { padded, paddedW, paddedH } = padYPlane(yp, width, height, padLeft, padTop);
+          return computeFrameDWT(padded, paddedW, paddedH, config);
+        });
+    const result = tryDetect(dwts, (hl) => {
+      const shifted = createShiftedSubband(hl, shiftX, shiftY);
+      const grid = computeTileGrid(shifted.width, shifted.height, subbandTilePeriod);
+      return { subband: shifted, grid, ditherOffX, ditherOffY, blocksPerSide };
+    });
+    if (result && (!bestResult || result.confidence > bestResult.confidence)) {
+      bestResult = result;
+    }
+    if (bestResult && bestResult.confidence >= 0.95) break;
+  }
+  return bestResult ?? noResult;
+}
+/**
+ * Pad a Y plane with edge-replicated border pixels to realign DWT pixel pairing.
+ *
+ * Rows are added only at the top and columns only at the left: the source
+ * pixels end up shifted down/right by (padTop, padLeft), and the new border
+ * repeats the first source row / first source column.
+ *
+ * @param yPlane  Source luminance samples, read as `yPlane[y * width + x]`.
+ * @param width   Source width in pixels.
+ * @param height  Source height in pixels.
+ * @param padLeft Number of columns to prepend on the left.
+ * @param padTop  Number of rows to prepend at the top.
+ * @returns The newly allocated padded plane together with its dimensions
+ *          (`width + padLeft` by `height + padTop`); `yPlane` is only read.
+ */
+function padYPlane(
+  yPlane: Uint8Array,
+  width: number,
+  height: number,
+  padLeft: number,
+  padTop: number,
+): { padded: Uint8Array; paddedW: number; paddedH: number } {
+  const paddedW = width + padLeft;
+  const paddedH = height + padTop;
+  const padded = new Uint8Array(paddedW * paddedH);
+  for (let y = 0; y < paddedH; y++) {
+    const srcY = Math.max(0, y - padTop); // rows inside the top padding repeat source row 0
+    for (let x = 0; x < paddedW; x++) {
+      const srcX = Math.max(0, x - padLeft); // columns inside the left padding repeat source column 0
+      padded[y * paddedW + x] = yPlane[srcY * width + srcX];
+    }
+  }
+  return { padded, paddedW, paddedH };
+}
+/**
+ * Create a shifted copy of a subband.
+ *
+ * Despite being used like a "view", this allocates a new buffer via
+ * createBuffer2D and copies every retained sample: source sample
+ * (shiftX, shiftY) becomes (0, 0) in the result, and the first `shiftX`
+ * columns and `shiftY` rows of `hl` are dropped. `hl` itself is only read.
+ *
+ * NOTE(review): there is no bounds checking here — this assumes
+ * 0 <= shiftX <= hl.width and 0 <= shiftY <= hl.height; confirm that every
+ * caller guarantees it.
+ *
+ * @param hl     Source subband, read as `hl.data[y * hl.width + x]`.
+ * @param shiftX Number of columns dropped from the left edge.
+ * @param shiftY Number of rows dropped from the top edge.
+ * @returns The buffer created with dimensions
+ *          (`hl.width - shiftX`, `hl.height - shiftY`), filled with the copy.
+ */
+function createShiftedSubband(hl: Buffer2D, shiftX: number, shiftY: number): Buffer2D {
+  const newW = hl.width - shiftX;
+  const newH = hl.height - shiftY;
+  const shifted = createBuffer2D(newW, newH);
+  for (let y = 0; y < newH; y++) {
+    const srcOff = (y + shiftY) * hl.width + shiftX; // start of the retained part of source row y + shiftY
+    const dstOff = y * newW; // start of destination row y
+    for (let x = 0; x < newW; x++) {
+      shifted.data[dstOff + x] = hl.data[srcOff + x];
+    }
+  }
+  return shifted;
 }
 /**
 /**
  * Minimum confidence to report a detection.
  *
  * NOTE(review): this comment used to describe the threshold as "low", which
  * no longer fits a value of 0.75. The accompanying claim that the
  * statistical model already keeps noise scores near 0% is carried over from
  * that older wording and cannot be verified from this declaration alone —
  * confirm before relying on it.
  */
+const MIN_CONFIDENCE = 0.75;
 /**
  * Try to decode soft bits into a payload
   width: number,
   height: number,
   key: string,
+  options?: DetectOptions,
 ): AutoDetectResult {
+  return autoDetectMultiFrame([yPlane], width, height, key, options);
 }
 /**
   width: number,
   height: number,
   key: string,
+  options?: DetectOptions,
 ): AutoDetectResult {
   let best: AutoDetectResult = {
     detected: false,
   };
   for (const [name, config] of Object.entries(PRESETS)) {
+    const result = detectWatermarkMultiFrame(yPlanes, width, height, key, config, options);
     if (result.detected && result.confidence > best.confidence) {
       best = { ...result, presetUsed: name as PresetName };
     }