Spaces:
Running
Running
Upload 13 files
Browse files- README.md +75 -26
- core/detector.ts +339 -23
README.md
CHANGED
|
@@ -10,7 +10,9 @@ pinned: false
|
|
| 10 |
|
| 11 |
# LTMarX β Video Watermarking
|
| 12 |
|
| 13 |
-
Imperceptible 32-bit watermarking for video. Embeds a payload into the luminance channel using DWT/DCT transform-domain quantization (DM-QIM) with BCH error correction.
|
|
|
|
|
|
|
| 14 |
|
| 15 |
All processing runs in the browser β no server round-trips needed.
|
| 16 |
|
|
@@ -44,13 +46,13 @@ core/ Pure TypeScript watermark engine (isomorphic, zero platform deps
|
|
| 44 |
βββ dwt.ts Haar DWT (forward/inverse, multi-level)
|
| 45 |
βββ dct.ts 8Γ8 DCT with zigzag scan
|
| 46 |
βββ dmqim.ts Dither-Modulated QIM (embed/extract with soft decisions)
|
| 47 |
-
βββ bch.ts BCH
|
| 48 |
-
βββ crc.ts CRC-4
|
| 49 |
-
βββ tiling.ts Periodic tile layout
|
| 50 |
-
βββ masking.ts Perceptual masking (variance-adaptive
|
| 51 |
βββ keygen.ts Seeded PRNG for dithers and permutations
|
| 52 |
βββ embedder.ts Y-plane β watermarked Y-plane
|
| 53 |
-
βββ detector.ts Y-plane β payload + confidence
|
| 54 |
βββ presets.ts Named configurations (light β fortress)
|
| 55 |
βββ types.ts Shared types
|
| 56 |
|
|
@@ -58,15 +60,16 @@ web/ Frontend (Vite + React + Tailwind)
|
|
| 58 |
βββ src/
|
| 59 |
β βββ App.tsx
|
| 60 |
β βββ components/
|
| 61 |
-
β β βββ EmbedPanel.tsx Upload, configure, embed
|
| 62 |
β β βββ DetectPanel.tsx Upload, detect, display results
|
| 63 |
-
β β βββ ApiDocs.tsx Inline API reference
|
| 64 |
β β βββ ComparisonView.tsx Side-by-side / difference viewer
|
| 65 |
-
β β βββ RobustnessTest.tsx Automated attack
|
|
|
|
| 66 |
β β βββ StrengthSlider.tsx Preset selector with snap points
|
| 67 |
-
β β
|
|
|
|
| 68 |
β βββ lib/
|
| 69 |
-
β β βββ video-io.ts Frame extraction, encoding, attack
|
| 70 |
β βββ workers/
|
| 71 |
β βββ watermark.worker.ts
|
| 72 |
βββ index.html
|
|
@@ -86,33 +89,70 @@ tests/ Vitest test suite
|
|
| 86 |
### Embedding
|
| 87 |
|
| 88 |
```
|
| 89 |
-
Y plane β 2-level Haar DWT β HL subband β tile grid β
|
| 90 |
-
per tile: 8Γ8 DCT blocks β select mid-freq coefficients β
|
| 91 |
-
DM-QIM embed coded bits
|
|
|
|
| 92 |
```
|
| 93 |
|
| 94 |
### Payload Encoding
|
| 95 |
|
| 96 |
```
|
| 97 |
-
32-bit payload β CRC append β BCH encode β keyed interleave β
|
|
|
|
| 98 |
```
|
| 99 |
|
| 100 |
### Detection
|
| 101 |
|
| 102 |
```
|
| 103 |
-
Y plane β DWT β HL subband β tile grid β
|
| 104 |
per tile: DCT β DM-QIM soft extract β
|
| 105 |
-
soft-combine across tiles and frames β
|
|
|
|
| 106 |
```
|
| 107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
## Presets
|
| 109 |
|
| 110 |
-
| Preset | Delta | Tile Period |
|
| 111 |
-
|--------|-------|-------------|----------|---------|----------|
|
| 112 |
-
| **Light** | 50 | 256px | (
|
| 113 |
-
| **Moderate** |
|
| 114 |
-
| **Strong** | 110 | 208px | (
|
| 115 |
-
| **Fortress** | 150 | 192px | (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
## API
|
| 118 |
|
|
@@ -124,7 +164,7 @@ import { getPreset } from './core/presets';
|
|
| 124 |
|
| 125 |
const config = getPreset('moderate');
|
| 126 |
const result = embedWatermark(yPlane, width, height, payload, key, config);
|
| 127 |
-
// result.yPlane: watermarked Y plane
|
| 128 |
// result.psnr: quality metric (dB)
|
| 129 |
```
|
| 130 |
|
|
@@ -140,6 +180,15 @@ const result = detectWatermarkMultiFrame(yPlanes, width, height, key, config);
|
|
| 140 |
// result.confidence: 0β1
|
| 141 |
```
|
| 142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
### Auto-Detection (tries all presets)
|
| 144 |
|
| 145 |
```typescript
|
|
@@ -164,8 +213,8 @@ npm test # Run all tests
|
|
| 164 |
npm run test:watch # Watch mode
|
| 165 |
```
|
| 166 |
|
| 167 |
-
|
| 168 |
|
| 169 |
## Browser Encoding
|
| 170 |
|
| 171 |
-
The web UI encodes watermarked video using ffmpeg.wasm (x264 in WebAssembly). To avoid memory pressure, frames are encoded in chunks of 100 and concatenated at the end. Peak memory stays
|
|
|
|
| 10 |
|
| 11 |
# LTMarX — Video Watermarking
|
| 12 |
|
| 13 |
+
Imperceptible 32-bit watermarking for video. Embeds a payload into the luminance channel using DWT/DCT transform-domain quantization (DM-QIM) with BCH error correction.
|
| 14 |
+
|
| 15 |
+
Survives re-encoding, rescaling, brightness/contrast/saturation adjustments, and cropping up to ~20%.
|
| 16 |
|
| 17 |
All processing runs in the browser — no server round-trips needed.
|
| 18 |
|
|
|
|
| 46 |
├── dwt.ts Haar DWT (forward/inverse, multi-level)
|
| 47 |
├── dct.ts 8×8 DCT with zigzag scan
|
| 48 |
├── dmqim.ts Dither-Modulated QIM (embed/extract with soft decisions)
|
| 49 |
+
├── bch.ts BCH(63,36,5) over GF(2^6), Berlekamp-Massey decoding
|
| 50 |
+
├── crc.ts CRC-4 integrity check
|
| 51 |
+
├── tiling.ts Periodic tile layout + autocorrelation-based grid recovery
|
| 52 |
+
├── masking.ts Perceptual masking (variance-adaptive quantization step)
|
| 53 |
├── keygen.ts Seeded PRNG for dithers and permutations
|
| 54 |
├── embedder.ts Y-plane → watermarked Y-plane
|
| 55 |
+
├── detector.ts Y-plane(s) → payload + confidence
|
| 56 |
├── presets.ts Named configurations (light → fortress)
|
| 57 |
└── types.ts Shared types
|
| 58 |
|
|
|
|
| 60 |
├── src/
|
| 61 |
│ ├── App.tsx
|
| 62 |
│ ├── components/
|
| 63 |
+
│ │ ├── EmbedPanel.tsx Upload, configure, embed, download
|
| 64 |
│ │ ├── DetectPanel.tsx Upload, detect, display results
|
|
|
|
| 65 |
│ │ ├── ComparisonView.tsx Side-by-side / difference viewer
|
| 66 |
+
│ │ ├── RobustnessTest.tsx Automated attack battery (re-encode, crop, etc.)
|
| 67 |
+
│ │ ├── HowItWorks.tsx Interactive explainer with D3 visualizations
|
| 68 |
│ │ ├── StrengthSlider.tsx Preset selector with snap points
|
| 69 |
+
│ │ ├── ResultCard.tsx Detection result display
|
| 70 |
+
│ │ └── ApiDocs.tsx Inline API reference
|
| 71 |
│ ├── lib/
|
| 72 |
+
│ │ └── video-io.ts Frame extraction, encoding, attack simulations
|
| 73 |
│ └── workers/
|
| 74 |
│ └── watermark.worker.ts
|
| 75 |
└── index.html
|
|
|
|
| 89 |
### Embedding
|
| 90 |
|
| 91 |
```
|
| 92 |
+
Y plane → 2-level Haar DWT → HL subband → periodic tile grid →
|
| 93 |
+
per tile: 8×8 DCT blocks → select mid-freq zigzag coefficients →
|
| 94 |
+
DM-QIM embed coded bits (with per-block dithering and perceptual masking) →
|
| 95 |
+
inverse DCT → inverse DWT → modified Y plane
|
| 96 |
```
|
| 97 |
|
| 98 |
### Payload Encoding
|
| 99 |
|
| 100 |
```
|
| 101 |
+
32-bit payload → CRC-4 append → BCH(63,36,5) encode → keyed interleave →
|
| 102 |
+
map to DCT coefficients across tiles (with wraparound redundancy)
|
| 103 |
```
|
| 104 |
|
| 105 |
### Detection
|
| 106 |
|
| 107 |
```
|
| 108 |
+
Y plane(s) → DWT → HL subband → tile grid →
|
| 109 |
per tile: DCT → DM-QIM soft extract →
|
| 110 |
+
soft-combine across tiles and frames → keyed de-interleave →
|
| 111 |
+
BCH soft decode → CRC verify → payload
|
| 112 |
```
|
| 113 |
|
| 114 |
+
### Crop-Resilient Detection
|
| 115 |
+
|
| 116 |
+
When the frame has been cropped, the detector doesn't know the original tile grid alignment. It searches over three alignment parameters:
|
| 117 |
+
|
| 118 |
+
1. **DWT padding** (0–3 per axis) — the crop may break DWT pixel pairing
|
| 119 |
+
2. **DCT block shift** (0–7 per axis) — the crop may misalign 8×8 block boundaries within the subband
|
| 120 |
+
3. **Tile dither offset** (0 to N−1 per axis, where N is the number of 8×8 blocks per tile side) — the crop shifts which tile-phase position each block maps to
|
| 121 |
+
|
| 122 |
+
The total search space is 16 × 64 × N² candidates (~37K for the strong preset). To make this fast:
|
| 123 |
+
|
| 124 |
+
- DCT coefficients are precomputed once per (pad, shift) combination using only tile 0
|
| 125 |
+
- Dither offsets are swept cheaply using just DM-QIM re-extraction on cached coefficients
|
| 126 |
+
- Candidates are ranked by signal magnitude (sum of squared averaged soft bits)
|
| 127 |
+
- Only the top 50 candidates are fully decoded with all frames
|
| 128 |
+
|
| 129 |
+
This runs in ~1 second for 32 frames on a 512×512 video.
|
| 130 |
+
|
| 131 |
## Presets
|
| 132 |
|
| 133 |
+
| Preset | Delta | Tile Period | Zigzag Positions | Masking | Use Case |
|
| 134 |
+
|--------|-------|-------------|------------------|---------|----------|
|
| 135 |
+
| **Light** | 50 | 256px | 3–14 (mid-freq) | No | Near-invisible, mild compression |
|
| 136 |
+
| **Moderate** | 62 | 240px | 3–14 (mid-freq) | Yes | Balanced with perceptual masking |
|
| 137 |
+
| **Strong** | 110 | 208px | 1–20 (low+mid) | Yes | Heavy re-encoding, rescaling, cropping |
|
| 138 |
+
| **Fortress** | 150 | 192px | 1–20 (low+mid) | Yes | Maximum robustness |
|
| 139 |
+
|
| 140 |
+
All presets use BCH(63,36,5) with CRC-4 and 2-level DWT.
|
| 141 |
+
|
| 142 |
+
Higher delta = stronger embedding = more visible artifacts but better survival under attacks. The "strong" and "fortress" presets use more DCT coefficients (zigzag positions 1–20 vs 3–14) for additional redundancy.
|
| 143 |
+
|
| 144 |
+
## Robustness
|
| 145 |
+
|
| 146 |
+
The web UI includes an automated robustness test battery. Each test applies an attack to the watermarked video and attempts detection:
|
| 147 |
+
|
| 148 |
+
| Attack | Variants Tested |
|
| 149 |
+
|--------|----------------|
|
| 150 |
+
| **Re-encode** | CRF 23, 28, 33, 38, 43 |
|
| 151 |
+
| **Downscale** | 25%, 50%, 75%, 90% |
|
| 152 |
+
| **Brightness** | -0.2, +0.2, +0.4 |
|
| 153 |
+
| **Contrast** | 0.5×, 1.5×, 2.0× |
|
| 154 |
+
| **Saturation** | 0×, 0.5×, 2.0× |
|
| 155 |
+
| **Crop** | 5%, 10%, 15%, 20% (per side) |
|
| 156 |
|
| 157 |
## API
|
| 158 |
|
|
|
|
| 164 |
|
| 165 |
const config = getPreset('moderate');
|
| 166 |
const result = embedWatermark(yPlane, width, height, payload, key, config);
|
| 167 |
+
// result.yPlane: watermarked Y plane (Uint8Array)
|
| 168 |
// result.psnr: quality metric (dB)
|
| 169 |
```
|
| 170 |
|
|
|
|
| 180 |
// result.confidence: 0–1
|
| 181 |
```
|
| 182 |
|
| 183 |
+
### Crop-Resilient Detection
|
| 184 |
+
|
| 185 |
+
```typescript
|
| 186 |
+
const result = detectWatermarkMultiFrame(
|
| 187 |
+
yPlanes, width, height, key, config,
|
| 188 |
+
{ cropResilient: true }
|
| 189 |
+
);
|
| 190 |
+
```
|
| 191 |
+
|
| 192 |
### Auto-Detection (tries all presets)
|
| 193 |
|
| 194 |
```typescript
|
|
|
|
| 213 |
npm run test:watch # Watch mode
|
| 214 |
```
|
| 215 |
|
| 216 |
+
25 tests across 6 files covering: DWT round-trip, DCT round-trip, DM-QIM embed/extract, BCH encode/decode with error correction, CRC append/verify, full embed-detect pipeline across presets, false positive rejection (wrong key, unwatermarked frame), crop-resilient detection (arbitrary offset and ~20% crop).
|
| 217 |
|
| 218 |
## Browser Encoding
|
| 219 |
|
| 220 |
+
The web UI encodes watermarked video using ffmpeg.wasm (x264 in WebAssembly). To avoid memory pressure, frames are encoded in chunks of 100 and concatenated at the end. Peak memory stays proportional to chunk size rather than scaling with video length.
|
core/detector.ts
CHANGED
|
@@ -5,13 +5,13 @@
|
|
| 5 |
*/
|
| 6 |
|
| 7 |
import type { WatermarkConfig, DetectionResult, Buffer2D } from './types.js';
|
| 8 |
-
import { yPlaneToBuffer, dwtForward, extractSubband } from './dwt.js';
|
| 9 |
import { dctForward8x8, extractBlock, ZIGZAG_ORDER } from './dct.js';
|
| 10 |
import { dmqimExtractSoft } from './dmqim.js';
|
| 11 |
import { crcVerify } from './crc.js';
|
| 12 |
import { BchCodec } from './bch.js';
|
| 13 |
import { generateDithers, generatePermutation } from './keygen.js';
|
| 14 |
-
import { computeTileGrid, getTileOrigin, getTileBlocks, type TileGrid } from './tiling.js';
|
| 15 |
import { blockAcEnergy, computeMaskingFactors } from './masking.js';
|
| 16 |
import { bitsToPayload } from './embedder.js';
|
| 17 |
import { PRESETS } from './presets.js';
|
|
@@ -57,7 +57,10 @@ function extractSoftBitsFromSubband(
|
|
| 57 |
hlSubband: Buffer2D,
|
| 58 |
tileGrid: TileGrid,
|
| 59 |
key: string,
|
| 60 |
-
config: WatermarkConfig
|
|
|
|
|
|
|
|
|
|
| 61 |
): { tileSoftBits: Float64Array[]; totalTiles: number } | null {
|
| 62 |
if (tileGrid.totalTiles === 0) return null;
|
| 63 |
|
|
@@ -75,13 +78,18 @@ function extractSoftBitsFromSubband(
|
|
| 75 |
zigCoeffIdx[z] = r * 8 + c;
|
| 76 |
}
|
| 77 |
|
|
|
|
|
|
|
|
|
|
| 78 |
for (let tileIdx = 0; tileIdx < tileGrid.totalTiles; tileIdx++) {
|
| 79 |
let ditherIdx = 0; // Reset per tile β matches embedder
|
| 80 |
const origin = getTileOrigin(tileGrid, tileIdx);
|
| 81 |
const blocks = getTileBlocks(origin.x, origin.y, tileGrid.tilePeriod, hlSubband.width, hlSubband.height);
|
|
|
|
|
|
|
| 82 |
|
| 83 |
const softBits = new Float64Array(codedLength);
|
| 84 |
-
const
|
| 85 |
|
| 86 |
let maskingFactors: Float64Array | null = null;
|
| 87 |
if (config.perceptualMasking && blocks.length > 0) {
|
|
@@ -103,27 +111,44 @@ function extractSoftBitsFromSubband(
|
|
| 103 |
const maskFactor = maskingFactors ? maskingFactors[bi] : 1.0;
|
| 104 |
const effectiveDelta = config.delta * maskFactor;
|
| 105 |
|
| 106 |
-
//
|
| 107 |
-
//
|
| 108 |
-
//
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
for (let z = 0; z < zigCoeffIdx.length; z++) {
|
| 112 |
if (bitIdx >= codedLength) bitIdx = 0;
|
| 113 |
|
| 114 |
const coeffIdx = zigCoeffIdx[z];
|
| 115 |
-
const dither = dithers[ditherIdx++];
|
| 116 |
|
| 117 |
const soft = dmqimExtractSoft(blockBuf[coeffIdx], effectiveDelta, dither);
|
| 118 |
-
softBits[bitIdx] += soft
|
| 119 |
-
|
| 120 |
|
| 121 |
bitIdx++;
|
| 122 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
}
|
| 124 |
|
| 125 |
for (let i = 0; i < codedLength; i++) {
|
| 126 |
-
if (
|
| 127 |
}
|
| 128 |
|
| 129 |
tileSoftBits.push(softBits);
|
|
@@ -132,10 +157,22 @@ function extractSoftBitsFromSubband(
|
|
| 132 |
return { tileSoftBits, totalTiles: tileGrid.totalTiles };
|
| 133 |
}
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
/**
|
| 136 |
* Detect watermark from multiple Y planes.
|
| 137 |
* Extracts soft decisions from each frame independently, then combines
|
| 138 |
* across frames and tiles (never averages raw pixels).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
*/
|
| 140 |
export function detectWatermarkMultiFrame(
|
| 141 |
yPlanes: Uint8Array[],
|
|
@@ -143,6 +180,7 @@ export function detectWatermarkMultiFrame(
|
|
| 143 |
height: number,
|
| 144 |
key: string,
|
| 145 |
config: WatermarkConfig,
|
|
|
|
| 146 |
): DetectionResult {
|
| 147 |
const noResult: DetectionResult = {
|
| 148 |
detected: false,
|
|
@@ -158,15 +196,23 @@ export function detectWatermarkMultiFrame(
|
|
| 158 |
const bch = new BchCodec(config.bch);
|
| 159 |
const perm = generatePermutation(key, codedLength);
|
| 160 |
|
| 161 |
-
// Helper: try to detect with given frames and explicit tile grid
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
frames: FrameDWT[],
|
| 164 |
-
|
| 165 |
): DetectionResult | null => {
|
| 166 |
const softBits: Float64Array[] = [];
|
| 167 |
for (const { hlSubband, subbandTilePeriod } of frames) {
|
| 168 |
-
const
|
| 169 |
-
const frameResult = extractSoftBitsFromSubband(
|
| 170 |
if (frameResult) softBits.push(...frameResult.tileSoftBits);
|
| 171 |
}
|
| 172 |
if (softBits.length === 0) return null;
|
|
@@ -175,11 +221,279 @@ export function detectWatermarkMultiFrame(
|
|
| 175 |
|
| 176 |
// Fast path: zero-phase grid (uncropped frames)
|
| 177 |
const frameDWTs = yPlanes.map((yp) => computeFrameDWT(yp, width, height, config));
|
| 178 |
-
const fast =
|
| 179 |
-
|
|
|
|
|
|
|
| 180 |
if (fast) return fast;
|
| 181 |
|
| 182 |
-
return noResult;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
}
|
| 184 |
|
| 185 |
/**
|
|
@@ -247,7 +561,7 @@ function decodeFromSoftBits(
|
|
| 247 |
|
| 248 |
/** Minimum confidence to report a detection (low threshold is fine β
|
| 249 |
* the statistical model already ensures noise scores near 0%) */
|
| 250 |
-
const MIN_CONFIDENCE = 0.
|
| 251 |
|
| 252 |
/**
|
| 253 |
* Try to decode soft bits into a payload
|
|
@@ -312,8 +626,9 @@ export function autoDetect(
|
|
| 312 |
width: number,
|
| 313 |
height: number,
|
| 314 |
key: string,
|
|
|
|
| 315 |
): AutoDetectResult {
|
| 316 |
-
return autoDetectMultiFrame([yPlane], width, height, key);
|
| 317 |
}
|
| 318 |
|
| 319 |
/**
|
|
@@ -324,6 +639,7 @@ export function autoDetectMultiFrame(
|
|
| 324 |
width: number,
|
| 325 |
height: number,
|
| 326 |
key: string,
|
|
|
|
| 327 |
): AutoDetectResult {
|
| 328 |
let best: AutoDetectResult = {
|
| 329 |
detected: false,
|
|
@@ -335,7 +651,7 @@ export function autoDetectMultiFrame(
|
|
| 335 |
};
|
| 336 |
|
| 337 |
for (const [name, config] of Object.entries(PRESETS)) {
|
| 338 |
-
const result = detectWatermarkMultiFrame(yPlanes, width, height, key, config);
|
| 339 |
if (result.detected && result.confidence > best.confidence) {
|
| 340 |
best = { ...result, presetUsed: name as PresetName };
|
| 341 |
}
|
|
|
|
| 5 |
*/
|
| 6 |
|
| 7 |
import type { WatermarkConfig, DetectionResult, Buffer2D } from './types.js';
|
| 8 |
+
import { createBuffer2D, yPlaneToBuffer, dwtForward, extractSubband } from './dwt.js';
|
| 9 |
import { dctForward8x8, extractBlock, ZIGZAG_ORDER } from './dct.js';
|
| 10 |
import { dmqimExtractSoft } from './dmqim.js';
|
| 11 |
import { crcVerify } from './crc.js';
|
| 12 |
import { BchCodec } from './bch.js';
|
| 13 |
import { generateDithers, generatePermutation } from './keygen.js';
|
| 14 |
+
import { computeTileGrid, recoverTileGrid, getTileOrigin, getTileBlocks, type TileGrid } from './tiling.js';
|
| 15 |
import { blockAcEnergy, computeMaskingFactors } from './masking.js';
|
| 16 |
import { bitsToPayload } from './embedder.js';
|
| 17 |
import { PRESETS } from './presets.js';
|
|
|
|
| 57 |
hlSubband: Buffer2D,
|
| 58 |
tileGrid: TileGrid,
|
| 59 |
key: string,
|
| 60 |
+
config: WatermarkConfig,
|
| 61 |
+
ditherOffX: number = 0,
|
| 62 |
+
ditherOffY: number = 0,
|
| 63 |
+
blocksPerSide: number = 0,
|
| 64 |
): { tileSoftBits: Float64Array[]; totalTiles: number } | null {
|
| 65 |
if (tileGrid.totalTiles === 0) return null;
|
| 66 |
|
|
|
|
| 78 |
zigCoeffIdx[z] = r * 8 + c;
|
| 79 |
}
|
| 80 |
|
| 81 |
+
const hasDitherOffset = ditherOffX !== 0 || ditherOffY !== 0;
|
| 82 |
+
const numZig = config.zigzagPositions.length;
|
| 83 |
+
|
| 84 |
for (let tileIdx = 0; tileIdx < tileGrid.totalTiles; tileIdx++) {
|
| 85 |
let ditherIdx = 0; // Reset per tile β matches embedder
|
| 86 |
const origin = getTileOrigin(tileGrid, tileIdx);
|
| 87 |
const blocks = getTileBlocks(origin.x, origin.y, tileGrid.tilePeriod, hlSubband.width, hlSubband.height);
|
| 88 |
+
const tileOriginBlockRow = Math.floor(origin.y / 8);
|
| 89 |
+
const tileOriginBlockCol = Math.floor(origin.x / 8);
|
| 90 |
|
| 91 |
const softBits = new Float64Array(codedLength);
|
| 92 |
+
const bitCounts = new Float64Array(codedLength);
|
| 93 |
|
| 94 |
let maskingFactors: Float64Array | null = null;
|
| 95 |
if (config.perceptualMasking && blocks.length > 0) {
|
|
|
|
| 111 |
const maskFactor = maskingFactors ? maskingFactors[bi] : 1.0;
|
| 112 |
const effectiveDelta = config.delta * maskFactor;
|
| 113 |
|
| 114 |
+
// Compute dither index and bit index: when dither offset is active,
|
| 115 |
+
// remap block position to find the embedder's dither and bit assignment
|
| 116 |
+
// for this spatial location within the periodic tile structure.
|
| 117 |
+
let blockDitherBase: number;
|
| 118 |
+
if (hasDitherOffset && blocksPerSide > 0) {
|
| 119 |
+
const relBr = row - tileOriginBlockRow;
|
| 120 |
+
const relBc = col - tileOriginBlockCol;
|
| 121 |
+
const origR = (relBr + ditherOffY) % blocksPerSide;
|
| 122 |
+
const origC = (relBc + ditherOffX) % blocksPerSide;
|
| 123 |
+
blockDitherBase = (origR * blocksPerSide + origC) * numZig;
|
| 124 |
+
// Remap bitIdx to match the embedder's bit assignment at the original position
|
| 125 |
+
bitIdx = ((origR * blocksPerSide + origC) * numZig) % codedLength;
|
| 126 |
+
} else {
|
| 127 |
+
blockDitherBase = ditherIdx;
|
| 128 |
+
}
|
| 129 |
|
| 130 |
for (let z = 0; z < zigCoeffIdx.length; z++) {
|
| 131 |
if (bitIdx >= codedLength) bitIdx = 0;
|
| 132 |
|
| 133 |
const coeffIdx = zigCoeffIdx[z];
|
| 134 |
+
const dither = hasDitherOffset ? dithers[blockDitherBase + z] : dithers[ditherIdx++];
|
| 135 |
|
| 136 |
const soft = dmqimExtractSoft(blockBuf[coeffIdx], effectiveDelta, dither);
|
| 137 |
+
softBits[bitIdx] += soft;
|
| 138 |
+
bitCounts[bitIdx]++;
|
| 139 |
|
| 140 |
bitIdx++;
|
| 141 |
}
|
| 142 |
+
|
| 143 |
+
if (!hasDitherOffset) {
|
| 144 |
+
// ditherIdx already incremented in the loop above
|
| 145 |
+
} else {
|
| 146 |
+
ditherIdx += numZig; // keep in sync
|
| 147 |
+
}
|
| 148 |
}
|
| 149 |
|
| 150 |
for (let i = 0; i < codedLength; i++) {
|
| 151 |
+
if (bitCounts[i] > 0) softBits[i] /= bitCounts[i];
|
| 152 |
}
|
| 153 |
|
| 154 |
tileSoftBits.push(softBits);
|
|
|
|
| 157 |
return { tileSoftBits, totalTiles: tileGrid.totalTiles };
|
| 158 |
}
|
| 159 |
|
| 160 |
+
/** Options for crop-resilient detection */
|
| 161 |
+
export interface DetectOptions {
|
| 162 |
+
/** Enable grid-phase search for cropped content */
|
| 163 |
+
cropResilient?: boolean;
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
/**
|
| 167 |
* Detect watermark from multiple Y planes.
|
| 168 |
* Extracts soft decisions from each frame independently, then combines
|
| 169 |
* across frames and tiles (never averages raw pixels).
|
| 170 |
+
*
|
| 171 |
+
* When cropResilient is true, searches over:
|
| 172 |
+
* - 16 DWT-pad combinations (0..3 Γ 0..3 for dwtLevels=2)
|
| 173 |
+
* - NΓN tile-phase offsets (block-aligned, N = tilePeriod/8)
|
| 174 |
+
* Signal magnitude from one frame ranks candidates cheaply, then the
|
| 175 |
+
* top candidates are decoded using all frames.
|
| 176 |
*/
|
| 177 |
export function detectWatermarkMultiFrame(
|
| 178 |
yPlanes: Uint8Array[],
|
|
|
|
| 180 |
height: number,
|
| 181 |
key: string,
|
| 182 |
config: WatermarkConfig,
|
| 183 |
+
options?: DetectOptions,
|
| 184 |
): DetectionResult {
|
| 185 |
const noResult: DetectionResult = {
|
| 186 |
detected: false,
|
|
|
|
| 196 |
const bch = new BchCodec(config.bch);
|
| 197 |
const perm = generatePermutation(key, codedLength);
|
| 198 |
|
| 199 |
+
// Helper: try to detect with given frames and explicit tile grid.
|
| 200 |
+
// makeSubbandAndGrid can optionally transform the subband (e.g. shift it).
|
| 201 |
+
interface SubbandAndGrid {
|
| 202 |
+
subband: Buffer2D;
|
| 203 |
+
grid: TileGrid;
|
| 204 |
+
ditherOffX?: number;
|
| 205 |
+
ditherOffY?: number;
|
| 206 |
+
blocksPerSide?: number;
|
| 207 |
+
}
|
| 208 |
+
const tryDetect = (
|
| 209 |
frames: FrameDWT[],
|
| 210 |
+
makeSubbandAndGrid: (hlSubband: Buffer2D, stp: number) => SubbandAndGrid,
|
| 211 |
): DetectionResult | null => {
|
| 212 |
const softBits: Float64Array[] = [];
|
| 213 |
for (const { hlSubband, subbandTilePeriod } of frames) {
|
| 214 |
+
const { subband, grid, ditherOffX, ditherOffY, blocksPerSide: bps } = makeSubbandAndGrid(hlSubband, subbandTilePeriod);
|
| 215 |
+
const frameResult = extractSoftBitsFromSubband(subband, grid, key, config, ditherOffX ?? 0, ditherOffY ?? 0, bps ?? 0);
|
| 216 |
if (frameResult) softBits.push(...frameResult.tileSoftBits);
|
| 217 |
}
|
| 218 |
if (softBits.length === 0) return null;
|
|
|
|
| 221 |
|
| 222 |
// Fast path: zero-phase grid (uncropped frames)
|
| 223 |
const frameDWTs = yPlanes.map((yp) => computeFrameDWT(yp, width, height, config));
|
| 224 |
+
const fast = tryDetect(frameDWTs, (hl, stp) => ({
|
| 225 |
+
subband: hl,
|
| 226 |
+
grid: computeTileGrid(hl.width, hl.height, stp),
|
| 227 |
+
}));
|
| 228 |
if (fast) return fast;
|
| 229 |
|
| 230 |
+
if (!options?.cropResilient) return noResult;
|
| 231 |
+
|
| 232 |
+
// ββ Crop-resilient: joint search over DWT-pad Γ pixel-shift Γ dither-offset ββ
|
| 233 |
+
//
|
| 234 |
+
// A crop of C pixels causes three alignment problems:
|
| 235 |
+
// 1. DWT pixel pairing: pad by C%4 β search 0..3 per axis (16 combos)
|
| 236 |
+
// 2. DCT block alignment: subband shift % 8 β search 0..7 per axis (64)
|
| 237 |
+
// 3. Tile dither offset: which block within the tile period does the
|
| 238 |
+
// detector's block 0 correspond to? Search 0..blocksPerTileSide-1
|
| 239 |
+
// per axis.
|
| 240 |
+
//
|
| 241 |
+
// All three must be correct simultaneously for signal to emerge, so we
|
| 242 |
+
// search them jointly. For each (pad, shift), we compute DCT blocks once
|
| 243 |
+
// per scoring frame, then sweep dither offsets cheaply (DMQIM re-indexing
|
| 244 |
+
// only, no DCT recomputation).
|
| 245 |
+
//
|
| 246 |
+
// Scoring uses only the first frame (nScoringFrames = 1) for fast candidate ranking.
|
| 247 |
+
// Top candidates are decoded with ALL frames.
|
| 248 |
+
|
| 249 |
+
const subbandTilePeriod = Math.floor(config.tilePeriod / (1 << config.dwtLevels));
|
| 250 |
+
const effectiveTP = Math.max(8, Math.floor(subbandTilePeriod / 8) * 8);
|
| 251 |
+
const blocksPerSide = effectiveTP / 8;
|
| 252 |
+
const dwtPads = 1 << config.dwtLevels; // 4 for dwtLevels=2
|
| 253 |
+
|
| 254 |
+
// Scoring: use frame 0 only for fast candidate ranking (36K candidates)
|
| 255 |
+
const nScoringFrames = 1;
|
| 256 |
+
|
| 257 |
+
interface Candidate {
|
| 258 |
+
padTop: number;
|
| 259 |
+
padLeft: number;
|
| 260 |
+
shiftX: number;
|
| 261 |
+
shiftY: number;
|
| 262 |
+
ditherOffX: number;
|
| 263 |
+
ditherOffY: number;
|
| 264 |
+
signalMag: number;
|
| 265 |
+
}
|
| 266 |
+
|
| 267 |
+
const candidates: Candidate[] = [];
|
| 268 |
+
|
| 269 |
+
// Precompute DWTs for scoring frames, cached by pad
|
| 270 |
+
const scoringDWTCache = new Map<string, FrameDWT[]>();
|
| 271 |
+
const getScoringDWTs = (padTop: number, padLeft: number): FrameDWT[] => {
|
| 272 |
+
const cacheKey = `${padTop},${padLeft}`;
|
| 273 |
+
let cached = scoringDWTCache.get(cacheKey);
|
| 274 |
+
if (!cached) {
|
| 275 |
+
cached = [];
|
| 276 |
+
for (let fi = 0; fi < nScoringFrames; fi++) {
|
| 277 |
+
if (padTop === 0 && padLeft === 0) {
|
| 278 |
+
cached.push(frameDWTs[fi]);
|
| 279 |
+
} else {
|
| 280 |
+
const { padded, paddedW, paddedH } = padYPlane(yPlanes[fi], width, height, padLeft, padTop);
|
| 281 |
+
cached.push(computeFrameDWT(padded, paddedW, paddedH, config));
|
| 282 |
+
}
|
| 283 |
+
}
|
| 284 |
+
scoringDWTCache.set(cacheKey, cached);
|
| 285 |
+
}
|
| 286 |
+
return cached;
|
| 287 |
+
};
|
| 288 |
+
|
| 289 |
+
// Precompute zigzag β coefficient index mapping for scoring
|
| 290 |
+
const numZig = config.zigzagPositions.length;
|
| 291 |
+
const zigCoeffIdx = new Int32Array(numZig);
|
| 292 |
+
for (let z = 0; z < numZig; z++) {
|
| 293 |
+
const [r, c] = ZIGZAG_ORDER[config.zigzagPositions[z]];
|
| 294 |
+
zigCoeffIdx[z] = r * 8 + c;
|
| 295 |
+
}
|
| 296 |
+
const scoreDithers = generateDithers(key, 1024, config.delta);
|
| 297 |
+
const blockBuf = new Float64Array(64);
|
| 298 |
+
|
| 299 |
+
// Phase 1: score all candidates with DCT caching.
|
| 300 |
+
// For each (pad, shift), compute DCT once per scoring frame, then sweep
|
| 301 |
+
// all dither offsets using only DMQIM re-indexing (no DCT recomputation).
|
| 302 |
+
for (let padTop = 0; padTop < dwtPads; padTop++) {
|
| 303 |
+
for (let padLeft = 0; padLeft < dwtPads; padLeft++) {
|
| 304 |
+
const scoreDWTs = getScoringDWTs(padTop, padLeft);
|
| 305 |
+
|
| 306 |
+
for (let shiftY = 0; shiftY < 8; shiftY++) {
|
| 307 |
+
for (let shiftX = 0; shiftX < 8; shiftX++) {
|
| 308 |
+
const hl0 = scoreDWTs[0].hlSubband;
|
| 309 |
+
const newW = hl0.width - shiftX;
|
| 310 |
+
const newH = hl0.height - shiftY;
|
| 311 |
+
if (newW < effectiveTP || newH < effectiveTP) continue;
|
| 312 |
+
|
| 313 |
+
const grid = computeTileGrid(newW, newH, subbandTilePeriod);
|
| 314 |
+
if (grid.totalTiles === 0) continue;
|
| 315 |
+
|
| 316 |
+
// Use tile 0 for scoring (fast; sufficient for ranking)
|
| 317 |
+
const tile0Origin = getTileOrigin(grid, 0);
|
| 318 |
+
const tile0Blocks = getTileBlocks(
|
| 319 |
+
tile0Origin.x, tile0Origin.y, grid.tilePeriod, newW, newH
|
| 320 |
+
);
|
| 321 |
+
const nBlocks = tile0Blocks.length;
|
| 322 |
+
if (nBlocks === 0) continue;
|
| 323 |
+
|
| 324 |
+
const tile0OriginBR = Math.floor(tile0Origin.y / 8);
|
| 325 |
+
const tile0OriginBC = Math.floor(tile0Origin.x / 8);
|
| 326 |
+
const relBR = new Int32Array(nBlocks);
|
| 327 |
+
const relBC = new Int32Array(nBlocks);
|
| 328 |
+
for (let bi = 0; bi < nBlocks; bi++) {
|
| 329 |
+
relBR[bi] = tile0Blocks[bi].row - tile0OriginBR;
|
| 330 |
+
relBC[bi] = tile0Blocks[bi].col - tile0OriginBC;
|
| 331 |
+
}
|
| 332 |
+
|
| 333 |
+
// Precompute DCT coefficients + effective deltas per scoring frame
|
| 334 |
+
const frameCoeffs: Float64Array[] = [];
|
| 335 |
+
const frameDeltas: Float64Array[] = [];
|
| 336 |
+
|
| 337 |
+
for (let fi = 0; fi < scoreDWTs.length; fi++) {
|
| 338 |
+
const shifted = createShiftedSubband(scoreDWTs[fi].hlSubband, shiftX, shiftY);
|
| 339 |
+
const coeffs = new Float64Array(nBlocks * numZig);
|
| 340 |
+
const deltas = new Float64Array(nBlocks);
|
| 341 |
+
|
| 342 |
+
if (config.perceptualMasking) {
|
| 343 |
+
const energies = new Float64Array(nBlocks);
|
| 344 |
+
for (let bi = 0; bi < nBlocks; bi++) {
|
| 345 |
+
extractBlock(shifted.data, newW, tile0Blocks[bi].row, tile0Blocks[bi].col, blockBuf);
|
| 346 |
+
dctForward8x8(blockBuf);
|
| 347 |
+
energies[bi] = blockAcEnergy(blockBuf);
|
| 348 |
+
for (let z = 0; z < numZig; z++) {
|
| 349 |
+
coeffs[bi * numZig + z] = blockBuf[zigCoeffIdx[z]];
|
| 350 |
+
}
|
| 351 |
+
}
|
| 352 |
+
const factors = computeMaskingFactors(energies);
|
| 353 |
+
for (let bi = 0; bi < nBlocks; bi++) {
|
| 354 |
+
deltas[bi] = config.delta * factors[bi];
|
| 355 |
+
}
|
| 356 |
+
} else {
|
| 357 |
+
for (let bi = 0; bi < nBlocks; bi++) {
|
| 358 |
+
extractBlock(shifted.data, newW, tile0Blocks[bi].row, tile0Blocks[bi].col, blockBuf);
|
| 359 |
+
dctForward8x8(blockBuf);
|
| 360 |
+
for (let z = 0; z < numZig; z++) {
|
| 361 |
+
coeffs[bi * numZig + z] = blockBuf[zigCoeffIdx[z]];
|
| 362 |
+
}
|
| 363 |
+
deltas[bi] = config.delta;
|
| 364 |
+
}
|
| 365 |
+
}
|
| 366 |
+
|
| 367 |
+
frameCoeffs.push(coeffs);
|
| 368 |
+
frameDeltas.push(deltas);
|
| 369 |
+
}
|
| 370 |
+
|
| 371 |
+
// Sweep all dither offsets using cached coefficients (DMQIM only)
|
| 372 |
+
for (let ditherOffY = 0; ditherOffY < blocksPerSide; ditherOffY++) {
|
| 373 |
+
for (let ditherOffX = 0; ditherOffX < blocksPerSide; ditherOffX++) {
|
| 374 |
+
if (padTop === 0 && padLeft === 0 && shiftX === 0 && shiftY === 0
|
| 375 |
+
&& ditherOffX === 0 && ditherOffY === 0) {
|
| 376 |
+
continue; // Already tried in fast path
|
| 377 |
+
}
|
| 378 |
+
|
| 379 |
+
const avg = new Float64Array(codedLength);
|
| 380 |
+
let nSamples = 0;
|
| 381 |
+
|
| 382 |
+
for (let fi = 0; fi < frameCoeffs.length; fi++) {
|
| 383 |
+
const coeffs = frameCoeffs[fi];
|
| 384 |
+
const deltas = frameDeltas[fi];
|
| 385 |
+
const softBits = new Float64Array(codedLength);
|
| 386 |
+
const bitCounts = new Float64Array(codedLength);
|
| 387 |
+
|
| 388 |
+
for (let bi = 0; bi < nBlocks; bi++) {
|
| 389 |
+
const origR = ((relBR[bi] + ditherOffY) % blocksPerSide + blocksPerSide) % blocksPerSide;
|
| 390 |
+
const origC = ((relBC[bi] + ditherOffX) % blocksPerSide + blocksPerSide) % blocksPerSide;
|
| 391 |
+
const blockDitherBase = (origR * blocksPerSide + origC) * numZig;
|
| 392 |
+
const ed = deltas[bi];
|
| 393 |
+
// Remap bitIdx to match embedder's bit assignment at original position
|
| 394 |
+
let bitIdx = ((origR * blocksPerSide + origC) * numZig) % codedLength;
|
| 395 |
+
|
| 396 |
+
for (let z = 0; z < numZig; z++) {
|
| 397 |
+
if (bitIdx >= codedLength) bitIdx = 0;
|
| 398 |
+
const soft = dmqimExtractSoft(coeffs[bi * numZig + z], ed, scoreDithers[blockDitherBase + z]);
|
| 399 |
+
softBits[bitIdx] += soft;
|
| 400 |
+
bitCounts[bitIdx]++;
|
| 401 |
+
bitIdx++;
|
| 402 |
+
}
|
| 403 |
+
}
|
| 404 |
+
|
| 405 |
+
for (let i = 0; i < codedLength; i++) {
|
| 406 |
+
if (bitCounts[i] > 0) softBits[i] /= bitCounts[i];
|
| 407 |
+
avg[i] += softBits[i];
|
| 408 |
+
}
|
| 409 |
+
nSamples++;
|
| 410 |
+
}
|
| 411 |
+
|
| 412 |
+
let mag = 0;
|
| 413 |
+
for (let i = 0; i < codedLength; i++) {
|
| 414 |
+
avg[i] /= nSamples;
|
| 415 |
+
mag += avg[i] * avg[i];
|
| 416 |
+
}
|
| 417 |
+
|
| 418 |
+
candidates.push({ padTop, padLeft, shiftX, shiftY, ditherOffX, ditherOffY, signalMag: mag });
|
| 419 |
+
}
|
| 420 |
+
}
|
| 421 |
+
}
|
| 422 |
+
}
|
| 423 |
+
}
|
| 424 |
+
}
|
| 425 |
+
|
| 426 |
+
// Sort by signal magnitude and decode top candidates with all frames
|
| 427 |
+
candidates.sort((a, b) => b.signalMag - a.signalMag);
|
| 428 |
+
const MAX_DECODE = 50;
|
| 429 |
+
let bestResult: DetectionResult | null = null;
|
| 430 |
+
|
| 431 |
+
for (let i = 0; i < Math.min(MAX_DECODE, candidates.length); i++) {
|
| 432 |
+
const { padTop, padLeft, shiftX, shiftY, ditherOffX, ditherOffY } = candidates[i];
|
| 433 |
+
|
| 434 |
+
const dwts = (padTop === 0 && padLeft === 0)
|
| 435 |
+
? frameDWTs
|
| 436 |
+
: yPlanes.map((yp) => {
|
| 437 |
+
const { padded, paddedW, paddedH } = padYPlane(yp, width, height, padLeft, padTop);
|
| 438 |
+
return computeFrameDWT(padded, paddedW, paddedH, config);
|
| 439 |
+
});
|
| 440 |
+
|
| 441 |
+
const result = tryDetect(dwts, (hl) => {
|
| 442 |
+
const shifted = createShiftedSubband(hl, shiftX, shiftY);
|
| 443 |
+
const grid = computeTileGrid(shifted.width, shifted.height, subbandTilePeriod);
|
| 444 |
+
return { subband: shifted, grid, ditherOffX, ditherOffY, blocksPerSide };
|
| 445 |
+
});
|
| 446 |
+
|
| 447 |
+
if (result && (!bestResult || result.confidence > bestResult.confidence)) {
|
| 448 |
+
bestResult = result;
|
| 449 |
+
}
|
| 450 |
+
|
| 451 |
+
if (bestResult && bestResult.confidence >= 0.95) break;
|
| 452 |
+
}
|
| 453 |
+
|
| 454 |
+
return bestResult ?? noResult;
|
| 455 |
+
}
|
| 456 |
+
|
| 457 |
+
/**
 * Pad a Y plane on its top and left edges with edge-replicated pixels to
 * realign DWT pixel pairing.
 *
 * The output is `(width + padLeft)` wide and `(height + padTop)` tall. Only the
 * top and left borders grow: an output pixel at `(x, y)` reads the source pixel
 * at `(max(0, x - padLeft), max(0, y - padTop))`, so the first source row and
 * column are repeated into the new border and the original image sits at
 * offset `(padLeft, padTop)`.
 *
 * @param yPlane  Source samples, row-major with a stride of `width`.
 * @param width   Source width in pixels.
 * @param height  Source height in pixels.
 * @param padLeft Number of columns to add on the left.
 * @param padTop  Number of rows to add on the top.
 * @returns The padded plane together with its dimensions.
 */
function padYPlane(
  yPlane: Uint8Array,
  width: number,
  height: number,
  padLeft: number,
  padTop: number,
): { padded: Uint8Array; paddedW: number; paddedH: number } {
  const paddedW = width + padLeft;
  const paddedH = height + padTop;
  const padded = new Uint8Array(paddedW * paddedH);

  for (let y = 0; y < paddedH; y++) {
    // Rows above the image clamp to source row 0 (edge replication).
    const srcRow = Math.max(0, y - padTop) * width;
    const dstRow = y * paddedW;
    for (let x = 0; x < paddedW; x++) {
      // Columns left of the image clamp to source column 0.
      padded[dstRow + x] = yPlane[srcRow + Math.max(0, x - padLeft)];
    }
  }

  return { padded, paddedW, paddedH };
}
|
| 481 |
+
|
| 482 |
+
/**
 * Copy a subband with its origin moved to `(shiftX, shiftY)`.
 *
 * The result is `(hl.width - shiftX)` wide and `(hl.height - shiftY)` tall, and
 * element `(x, y)` of the result equals element `(x + shiftX, y + shiftY)` of
 * `hl`. This is a plain element-by-element copy into the buffer returned by
 * `createBuffer2D`, not an aliasing view; `hl` itself is never written.
 *
 * @param hl     Source subband (row-major, stride `hl.width`).
 * @param shiftX Number of leading columns to drop.
 * @param shiftY Number of leading rows to drop.
 * @returns The buffer obtained from `createBuffer2D`, filled with the shifted samples.
 */
function createShiftedSubband(hl: Buffer2D, shiftX: number, shiftY: number): Buffer2D {
  const newW = hl.width - shiftX;
  const newH = hl.height - shiftY;
  const shifted = createBuffer2D(newW, newH);

  for (let y = 0; y < newH; y++) {
    // Source row starts shiftY rows down and shiftX columns in.
    const srcOff = (y + shiftY) * hl.width + shiftX;
    const dstOff = y * newW;
    for (let x = 0; x < newW; x++) {
      shifted.data[dstOff + x] = hl.data[srcOff + x];
    }
  }

  return shifted;
}
|
| 498 |
|
| 499 |
/**
|
|
|
|
| 561 |
|
| 562 |
/** Minimum confidence to report a detection (low threshold is fine —
|
| 563 |
* the statistical model already ensures noise scores near 0%) */
|
| 564 |
+
const MIN_CONFIDENCE = 0.75;
|
| 565 |
|
| 566 |
/**
|
| 567 |
* Try to decode soft bits into a payload
|
|
|
|
| 626 |
width: number,
|
| 627 |
height: number,
|
| 628 |
key: string,
|
| 629 |
+
options?: DetectOptions,
|
| 630 |
): AutoDetectResult {
|
| 631 |
+
return autoDetectMultiFrame([yPlane], width, height, key, options);
|
| 632 |
}
|
| 633 |
|
| 634 |
/**
|
|
|
|
| 639 |
width: number,
|
| 640 |
height: number,
|
| 641 |
key: string,
|
| 642 |
+
options?: DetectOptions,
|
| 643 |
): AutoDetectResult {
|
| 644 |
let best: AutoDetectResult = {
|
| 645 |
detected: false,
|
|
|
|
| 651 |
};
|
| 652 |
|
| 653 |
for (const [name, config] of Object.entries(PRESETS)) {
|
| 654 |
+
const result = detectWatermarkMultiFrame(yPlanes, width, height, key, config, options);
|
| 655 |
if (result.detected && result.confidence > best.confidence) {
|
| 656 |
best = { ...result, presetUsed: name as PresetName };
|
| 657 |
}
|