Spaces:
Running
Running
Upload 13 files
Browse files- README.md +75 -26
- core/detector.ts +339 -23
README.md
CHANGED
|
@@ -10,7 +10,9 @@ pinned: false
|
|
| 10 |
|
| 11 |
# LTMarX — Video Watermarking
|
| 12 |
|
| 13 |
-
Imperceptible 32-bit watermarking for video. Embeds a payload into the luminance channel using DWT/DCT transform-domain quantization (DM-QIM) with BCH error correction.
|
|
|
|
|
|
|
| 14 |
|
| 15 |
All processing runs in the browser — no server round-trips needed.
|
| 16 |
|
|
@@ -44,13 +46,13 @@ core/ Pure TypeScript watermark engine (isomorphic, zero platform deps
|
|
| 44 |
├── dwt.ts Haar DWT (forward/inverse, multi-level)
|
| 45 |
├── dct.ts 8×8 DCT with zigzag scan
|
| 46 |
├── dmqim.ts Dither-Modulated QIM (embed/extract with soft decisions)
|
| 47 |
-
├── bch.ts BCH
|
| 48 |
-
├── crc.ts CRC-4
|
| 49 |
-
├── tiling.ts Periodic tile layout
|
| 50 |
-
├── masking.ts Perceptual masking (variance-adaptive
|
| 51 |
├── keygen.ts Seeded PRNG for dithers and permutations
|
| 52 |
├── embedder.ts Y-plane → watermarked Y-plane
|
| 53 |
-
├── detector.ts Y-plane → payload + confidence
|
| 54 |
├── presets.ts Named configurations (light → fortress)
|
| 55 |
└── types.ts Shared types
|
| 56 |
|
|
@@ -58,15 +60,16 @@ web/ Frontend (Vite + React + Tailwind)
|
|
| 58 |
├── src/
|
| 59 |
│ ├── App.tsx
|
| 60 |
│ ├── components/
|
| 61 |
-
│ │ ├── EmbedPanel.tsx Upload, configure, embed
|
| 62 |
│ │ ├── DetectPanel.tsx Upload, detect, display results
|
| 63 |
-
│ │ ├── ApiDocs.tsx Inline API reference
|
| 64 |
│ │ ├── ComparisonView.tsx Side-by-side / difference viewer
|
| 65 |
-
│ │ ├── RobustnessTest.tsx Automated attack
|
|
|
|
| 66 |
│ │ ├── StrengthSlider.tsx Preset selector with snap points
|
| 67 |
-
│ │
|
|
|
|
| 68 |
│ ├── lib/
|
| 69 |
-
│ │ └── video-io.ts Frame extraction, encoding, attack
|
| 70 |
│ └── workers/
|
| 71 |
│ └── watermark.worker.ts
|
| 72 |
└── index.html
|
|
@@ -86,33 +89,70 @@ tests/ Vitest test suite
|
|
| 86 |
### Embedding
|
| 87 |
|
| 88 |
```
|
| 89 |
-
Y plane → 2-level Haar DWT → HL subband → tile grid →
|
| 90 |
-
per tile: 8×8 DCT blocks → select mid-freq coefficients →
|
| 91 |
-
DM-QIM embed coded bits
|
|
|
|
| 92 |
```
|
| 93 |
|
| 94 |
### Payload Encoding
|
| 95 |
|
| 96 |
```
|
| 97 |
-
32-bit payload → CRC append → BCH encode → keyed interleave →
|
|
|
|
| 98 |
```
|
| 99 |
|
| 100 |
### Detection
|
| 101 |
|
| 102 |
```
|
| 103 |
-
Y plane → DWT → HL subband → tile grid →
|
| 104 |
per tile: DCT → DM-QIM soft extract →
|
| 105 |
-
soft-combine across tiles and frames →
|
|
|
|
| 106 |
```
|
| 107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
## Presets
|
| 109 |
|
| 110 |
-
| Preset | Delta | Tile Period |
|
| 111 |
-
|--------|-------|-------------|----------|---------|----------|
|
| 112 |
-
| **Light** | 50 | 256px | (
|
| 113 |
-
| **Moderate** |
|
| 114 |
-
| **Strong** | 110 | 208px | (
|
| 115 |
-
| **Fortress** | 150 | 192px | (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
## API
|
| 118 |
|
|
@@ -124,7 +164,7 @@ import { getPreset } from './core/presets';
|
|
| 124 |
|
| 125 |
const config = getPreset('moderate');
|
| 126 |
const result = embedWatermark(yPlane, width, height, payload, key, config);
|
| 127 |
-
// result.yPlane: watermarked Y plane
|
| 128 |
// result.psnr: quality metric (dB)
|
| 129 |
```
|
| 130 |
|
|
@@ -140,6 +180,15 @@ const result = detectWatermarkMultiFrame(yPlanes, width, height, key, config);
|
|
| 140 |
// result.confidence: 0–1
|
| 141 |
```
|
| 142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
### Auto-Detection (tries all presets)
|
| 144 |
|
| 145 |
```typescript
|
|
@@ -164,8 +213,8 @@ npm test # Run all tests
|
|
| 164 |
npm run test:watch # Watch mode
|
| 165 |
```
|
| 166 |
|
| 167 |
-
|
| 168 |
|
| 169 |
## Browser Encoding
|
| 170 |
|
| 171 |
-
The web UI encodes watermarked video using ffmpeg.wasm (x264 in WebAssembly). To avoid memory pressure, frames are encoded in chunks of 100 and concatenated at the end. Peak memory stays
|
|
|
|
| 10 |
|
| 11 |
# LTMarX — Video Watermarking
|
| 12 |
|
| 13 |
+
Imperceptible 32-bit watermarking for video. Embeds a payload into the luminance channel using DWT/DCT transform-domain quantization (DM-QIM) with BCH error correction.
|
| 14 |
+
|
| 15 |
+
Survives re-encoding, rescaling, brightness/contrast/saturation adjustments, and cropping up to ~20%.
|
| 16 |
|
| 17 |
All processing runs in the browser — no server round-trips needed.
|
| 18 |
|
|
|
|
| 46 |
├── dwt.ts Haar DWT (forward/inverse, multi-level)
|
| 47 |
├── dct.ts 8×8 DCT with zigzag scan
|
| 48 |
├── dmqim.ts Dither-Modulated QIM (embed/extract with soft decisions)
|
| 49 |
+
├── bch.ts BCH(63,36,5) over GF(2^6), Berlekamp-Massey decoding
|
| 50 |
+
├── crc.ts CRC-4 integrity check
|
| 51 |
+
├── tiling.ts Periodic tile layout + autocorrelation-based grid recovery
|
| 52 |
+
├── masking.ts Perceptual masking (variance-adaptive quantization step)
|
| 53 |
├── keygen.ts Seeded PRNG for dithers and permutations
|
| 54 |
├── embedder.ts Y-plane → watermarked Y-plane
|
| 55 |
+
├── detector.ts Y-plane(s) → payload + confidence
|
| 56 |
├── presets.ts Named configurations (light → fortress)
|
| 57 |
└── types.ts Shared types
|
| 58 |
|
|
|
|
| 60 |
├── src/
|
| 61 |
│ ├── App.tsx
|
| 62 |
│ ├── components/
|
| 63 |
+
│ │ ├── EmbedPanel.tsx Upload, configure, embed, download
|
| 64 |
│ │ ├── DetectPanel.tsx Upload, detect, display results
|
|
|
|
| 65 |
│ │ ├── ComparisonView.tsx Side-by-side / difference viewer
|
| 66 |
+
│ │ ├── RobustnessTest.tsx Automated attack battery (re-encode, crop, etc.)
|
| 67 |
+
│ │ ├── HowItWorks.tsx Interactive explainer with D3 visualizations
|
| 68 |
│ │ ├── StrengthSlider.tsx Preset selector with snap points
|
| 69 |
+
│ │ ├── ResultCard.tsx Detection result display
|
| 70 |
+
│ │ └── ApiDocs.tsx Inline API reference
|
| 71 |
│ ├── lib/
|
| 72 |
+
│ │ └── video-io.ts Frame extraction, encoding, attack simulations
|
| 73 |
│ └── workers/
|
| 74 |
│ └── watermark.worker.ts
|
| 75 |
└── index.html
|
|
|
|
| 89 |
### Embedding
|
| 90 |
|
| 91 |
```
|
| 92 |
+
Y plane → 2-level Haar DWT → HL subband → periodic tile grid →
|
| 93 |
+
per tile: 8×8 DCT blocks → select mid-freq zigzag coefficients →
|
| 94 |
+
DM-QIM embed coded bits (with per-block dithering and perceptual masking) →
|
| 95 |
+
inverse DCT → inverse DWT → modified Y plane
|
| 96 |
```
|
| 97 |
|
| 98 |
### Payload Encoding
|
| 99 |
|
| 100 |
```
|
| 101 |
+
32-bit payload → CRC-4 append → BCH(63,36,5) encode → keyed interleave →
|
| 102 |
+
map to DCT coefficients across tiles (with wraparound redundancy)
|
| 103 |
```
|
| 104 |
|
| 105 |
### Detection
|
| 106 |
|
| 107 |
```
|
| 108 |
+
Y plane(s) → DWT → HL subband → tile grid →
|
| 109 |
per tile: DCT → DM-QIM soft extract →
|
| 110 |
+
soft-combine across tiles and frames → keyed de-interleave →
|
| 111 |
+
BCH soft decode → CRC verify → payload
|
| 112 |
```
|
| 113 |
|
| 114 |
+
### Crop-Resilient Detection
|
| 115 |
+
|
| 116 |
+
When the frame has been cropped, the detector doesn't know the original tile grid alignment. It searches over three alignment parameters:
|
| 117 |
+
|
| 118 |
+
1. **DWT padding** (0–3 per axis) — the crop may break DWT pixel pairing
|
| 119 |
+
2. **DCT block shift** (0–7 per axis) — the crop may misalign 8×8 block boundaries within the subband
|
| 120 |
+
3. **Tile dither offset** (0 to N−1 per axis, where N is the number of 8×8 blocks per tile side in the subband) — the crop shifts which tile-phase position each block maps to
|
| 121 |
+
|
| 122 |
+
The total search space is 16 × 64 × N² candidates (~37K for the strong preset). To make this fast:
|
| 123 |
+
|
| 124 |
+
- DCT coefficients are precomputed once per (pad, shift) combination using only tile 0
|
| 125 |
+
- Dither offsets are swept cheaply using just DM-QIM re-extraction on cached coefficients
|
| 126 |
+
- Candidates are ranked by signal magnitude (sum of squared averaged soft bits)
|
| 127 |
+
- Only the top 50 candidates are fully decoded with all frames
|
| 128 |
+
|
| 129 |
+
This runs in ~1 second for 32 frames on a 512×512 video.
|
| 130 |
+
|
| 131 |
## Presets
|
| 132 |
|
| 133 |
+
| Preset | Delta | Tile Period | Zigzag Positions | Masking | Use Case |
|
| 134 |
+
|--------|-------|-------------|------------------|---------|----------|
|
| 135 |
+
| **Light** | 50 | 256px | 3–14 (mid-freq) | No | Near-invisible, mild compression |
|
| 136 |
+
| **Moderate** | 62 | 240px | 3–14 (mid-freq) | Yes | Balanced with perceptual masking |
|
| 137 |
+
| **Strong** | 110 | 208px | 1–20 (low+mid) | Yes | Heavy re-encoding, rescaling, cropping |
|
| 138 |
+
| **Fortress** | 150 | 192px | 1–20 (low+mid) | Yes | Maximum robustness |
|
| 139 |
+
|
| 140 |
+
All presets use BCH(63,36,5) with CRC-4 and 2-level DWT.
|
| 141 |
+
|
| 142 |
+
Higher delta = stronger embedding = more visible artifacts but better survival under attacks. The "strong" and "fortress" presets use more DCT coefficients (zigzag positions 1–20 vs 3–14) for additional redundancy.
|
| 143 |
+
|
| 144 |
+
## Robustness
|
| 145 |
+
|
| 146 |
+
The web UI includes an automated robustness test battery. Each test applies an attack to the watermarked video and attempts detection:
|
| 147 |
+
|
| 148 |
+
| Attack | Variants Tested |
|
| 149 |
+
|--------|----------------|
|
| 150 |
+
| **Re-encode** | CRF 23, 28, 33, 38, 43 |
|
| 151 |
+
| **Downscale** | 25%, 50%, 75%, 90% |
|
| 152 |
+
| **Brightness** | -0.2, +0.2, +0.4 |
|
| 153 |
+
| **Contrast** | 0.5×, 1.5×, 2.0× |
|
| 154 |
+
| **Saturation** | 0×, 0.5×, 2.0× |
|
| 155 |
+
| **Crop** | 5%, 10%, 15%, 20% (per side) |
|
| 156 |
|
| 157 |
## API
|
| 158 |
|
|
|
|
| 164 |
|
| 165 |
const config = getPreset('moderate');
|
| 166 |
const result = embedWatermark(yPlane, width, height, payload, key, config);
|
| 167 |
+
// result.yPlane: watermarked Y plane (Uint8Array)
|
| 168 |
// result.psnr: quality metric (dB)
|
| 169 |
```
|
| 170 |
|
|
|
|
| 180 |
// result.confidence: 0–1
|
| 181 |
```
|
| 182 |
|
| 183 |
+
### Crop-Resilient Detection
|
| 184 |
+
|
| 185 |
+
```typescript
|
| 186 |
+
const result = detectWatermarkMultiFrame(
|
| 187 |
+
yPlanes, width, height, key, config,
|
| 188 |
+
{ cropResilient: true }
|
| 189 |
+
);
|
| 190 |
+
```
|
| 191 |
+
|
| 192 |
### Auto-Detection (tries all presets)
|
| 193 |
|
| 194 |
```typescript
|
|
|
|
| 213 |
npm run test:watch # Watch mode
|
| 214 |
```
|
| 215 |
|
| 216 |
+
25 tests across 6 files covering: DWT round-trip, DCT round-trip, DM-QIM embed/extract, BCH encode/decode with error correction, CRC append/verify, full embed-detect pipeline across presets, false positive rejection (wrong key, unwatermarked frame), crop-resilient detection (arbitrary offset and ~20% crop).
|
| 217 |
|
| 218 |
## Browser Encoding
|
| 219 |
|
| 220 |
+
The web UI encodes watermarked video using ffmpeg.wasm (x264 in WebAssembly). To avoid memory pressure, frames are encoded in chunks of 100 and concatenated at the end. Peak memory stays proportional to chunk size rather than scaling with video length.
|
core/detector.ts
CHANGED
|
@@ -5,13 +5,13 @@
|
|
| 5 |
*/
|
| 6 |
|
| 7 |
import type { WatermarkConfig, DetectionResult, Buffer2D } from './types.js';
|
| 8 |
-
import { yPlaneToBuffer, dwtForward, extractSubband } from './dwt.js';
|
| 9 |
import { dctForward8x8, extractBlock, ZIGZAG_ORDER } from './dct.js';
|
| 10 |
import { dmqimExtractSoft } from './dmqim.js';
|
| 11 |
import { crcVerify } from './crc.js';
|
| 12 |
import { BchCodec } from './bch.js';
|
| 13 |
import { generateDithers, generatePermutation } from './keygen.js';
|
| 14 |
-
import { computeTileGrid, getTileOrigin, getTileBlocks, type TileGrid } from './tiling.js';
|
| 15 |
import { blockAcEnergy, computeMaskingFactors } from './masking.js';
|
| 16 |
import { bitsToPayload } from './embedder.js';
|
| 17 |
import { PRESETS } from './presets.js';
|
|
@@ -57,7 +57,10 @@ function extractSoftBitsFromSubband(
|
|
| 57 |
hlSubband: Buffer2D,
|
| 58 |
tileGrid: TileGrid,
|
| 59 |
key: string,
|
| 60 |
-
config: WatermarkConfig
|
|
|
|
|
|
|
|
|
|
| 61 |
): { tileSoftBits: Float64Array[]; totalTiles: number } | null {
|
| 62 |
if (tileGrid.totalTiles === 0) return null;
|
| 63 |
|
|
@@ -75,13 +78,18 @@ function extractSoftBitsFromSubband(
|
|
| 75 |
zigCoeffIdx[z] = r * 8 + c;
|
| 76 |
}
|
| 77 |
|
|
|
|
|
|
|
|
|
|
| 78 |
for (let tileIdx = 0; tileIdx < tileGrid.totalTiles; tileIdx++) {
|
| 79 |
let ditherIdx = 0; // Reset per tile — matches embedder
|
| 80 |
const origin = getTileOrigin(tileGrid, tileIdx);
|
| 81 |
const blocks = getTileBlocks(origin.x, origin.y, tileGrid.tilePeriod, hlSubband.width, hlSubband.height);
|
|
|
|
|
|
|
| 82 |
|
| 83 |
const softBits = new Float64Array(codedLength);
|
| 84 |
-
const
|
| 85 |
|
| 86 |
let maskingFactors: Float64Array | null = null;
|
| 87 |
if (config.perceptualMasking && blocks.length > 0) {
|
|
@@ -103,27 +111,44 @@ function extractSoftBitsFromSubband(
|
|
| 103 |
const maskFactor = maskingFactors ? maskingFactors[bi] : 1.0;
|
| 104 |
const effectiveDelta = config.delta * maskFactor;
|
| 105 |
|
| 106 |
-
//
|
| 107 |
-
//
|
| 108 |
-
//
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
for (let z = 0; z < zigCoeffIdx.length; z++) {
|
| 112 |
if (bitIdx >= codedLength) bitIdx = 0;
|
| 113 |
|
| 114 |
const coeffIdx = zigCoeffIdx[z];
|
| 115 |
-
const dither = dithers[ditherIdx++];
|
| 116 |
|
| 117 |
const soft = dmqimExtractSoft(blockBuf[coeffIdx], effectiveDelta, dither);
|
| 118 |
-
softBits[bitIdx] += soft
|
| 119 |
-
|
| 120 |
|
| 121 |
bitIdx++;
|
| 122 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
}
|
| 124 |
|
| 125 |
for (let i = 0; i < codedLength; i++) {
|
| 126 |
-
if (
|
| 127 |
}
|
| 128 |
|
| 129 |
tileSoftBits.push(softBits);
|
|
@@ -132,10 +157,22 @@ function extractSoftBitsFromSubband(
|
|
| 132 |
return { tileSoftBits, totalTiles: tileGrid.totalTiles };
|
| 133 |
}
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
/**
|
| 136 |
* Detect watermark from multiple Y planes.
|
| 137 |
* Extracts soft decisions from each frame independently, then combines
|
| 138 |
* across frames and tiles (never averages raw pixels).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
*/
|
| 140 |
export function detectWatermarkMultiFrame(
|
| 141 |
yPlanes: Uint8Array[],
|
|
@@ -143,6 +180,7 @@ export function detectWatermarkMultiFrame(
|
|
| 143 |
height: number,
|
| 144 |
key: string,
|
| 145 |
config: WatermarkConfig,
|
|
|
|
| 146 |
): DetectionResult {
|
| 147 |
const noResult: DetectionResult = {
|
| 148 |
detected: false,
|
|
@@ -158,15 +196,23 @@ export function detectWatermarkMultiFrame(
|
|
| 158 |
const bch = new BchCodec(config.bch);
|
| 159 |
const perm = generatePermutation(key, codedLength);
|
| 160 |
|
| 161 |
-
// Helper: try to detect with given frames and explicit tile grid
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
frames: FrameDWT[],
|
| 164 |
-
|
| 165 |
): DetectionResult | null => {
|
| 166 |
const softBits: Float64Array[] = [];
|
| 167 |
for (const { hlSubband, subbandTilePeriod } of frames) {
|
| 168 |
-
const
|
| 169 |
-
const frameResult = extractSoftBitsFromSubband(
|
| 170 |
if (frameResult) softBits.push(...frameResult.tileSoftBits);
|
| 171 |
}
|
| 172 |
if (softBits.length === 0) return null;
|
|
@@ -175,11 +221,279 @@ export function detectWatermarkMultiFrame(
|
|
| 175 |
|
| 176 |
// Fast path: zero-phase grid (uncropped frames)
|
| 177 |
const frameDWTs = yPlanes.map((yp) => computeFrameDWT(yp, width, height, config));
|
| 178 |
-
const fast =
|
| 179 |
-
|
|
|
|
|
|
|
| 180 |
if (fast) return fast;
|
| 181 |
|
| 182 |
-
return noResult;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
}
|
| 184 |
|
| 185 |
/**
|
|
@@ -247,7 +561,7 @@ function decodeFromSoftBits(
|
|
| 247 |
|
| 248 |
/** Minimum confidence to report a detection (low threshold is fine —
|
| 249 |
* the statistical model already ensures noise scores near 0%) */
|
| 250 |
-
const MIN_CONFIDENCE = 0.
|
| 251 |
|
| 252 |
/**
|
| 253 |
* Try to decode soft bits into a payload
|
|
@@ -312,8 +626,9 @@ export function autoDetect(
|
|
| 312 |
width: number,
|
| 313 |
height: number,
|
| 314 |
key: string,
|
|
|
|
| 315 |
): AutoDetectResult {
|
| 316 |
-
return autoDetectMultiFrame([yPlane], width, height, key);
|
| 317 |
}
|
| 318 |
|
| 319 |
/**
|
|
@@ -324,6 +639,7 @@ export function autoDetectMultiFrame(
|
|
| 324 |
width: number,
|
| 325 |
height: number,
|
| 326 |
key: string,
|
|
|
|
| 327 |
): AutoDetectResult {
|
| 328 |
let best: AutoDetectResult = {
|
| 329 |
detected: false,
|
|
@@ -335,7 +651,7 @@ export function autoDetectMultiFrame(
|
|
| 335 |
};
|
| 336 |
|
| 337 |
for (const [name, config] of Object.entries(PRESETS)) {
|
| 338 |
-
const result = detectWatermarkMultiFrame(yPlanes, width, height, key, config);
|
| 339 |
if (result.detected && result.confidence > best.confidence) {
|
| 340 |
best = { ...result, presetUsed: name as PresetName };
|
| 341 |
}
|
|
|
|
| 5 |
*/
|
| 6 |
|
| 7 |
import type { WatermarkConfig, DetectionResult, Buffer2D } from './types.js';
|
| 8 |
+
import { createBuffer2D, yPlaneToBuffer, dwtForward, extractSubband } from './dwt.js';
|
| 9 |
import { dctForward8x8, extractBlock, ZIGZAG_ORDER } from './dct.js';
|
| 10 |
import { dmqimExtractSoft } from './dmqim.js';
|
| 11 |
import { crcVerify } from './crc.js';
|
| 12 |
import { BchCodec } from './bch.js';
|
| 13 |
import { generateDithers, generatePermutation } from './keygen.js';
|
| 14 |
+
import { computeTileGrid, recoverTileGrid, getTileOrigin, getTileBlocks, type TileGrid } from './tiling.js';
|
| 15 |
import { blockAcEnergy, computeMaskingFactors } from './masking.js';
|
| 16 |
import { bitsToPayload } from './embedder.js';
|
| 17 |
import { PRESETS } from './presets.js';
|
|
|
|
| 57 |
hlSubband: Buffer2D,
|
| 58 |
tileGrid: TileGrid,
|
| 59 |
key: string,
|
| 60 |
+
config: WatermarkConfig,
|
| 61 |
+
ditherOffX: number = 0,
|
| 62 |
+
ditherOffY: number = 0,
|
| 63 |
+
blocksPerSide: number = 0,
|
| 64 |
): { tileSoftBits: Float64Array[]; totalTiles: number } | null {
|
| 65 |
if (tileGrid.totalTiles === 0) return null;
|
| 66 |
|
|
|
|
| 78 |
zigCoeffIdx[z] = r * 8 + c;
|
| 79 |
}
|
| 80 |
|
| 81 |
+
const hasDitherOffset = ditherOffX !== 0 || ditherOffY !== 0;
|
| 82 |
+
const numZig = config.zigzagPositions.length;
|
| 83 |
+
|
| 84 |
for (let tileIdx = 0; tileIdx < tileGrid.totalTiles; tileIdx++) {
|
| 85 |
let ditherIdx = 0; // Reset per tile — matches embedder
|
| 86 |
const origin = getTileOrigin(tileGrid, tileIdx);
|
| 87 |
const blocks = getTileBlocks(origin.x, origin.y, tileGrid.tilePeriod, hlSubband.width, hlSubband.height);
|
| 88 |
+
const tileOriginBlockRow = Math.floor(origin.y / 8);
|
| 89 |
+
const tileOriginBlockCol = Math.floor(origin.x / 8);
|
| 90 |
|
| 91 |
const softBits = new Float64Array(codedLength);
|
| 92 |
+
const bitCounts = new Float64Array(codedLength);
|
| 93 |
|
| 94 |
let maskingFactors: Float64Array | null = null;
|
| 95 |
if (config.perceptualMasking && blocks.length > 0) {
|
|
|
|
| 111 |
const maskFactor = maskingFactors ? maskingFactors[bi] : 1.0;
|
| 112 |
const effectiveDelta = config.delta * maskFactor;
|
| 113 |
|
| 114 |
+
// Compute dither index and bit index: when dither offset is active,
|
| 115 |
+
// remap block position to find the embedder's dither and bit assignment
|
| 116 |
+
// for this spatial location within the periodic tile structure.
|
| 117 |
+
let blockDitherBase: number;
|
| 118 |
+
if (hasDitherOffset && blocksPerSide > 0) {
|
| 119 |
+
const relBr = row - tileOriginBlockRow;
|
| 120 |
+
const relBc = col - tileOriginBlockCol;
|
| 121 |
+
const origR = (relBr + ditherOffY) % blocksPerSide;
|
| 122 |
+
const origC = (relBc + ditherOffX) % blocksPerSide;
|
| 123 |
+
blockDitherBase = (origR * blocksPerSide + origC) * numZig;
|
| 124 |
+
// Remap bitIdx to match the embedder's bit assignment at the original position
|
| 125 |
+
bitIdx = ((origR * blocksPerSide + origC) * numZig) % codedLength;
|
| 126 |
+
} else {
|
| 127 |
+
blockDitherBase = ditherIdx;
|
| 128 |
+
}
|
| 129 |
|
| 130 |
for (let z = 0; z < zigCoeffIdx.length; z++) {
|
| 131 |
if (bitIdx >= codedLength) bitIdx = 0;
|
| 132 |
|
| 133 |
const coeffIdx = zigCoeffIdx[z];
|
| 134 |
+
const dither = hasDitherOffset ? dithers[blockDitherBase + z] : dithers[ditherIdx++];
|
| 135 |
|
| 136 |
const soft = dmqimExtractSoft(blockBuf[coeffIdx], effectiveDelta, dither);
|
| 137 |
+
softBits[bitIdx] += soft;
|
| 138 |
+
bitCounts[bitIdx]++;
|
| 139 |
|
| 140 |
bitIdx++;
|
| 141 |
}
|
| 142 |
+
|
| 143 |
+
if (!hasDitherOffset) {
|
| 144 |
+
// ditherIdx already incremented in the loop above
|
| 145 |
+
} else {
|
| 146 |
+
ditherIdx += numZig; // keep in sync
|
| 147 |
+
}
|
| 148 |
}
|
| 149 |
|
| 150 |
for (let i = 0; i < codedLength; i++) {
|
| 151 |
+
if (bitCounts[i] > 0) softBits[i] /= bitCounts[i];
|
| 152 |
}
|
| 153 |
|
| 154 |
tileSoftBits.push(softBits);
|
|
|
|
| 157 |
return { tileSoftBits, totalTiles: tileGrid.totalTiles };
|
| 158 |
}
|
| 159 |
|
| 160 |
+
/** Options for crop-resilient detection */
|
| 161 |
+
export interface DetectOptions {
|
| 162 |
+
/** Enable the grid-phase search for cropped content; it runs only when the zero-phase fast path finds nothing */
|
| 163 |
+
cropResilient?: boolean;
|
| 164 |
+
}
|
| 165 |
+
|
| 166 |
/**
|
| 167 |
* Detect watermark from multiple Y planes.
|
| 168 |
* Extracts soft decisions from each frame independently, then combines
|
| 169 |
* across frames and tiles (never averages raw pixels).
|
| 170 |
+
*
|
| 171 |
+
* When cropResilient is true, searches over:
|
| 172 |
+
* - 16 DWT-pad combinations (0..3 × 0..3 for dwtLevels=2)
|
| 173 |
+
* - 8×8 DCT block shifts (0..7 per axis) and N×N tile-phase offsets (block-aligned, N = subband tile period / 8, rounded down)
|
| 174 |
+
* Signal magnitude from one frame ranks candidates cheaply, then the
|
| 175 |
+
* top candidates are decoded using all frames.
|
| 176 |
*/
|
| 177 |
export function detectWatermarkMultiFrame(
|
| 178 |
yPlanes: Uint8Array[],
|
|
|
|
| 180 |
height: number,
|
| 181 |
key: string,
|
| 182 |
config: WatermarkConfig,
|
| 183 |
+
options?: DetectOptions,
|
| 184 |
): DetectionResult {
|
| 185 |
const noResult: DetectionResult = {
|
| 186 |
detected: false,
|
|
|
|
| 196 |
const bch = new BchCodec(config.bch);
|
| 197 |
const perm = generatePermutation(key, codedLength);
|
| 198 |
|
| 199 |
+
// Helper: try to detect with given frames and explicit tile grid.
|
| 200 |
+
// makeSubbandAndGrid can optionally transform the subband (e.g. shift it).
|
| 201 |
+
interface SubbandAndGrid {
|
| 202 |
+
subband: Buffer2D;
|
| 203 |
+
grid: TileGrid;
|
| 204 |
+
ditherOffX?: number;
|
| 205 |
+
ditherOffY?: number;
|
| 206 |
+
blocksPerSide?: number;
|
| 207 |
+
}
|
| 208 |
+
const tryDetect = (
|
| 209 |
frames: FrameDWT[],
|
| 210 |
+
makeSubbandAndGrid: (hlSubband: Buffer2D, stp: number) => SubbandAndGrid,
|
| 211 |
): DetectionResult | null => {
|
| 212 |
const softBits: Float64Array[] = [];
|
| 213 |
for (const { hlSubband, subbandTilePeriod } of frames) {
|
| 214 |
+
const { subband, grid, ditherOffX, ditherOffY, blocksPerSide: bps } = makeSubbandAndGrid(hlSubband, subbandTilePeriod);
|
| 215 |
+
const frameResult = extractSoftBitsFromSubband(subband, grid, key, config, ditherOffX ?? 0, ditherOffY ?? 0, bps ?? 0);
|
| 216 |
if (frameResult) softBits.push(...frameResult.tileSoftBits);
|
| 217 |
}
|
| 218 |
if (softBits.length === 0) return null;
|
|
|
|
| 221 |
|
| 222 |
// Fast path: zero-phase grid (uncropped frames)
|
| 223 |
const frameDWTs = yPlanes.map((yp) => computeFrameDWT(yp, width, height, config));
|
| 224 |
+
const fast = tryDetect(frameDWTs, (hl, stp) => ({
|
| 225 |
+
subband: hl,
|
| 226 |
+
grid: computeTileGrid(hl.width, hl.height, stp),
|
| 227 |
+
}));
|
| 228 |
if (fast) return fast;
|
| 229 |
|
| 230 |
+
if (!options?.cropResilient) return noResult;
|
| 231 |
+
|
| 232 |
+
// ── Crop-resilient: joint search over DWT-pad × pixel-shift × dither-offset ──
|
| 233 |
+
//
|
| 234 |
+
// A crop of C pixels causes three alignment problems:
|
| 235 |
+
// 1. DWT pixel pairing: pad by C%4 → search 0..3 per axis (16 combos)
|
| 236 |
+
// 2. DCT block alignment: subband shift % 8 → search 0..7 per axis (64)
|
| 237 |
+
// 3. Tile dither offset: which block within the tile period does the
|
| 238 |
+
// detector's block 0 correspond to? Search 0..blocksPerTileSide-1
|
| 239 |
+
// per axis.
|
| 240 |
+
//
|
| 241 |
+
// All three must be correct simultaneously for signal to emerge, so we
|
| 242 |
+
// search them jointly. For each (pad, shift), we compute DCT blocks once
|
| 243 |
+
// per scoring frame, then sweep dither offsets cheaply (DMQIM re-indexing
|
| 244 |
+
// only, no DCT recomputation).
|
| 245 |
+
//
|
| 246 |
+
// Scoring uses frame 0 only (nScoringFrames = 1) to keep candidate ranking fast.
|
| 247 |
+
// Top candidates are decoded with ALL frames.
|
| 248 |
+
|
| 249 |
+
const subbandTilePeriod = Math.floor(config.tilePeriod / (1 << config.dwtLevels));
|
| 250 |
+
const effectiveTP = Math.max(8, Math.floor(subbandTilePeriod / 8) * 8);
|
| 251 |
+
const blocksPerSide = effectiveTP / 8;
|
| 252 |
+
const dwtPads = 1 << config.dwtLevels; // 4 for dwtLevels=2
|
| 253 |
+
|
| 254 |
+
// Scoring: use frame 0 only for fast candidate ranking (36K candidates)
|
| 255 |
+
const nScoringFrames = 1;
|
| 256 |
+
|
| 257 |
+
interface Candidate {
|
| 258 |
+
padTop: number;
|
| 259 |
+
padLeft: number;
|
| 260 |
+
shiftX: number;
|
| 261 |
+
shiftY: number;
|
| 262 |
+
ditherOffX: number;
|
| 263 |
+
ditherOffY: number;
|
| 264 |
+
signalMag: number;
|
| 265 |
+
}
|
| 266 |
+
|
| 267 |
+
const candidates: Candidate[] = [];
|
| 268 |
+
|
| 269 |
+
// Precompute DWTs for scoring frames, cached by pad
|
| 270 |
+
const scoringDWTCache = new Map<string, FrameDWT[]>();
|
| 271 |
+
const getScoringDWTs = (padTop: number, padLeft: number): FrameDWT[] => {
|
| 272 |
+
const cacheKey = `${padTop},${padLeft}`;
|
| 273 |
+
let cached = scoringDWTCache.get(cacheKey);
|
| 274 |
+
if (!cached) {
|
| 275 |
+
cached = [];
|
| 276 |
+
for (let fi = 0; fi < nScoringFrames; fi++) {
|
| 277 |
+
if (padTop === 0 && padLeft === 0) {
|
| 278 |
+
cached.push(frameDWTs[fi]);
|
| 279 |
+
} else {
|
| 280 |
+
const { padded, paddedW, paddedH } = padYPlane(yPlanes[fi], width, height, padLeft, padTop);
|
| 281 |
+
cached.push(computeFrameDWT(padded, paddedW, paddedH, config));
|
| 282 |
+
}
|
| 283 |
+
}
|
| 284 |
+
scoringDWTCache.set(cacheKey, cached);
|
| 285 |
+
}
|
| 286 |
+
return cached;
|
| 287 |
+
};
|
| 288 |
+
|
| 289 |
+
// Precompute zigzag → coefficient index mapping for scoring
|
| 290 |
+
const numZig = config.zigzagPositions.length;
|
| 291 |
+
const zigCoeffIdx = new Int32Array(numZig);
|
| 292 |
+
for (let z = 0; z < numZig; z++) {
|
| 293 |
+
const [r, c] = ZIGZAG_ORDER[config.zigzagPositions[z]];
|
| 294 |
+
zigCoeffIdx[z] = r * 8 + c;
|
| 295 |
+
}
|
| 296 |
+
const scoreDithers = generateDithers(key, 1024, config.delta);
|
| 297 |
+
const blockBuf = new Float64Array(64);
|
| 298 |
+
|
| 299 |
+
// Phase 1: score all candidates with DCT caching.
|
| 300 |
+
// For each (pad, shift), compute DCT once per scoring frame, then sweep
|
| 301 |
+
// all dither offsets using only DMQIM re-indexing (no DCT recomputation).
|
| 302 |
+
for (let padTop = 0; padTop < dwtPads; padTop++) {
|
| 303 |
+
for (let padLeft = 0; padLeft < dwtPads; padLeft++) {
|
| 304 |
+
const scoreDWTs = getScoringDWTs(padTop, padLeft);
|
| 305 |
+
|
| 306 |
+
for (let shiftY = 0; shiftY < 8; shiftY++) {
|
| 307 |
+
for (let shiftX = 0; shiftX < 8; shiftX++) {
|
| 308 |
+
const hl0 = scoreDWTs[0].hlSubband;
|
| 309 |
+
const newW = hl0.width - shiftX;
|
| 310 |
+
const newH = hl0.height - shiftY;
|
| 311 |
+
if (newW < effectiveTP || newH < effectiveTP) continue;
|
| 312 |
+
|
| 313 |
+
const grid = computeTileGrid(newW, newH, subbandTilePeriod);
|
| 314 |
+
if (grid.totalTiles === 0) continue;
|
| 315 |
+
|
| 316 |
+
// Use tile 0 for scoring (fast; sufficient for ranking)
|
| 317 |
+
const tile0Origin = getTileOrigin(grid, 0);
|
| 318 |
+
const tile0Blocks = getTileBlocks(
|
| 319 |
+
tile0Origin.x, tile0Origin.y, grid.tilePeriod, newW, newH
|
| 320 |
+
);
|
| 321 |
+
const nBlocks = tile0Blocks.length;
|
| 322 |
+
if (nBlocks === 0) continue;
|
| 323 |
+
|
| 324 |
+
const tile0OriginBR = Math.floor(tile0Origin.y / 8);
|
| 325 |
+
const tile0OriginBC = Math.floor(tile0Origin.x / 8);
|
| 326 |
+
const relBR = new Int32Array(nBlocks);
|
| 327 |
+
const relBC = new Int32Array(nBlocks);
|
| 328 |
+
for (let bi = 0; bi < nBlocks; bi++) {
|
| 329 |
+
relBR[bi] = tile0Blocks[bi].row - tile0OriginBR;
|
| 330 |
+
relBC[bi] = tile0Blocks[bi].col - tile0OriginBC;
|
| 331 |
+
}
|
| 332 |
+
|
| 333 |
+
// Precompute DCT coefficients + effective deltas per scoring frame
|
| 334 |
+
const frameCoeffs: Float64Array[] = [];
|
| 335 |
+
const frameDeltas: Float64Array[] = [];
|
| 336 |
+
|
| 337 |
+
for (let fi = 0; fi < scoreDWTs.length; fi++) {
|
| 338 |
+
const shifted = createShiftedSubband(scoreDWTs[fi].hlSubband, shiftX, shiftY);
|
| 339 |
+
const coeffs = new Float64Array(nBlocks * numZig);
|
| 340 |
+
const deltas = new Float64Array(nBlocks);
|
| 341 |
+
|
| 342 |
+
if (config.perceptualMasking) {
|
| 343 |
+
const energies = new Float64Array(nBlocks);
|
| 344 |
+
for (let bi = 0; bi < nBlocks; bi++) {
|
| 345 |
+
extractBlock(shifted.data, newW, tile0Blocks[bi].row, tile0Blocks[bi].col, blockBuf);
|
| 346 |
+
dctForward8x8(blockBuf);
|
| 347 |
+
energies[bi] = blockAcEnergy(blockBuf);
|
| 348 |
+
for (let z = 0; z < numZig; z++) {
|
| 349 |
+
coeffs[bi * numZig + z] = blockBuf[zigCoeffIdx[z]];
|
| 350 |
+
}
|
| 351 |
+
}
|
| 352 |
+
const factors = computeMaskingFactors(energies);
|
| 353 |
+
for (let bi = 0; bi < nBlocks; bi++) {
|
| 354 |
+
deltas[bi] = config.delta * factors[bi];
|
| 355 |
+
}
|
| 356 |
+
} else {
|
| 357 |
+
for (let bi = 0; bi < nBlocks; bi++) {
|
| 358 |
+
extractBlock(shifted.data, newW, tile0Blocks[bi].row, tile0Blocks[bi].col, blockBuf);
|
| 359 |
+
dctForward8x8(blockBuf);
|
| 360 |
+
for (let z = 0; z < numZig; z++) {
|
| 361 |
+
coeffs[bi * numZig + z] = blockBuf[zigCoeffIdx[z]];
|
| 362 |
+
}
|
| 363 |
+
deltas[bi] = config.delta;
|
| 364 |
+
}
|
| 365 |
+
}
|
| 366 |
+
|
| 367 |
+
frameCoeffs.push(coeffs);
|
| 368 |
+
frameDeltas.push(deltas);
|
| 369 |
+
}
|
| 370 |
+
|
| 371 |
+
// Sweep all dither offsets using cached coefficients (DMQIM only)
|
| 372 |
+
for (let ditherOffY = 0; ditherOffY < blocksPerSide; ditherOffY++) {
|
| 373 |
+
for (let ditherOffX = 0; ditherOffX < blocksPerSide; ditherOffX++) {
|
| 374 |
+
if (padTop === 0 && padLeft === 0 && shiftX === 0 && shiftY === 0
|
| 375 |
+
&& ditherOffX === 0 && ditherOffY === 0) {
|
| 376 |
+
continue; // Already tried in fast path
|
| 377 |
+
}
|
| 378 |
+
|
| 379 |
+
const avg = new Float64Array(codedLength);
|
| 380 |
+
let nSamples = 0;
|
| 381 |
+
|
| 382 |
+
for (let fi = 0; fi < frameCoeffs.length; fi++) {
|
| 383 |
+
const coeffs = frameCoeffs[fi];
|
| 384 |
+
const deltas = frameDeltas[fi];
|
| 385 |
+
const softBits = new Float64Array(codedLength);
|
| 386 |
+
const bitCounts = new Float64Array(codedLength);
|
| 387 |
+
|
| 388 |
+
for (let bi = 0; bi < nBlocks; bi++) {
|
| 389 |
+
const origR = ((relBR[bi] + ditherOffY) % blocksPerSide + blocksPerSide) % blocksPerSide;
|
| 390 |
+
const origC = ((relBC[bi] + ditherOffX) % blocksPerSide + blocksPerSide) % blocksPerSide;
|
| 391 |
+
const blockDitherBase = (origR * blocksPerSide + origC) * numZig;
|
| 392 |
+
const ed = deltas[bi];
|
| 393 |
+
// Remap bitIdx to match embedder's bit assignment at original position
|
| 394 |
+
let bitIdx = ((origR * blocksPerSide + origC) * numZig) % codedLength;
|
| 395 |
+
|
| 396 |
+
for (let z = 0; z < numZig; z++) {
|
| 397 |
+
if (bitIdx >= codedLength) bitIdx = 0;
|
| 398 |
+
const soft = dmqimExtractSoft(coeffs[bi * numZig + z], ed, scoreDithers[blockDitherBase + z]);
|
| 399 |
+
softBits[bitIdx] += soft;
|
| 400 |
+
bitCounts[bitIdx]++;
|
| 401 |
+
bitIdx++;
|
| 402 |
+
}
|
| 403 |
+
}
|
| 404 |
+
|
| 405 |
+
for (let i = 0; i < codedLength; i++) {
|
| 406 |
+
if (bitCounts[i] > 0) softBits[i] /= bitCounts[i];
|
| 407 |
+
avg[i] += softBits[i];
|
| 408 |
+
}
|
| 409 |
+
nSamples++;
|
| 410 |
+
}
|
| 411 |
+
|
| 412 |
+
let mag = 0;
|
| 413 |
+
for (let i = 0; i < codedLength; i++) {
|
| 414 |
+
avg[i] /= nSamples;
|
| 415 |
+
mag += avg[i] * avg[i];
|
| 416 |
+
}
|
| 417 |
+
|
| 418 |
+
candidates.push({ padTop, padLeft, shiftX, shiftY, ditherOffX, ditherOffY, signalMag: mag });
|
| 419 |
+
}
|
| 420 |
+
}
|
| 421 |
+
}
|
| 422 |
+
}
|
| 423 |
+
}
|
| 424 |
+
}
|
| 425 |
+
|
| 426 |
+
// Sort by signal magnitude and decode top candidates with all frames
|
| 427 |
+
candidates.sort((a, b) => b.signalMag - a.signalMag);
|
| 428 |
+
const MAX_DECODE = 50;
|
| 429 |
+
let bestResult: DetectionResult | null = null;
|
| 430 |
+
|
| 431 |
+
for (let i = 0; i < Math.min(MAX_DECODE, candidates.length); i++) {
|
| 432 |
+
const { padTop, padLeft, shiftX, shiftY, ditherOffX, ditherOffY } = candidates[i];
|
| 433 |
+
|
| 434 |
+
const dwts = (padTop === 0 && padLeft === 0)
|
| 435 |
+
? frameDWTs
|
| 436 |
+
: yPlanes.map((yp) => {
|
| 437 |
+
const { padded, paddedW, paddedH } = padYPlane(yp, width, height, padLeft, padTop);
|
| 438 |
+
return computeFrameDWT(padded, paddedW, paddedH, config);
|
| 439 |
+
});
|
| 440 |
+
|
| 441 |
+
const result = tryDetect(dwts, (hl) => {
|
| 442 |
+
const shifted = createShiftedSubband(hl, shiftX, shiftY);
|
| 443 |
+
const grid = computeTileGrid(shifted.width, shifted.height, subbandTilePeriod);
|
| 444 |
+
return { subband: shifted, grid, ditherOffX, ditherOffY, blocksPerSide };
|
| 445 |
+
});
|
| 446 |
+
|
| 447 |
+
if (result && (!bestResult || result.confidence > bestResult.confidence)) {
|
| 448 |
+
bestResult = result;
|
| 449 |
+
}
|
| 450 |
+
|
| 451 |
+
if (bestResult && bestResult.confidence >= 0.95) break;
|
| 452 |
+
}
|
| 453 |
+
|
| 454 |
+
return bestResult ?? noResult;
|
| 455 |
+
}
|
| 456 |
+
|
| 457 |
+
/**
 * Pad a Y plane with edge-replicated border pixels to realign DWT pixel pairing.
 *
 * Only the top and left edges grow: `padTop` copies of the first source row are
 * placed above the image, and every output row starts with `padLeft` copies of
 * that row's first pixel. The source plane is read as row-major with a stride
 * of `width` and is never modified.
 *
 * @param yPlane  Source samples, row-major, one byte per pixel.
 * @param width   Source row length in pixels.
 * @param height  Number of source rows.
 * @param padLeft Columns to prepend to every row.
 * @param padTop  Rows to prepend above the image.
 * @returns The newly allocated padded plane together with its dimensions
 *          (`width + padLeft` by `height + padTop`).
 */
function padYPlane(
  yPlane: Uint8Array,
  width: number,
  height: number,
  padLeft: number,
  padTop: number,
): { padded: Uint8Array; paddedW: number; paddedH: number } {
  const paddedW = width + padLeft;
  const paddedH = height + padTop;
  const padded = new Uint8Array(paddedW * paddedH);

  // A negative padLeft historically cropped columns from the left instead of
  // padding; keep that behaviour by skipping source columns in that case.
  const leftFill = Math.max(0, padLeft);
  const srcSkip = Math.max(0, -padLeft);
  const copyLen = paddedW - leftFill;

  for (let y = 0; y < paddedH; y++) {
    // Rows above the image clamp to source row 0 (edge replication).
    const srcStart = Math.max(0, y - padTop) * width + srcSkip;
    const dstStart = y * paddedW;

    // Left border: repeat the first pixel of the source row.
    padded.fill(yPlane[srcStart], dstStart, dstStart + leftFill);
    // Image body: copy the whole row in one typed-array operation.
    padded.set(yPlane.subarray(srcStart, srcStart + copyLen), dstStart + leftFill);
  }

  return { padded, paddedW, paddedH };
}
|
| 481 |
+
|
| 482 |
+
/**
 * Copy a subband with its origin moved to (shiftX, shiftY).
 *
 * Allocates a buffer of size (hl.width - shiftX) x (hl.height - shiftY) via
 * `createBuffer2D` and fills it with the samples of `hl` starting at column
 * `shiftX`, row `shiftY`. This is a copy, not a view: `hl` is only read.
 *
 * @param hl     Source subband (row-major `data` with stride `hl.width`).
 * @param shiftX Number of leading columns to drop.
 * @param shiftY Number of leading rows to drop.
 * @returns The freshly filled buffer.
 */
function createShiftedSubband(hl: Buffer2D, shiftX: number, shiftY: number): Buffer2D {
  const outW = hl.width - shiftX;
  const outH = hl.height - shiftY;
  const out = createBuffer2D(outW, outH);

  for (let row = 0; row < outH; row++) {
    // Offsets of this row's first sample in the source and in the output.
    const readBase = (row + shiftY) * hl.width + shiftX;
    const writeBase = row * outW;

    let col = 0;
    while (col < outW) {
      out.data[writeBase + col] = hl.data[readBase + col];
      col += 1;
    }
  }

  return out;
}
|
| 498 |
|
| 499 |
/**
|
|
|
|
| 561 |
|
| 562 |
/** Minimum confidence to report a detection (low threshold is fine —
|
| 563 |
* the statistical model already ensures noise scores near 0%) */
|
| 564 |
+
const MIN_CONFIDENCE = 0.75;
|
| 565 |
|
| 566 |
/**
|
| 567 |
* Try to decode soft bits into a payload
|
|
|
|
| 626 |
width: number,
|
| 627 |
height: number,
|
| 628 |
key: string,
|
| 629 |
+
options?: DetectOptions,
|
| 630 |
): AutoDetectResult {
|
| 631 |
+
return autoDetectMultiFrame([yPlane], width, height, key, options);
|
| 632 |
}
|
| 633 |
|
| 634 |
/**
|
|
|
|
| 639 |
width: number,
|
| 640 |
height: number,
|
| 641 |
key: string,
|
| 642 |
+
options?: DetectOptions,
|
| 643 |
): AutoDetectResult {
|
| 644 |
let best: AutoDetectResult = {
|
| 645 |
detected: false,
|
|
|
|
| 651 |
};
|
| 652 |
|
| 653 |
for (const [name, config] of Object.entries(PRESETS)) {
|
| 654 |
+
const result = detectWatermarkMultiFrame(yPlanes, width, height, key, config, options);
|
| 655 |
if (result.detected && result.confidence > best.confidence) {
|
| 656 |
best = { ...result, presetUsed: name as PresetName };
|
| 657 |
}
|