// web/src/lib/video-io.ts
/**
* Browser-based video frame extraction and re-encoding
* Uses ffmpeg.wasm for H.264 MP4 output
*/
import { FFmpeg } from '@ffmpeg/ffmpeg';
import { toBlobURL } from '@ffmpeg/util';
import type { WatermarkConfig } from '@core/types.js';
import { embedWatermark } from '@core/embedder.js';
// Shared ffmpeg.wasm singleton — creating and loading an FFmpeg instance is
// expensive (downloads the WASM core), so one instance is reused across all
// operations in this module.
let ffmpegInstance: FFmpeg | null = null;
// True only after ffmpegInstance.load() has completed successfully.
let ffmpegLoaded = false;

/** Reset the FFmpeg singleton so the next getFFmpeg() call creates a fresh instance */
function resetFFmpeg() {
  ffmpegInstance = null;
  ffmpegLoaded = false;
}
/**
 * Get or initialize the shared FFmpeg instance (lazy singleton).
 *
 * NOTE: the `onLog` listener is only attached when a fresh instance is
 * created; a caller passing `onLog` after the singleton already exists
 * will not receive log messages.
 *
 * @param onLog optional sink for ffmpeg log lines (first initialization only)
 * @returns the loaded, shared FFmpeg instance
 * @throws rethrows any load failure after clearing the singleton state
 */
async function getFFmpeg(onLog?: (msg: string) => void): Promise<FFmpeg> {
  if (ffmpegInstance && ffmpegLoaded) return ffmpegInstance;
  const instance = new FFmpeg();
  ffmpegInstance = instance;
  if (onLog) {
    instance.on('log', ({ message }) => onLog(message));
  }
  // Load ffmpeg.wasm from CDN; toBlobURL serves the assets from same-origin
  // blob URLs so the worker can load them.
  const baseURL = 'https://unpkg.com/@ffmpeg/core@0.12.6/dist/esm';
  try {
    await instance.load({
      coreURL: await toBlobURL(`${baseURL}/ffmpeg-core.js`, 'text/javascript'),
      wasmURL: await toBlobURL(`${baseURL}/ffmpeg-core.wasm`, 'application/wasm'),
    });
  } catch (e) {
    // BUGFIX: the old code kept the half-initialized instance cached in
    // ffmpegInstance on load failure; drop it so nothing retains a broken
    // FFmpeg object and the next call starts clean.
    ffmpegInstance = null;
    ffmpegLoaded = false;
    throw e;
  }
  ffmpegLoaded = true;
  return instance;
}
/**
 * Extract the luma (Y) plane from an RGBA ImageData using the BT.601
 * weights (0.299 R + 0.587 G + 0.114 B), rounding to the nearest integer.
 * Alpha is ignored; one Y byte is produced per pixel.
 */
export function rgbaToY(imageData: ImageData): Uint8Array {
  const { data, width, height } = imageData;
  const pixelCount = width * height;
  const luma = new Uint8Array(pixelCount);
  for (let p = 0, src = 0; p < pixelCount; p++, src += 4) {
    luma[p] = Math.round(
      0.299 * data[src] + 0.587 * data[src + 1] + 0.114 * data[src + 2]
    );
  }
  return luma;
}
/**
 * Fold the per-pixel luma delta (watermarked − original) back into the RGB
 * channels of `imageData`, clamping each channel to [0, 255]. The alpha
 * channel is left untouched. Mutates `imageData` in place.
 */
export function applyYDelta(imageData: ImageData, originalY: Uint8Array, watermarkedY: Uint8Array): void {
  const { data } = imageData;
  const clamp = (v: number) => Math.max(0, Math.min(255, v));
  for (let p = 0; p < originalY.length; p++) {
    const delta = watermarkedY[p] - originalY[p];
    const base = p * 4;
    data[base] = clamp(data[base] + delta);
    data[base + 1] = clamp(data[base + 1] + delta);
    data[base + 2] = clamp(data[base + 2] + delta);
  }
}
/** Result of the streaming extract+embed pipeline */
export interface StreamEmbedResult {
  // Final H.264 MP4 (with original audio muxed back in, when present).
  blob: Blob;
  // Output dimensions (forced even for yuv420p).
  width: number;
  height: number;
  // Effective frame rate used for encoding (~30 fps).
  fps: number;
  // Number of frames extracted and watermarked.
  totalFrames: number;
  // Mean per-frame PSNR reported by the embedder.
  avgPsnr: number;
  // Matched before/after frames kept for visual comparison.
  sampleOriginal: ImageData[];
  sampleWatermarked: ImageData[];
  // Wall-clock duration of the whole pipeline.
  embedTimeMs: number;
  // Throughput metric: pixels processed per second.
  pixelsPerSecond: number;
}
/**
 * Number of frames to accumulate before flushing to the H.264 encoder.
 * Bounds peak raw-RGBA memory at roughly CHUNK_SIZE * width * height * 4 bytes.
 */
const CHUNK_SIZE = 100;
/**
 * Streaming extract → watermark → encode pipeline (public entry point).
 *
 * Frames are processed in chunks of CHUNK_SIZE: each chunk is encoded to a
 * temporary MP4 segment and its raw buffer freed, keeping peak memory at
 * ~(CHUNK_SIZE * frameSize) instead of (totalFrames * frameSize); the
 * segments are concatenated into the final MP4 at the end.
 *
 * If the implementation throws (e.g. a WASM crash inside ffmpeg), the shared
 * FFmpeg singleton is discarded so the next operation starts from a clean
 * instance, and the error is rethrown to the caller.
 */
export async function streamExtractAndEmbed(
  videoUrl: string,
  payload: Uint8Array,
  key: string,
  config: WatermarkConfig,
  comparisonSample: number,
  onProgress?: (phase: string, current: number, total: number) => void
): Promise<StreamEmbedResult> {
  try {
    return await _streamExtractAndEmbed(videoUrl, payload, key, config, comparisonSample, onProgress);
  } catch (err) {
    // A crashed ffmpeg.wasm instance is not reusable — reset the singleton.
    resetFFmpeg();
    throw err;
  }
}
/**
 * Internal implementation of the streaming extract → watermark → encode
 * pipeline (see streamExtractAndEmbed for the crash-recovery wrapper).
 *
 * Per frame: seek the <video>, draw to a canvas, extract the Y plane,
 * embed the watermark, fold the luma delta back into RGBA, and append the
 * raw frame to a chunk buffer. Full chunks are encoded to temporary MP4
 * segments with ffmpeg.wasm; at the end the segments are concatenated and
 * the original audio track (if any) is muxed back in.
 *
 * @param videoUrl         URL (typically an object/blob URL) of the source video
 * @param payload          watermark payload bytes
 * @param key              watermark key, passed through to embedWatermark
 * @param config           watermark embedding configuration
 * @param comparisonSample number of frames to retain as before/after samples
 * @param onProgress       optional callback (phase label, current, total)
 */
async function _streamExtractAndEmbed(
  videoUrl: string,
  payload: Uint8Array,
  key: string,
  config: WatermarkConfig,
  comparisonSample: number,
  onProgress?: (phase: string, current: number, total: number) => void
): Promise<StreamEmbedResult> {
  const t0 = performance.now();
  // Decode frames with an off-DOM <video> element + canvas readback.
  const video = document.createElement('video');
  video.src = videoUrl;
  video.muted = true;
  video.preload = 'auto';
  await new Promise<void>((resolve, reject) => {
    video.onloadedmetadata = () => resolve();
    video.onerror = () => reject(new Error('Failed to load video'));
  });
  const { videoWidth: rawW, videoHeight: rawH, duration } = video;
  // x264 with yuv420p requires even dimensions
  const width = rawW % 2 === 0 ? rawW : rawW - 1;
  const height = rawH % 2 === 0 ? rawH : rawH - 1;
  // Assumes a ~30 fps source; the native frame rate is never inspected, so
  // other rates get resampled to ~30 fps — NOTE(review): confirm intended.
  const totalFrames = Math.ceil(duration * 30);
  const interval = duration / totalFrames;
  const fps = totalFrames / duration;
  const canvas = document.createElement('canvas');
  canvas.width = width;
  canvas.height = height;
  const ctx = canvas.getContext('2d')!;
  // Bytes per raw RGBA frame.
  const frameSize = width * height * 4;
  // Determine which frame indices to sample for comparison
  const sampleIndices = new Set<number>();
  const sampleStep = Math.max(1, Math.floor(totalFrames / comparisonSample));
  for (let i = 0; i < totalFrames && sampleIndices.size < comparisonSample; i += sampleStep) {
    sampleIndices.add(i);
  }
  const sampleOriginal: ImageData[] = [];
  const sampleWatermarked: ImageData[] = [];
  let totalPsnr = 0;
  const ffmpeg = await getFFmpeg();
  // --- Extract audio from original video (if any) ---
  let hasAudio = false;
  {
    const resp = await fetch(videoUrl);
    const origBytes = new Uint8Array(await resp.arrayBuffer());
    await ffmpeg.writeFile('original_input.mp4', origBytes);
    try {
      // -vn drops video; -acodec copy extracts the audio stream unchanged.
      await ffmpeg.exec([
        '-i', 'original_input.mp4',
        '-vn', '-acodec', 'copy',
        '-y', 'audio_track.m4a',
      ]);
      // Check if audio file was actually produced (non-empty)
      const audioData = await ffmpeg.readFile('audio_track.m4a');
      hasAudio = typeof audioData !== 'string' && (audioData as Uint8Array).length > 0;
    } catch {
      // No audio stream (or extraction failed) — proceed video-only.
      hasAudio = false;
    }
    await ffmpeg.deleteFile('original_input.mp4');
  }
  const segments: string[] = [];
  // Chunk buffer — reused across chunks
  let chunkBuffer = new Uint8Array(Math.min(CHUNK_SIZE, totalFrames) * frameSize);
  let chunkOffset = 0;
  let framesInChunk = 0;
  // Encode the buffered frames into an H.264 MP4 segment, then reset the
  // buffer counters so the same allocation is reused for the next chunk.
  const flushChunk = async () => {
    if (framesInChunk === 0) return;
    const segName = `seg_${segments.length}.mp4`;
    // slice() copies only the used portion of the (possibly larger) buffer.
    const usedBytes = chunkBuffer.slice(0, framesInChunk * frameSize);
    await ffmpeg.writeFile('chunk.raw', usedBytes);
    await ffmpeg.exec([
      '-f', 'rawvideo',
      '-pix_fmt', 'rgba',
      '-s', `${width}x${height}`,
      '-r', String(fps),
      '-i', 'chunk.raw',
      '-c:v', 'libx264',
      '-pix_fmt', 'yuv420p',
      '-crf', '20',
      // ultrafast favors speed in WASM; segments are later stream-copied,
      // not re-encoded, so no additional quality loss is incurred.
      '-preset', 'ultrafast',
      '-an', '-y',
      segName,
    ]);
    await ffmpeg.deleteFile('chunk.raw');
    segments.push(segName);
    chunkOffset = 0;
    framesInChunk = 0;
  };
  for (let i = 0; i < totalFrames; i++) {
    onProgress?.('Embedding', i + 1, totalFrames);
    // Seek and extract frame
    video.currentTime = i * interval;
    // NOTE(review): relies on a 'seeked' event firing for every assignment;
    // a seek to the already-current position may not fire in all browsers —
    // confirm this cannot stall the loop.
    await new Promise<void>((resolve) => {
      video.onseeked = () => resolve();
    });
    ctx.drawImage(video, 0, 0, width, height);
    const frameData = ctx.getImageData(0, 0, width, height);
    // Watermark the luma plane only.
    const y = rgbaToY(frameData);
    const result = embedWatermark(y, width, height, payload, key, config);
    totalPsnr += result.psnr;
    // Apply watermark delta to RGBA
    applyYDelta(frameData, y, result.yPlane);
    // Append to chunk buffer
    chunkBuffer.set(frameData.data, chunkOffset);
    chunkOffset += frameSize;
    framesInChunk++;
    // Keep sample frames for comparison
    if (sampleIndices.has(i)) {
      // Redraw from the video to recapture the unmodified original pixels
      // (frameData was mutated in place by applyYDelta above).
      ctx.drawImage(video, 0, 0, width, height);
      sampleOriginal.push(ctx.getImageData(0, 0, width, height));
      sampleWatermarked.push(frameData);
    }
    // Flush chunk when full
    if (framesInChunk >= CHUNK_SIZE) {
      onProgress?.('Encoding chunk', segments.length + 1, Math.ceil(totalFrames / CHUNK_SIZE));
      await flushChunk();
    }
  }
  // Flush remaining frames
  if (framesInChunk > 0) {
    onProgress?.('Encoding chunk', segments.length + 1, Math.ceil(totalFrames / CHUNK_SIZE));
    await flushChunk();
  }
  // Free chunk buffer
  chunkBuffer = null!;
  // Concatenate all segments into video-only file
  const videoOnlyFile = 'video_only.mp4';
  if (segments.length === 1) {
    // Single segment — rename it
    const data = await ffmpeg.readFile(segments[0]);
    await ffmpeg.deleteFile(segments[0]);
    await ffmpeg.writeFile(videoOnlyFile, data);
  } else {
    onProgress?.('Joining segments', 0, 0);
    // concat demuxer list: one `file 'name'` line per segment.
    const concatList = segments.map((s) => `file '${s}'`).join('\n');
    await ffmpeg.writeFile('concat.txt', concatList);
    // -c copy: join segments by stream copy, no second lossy encode.
    await ffmpeg.exec([
      '-f', 'concat',
      '-safe', '0',
      '-i', 'concat.txt',
      '-c', 'copy',
      '-movflags', '+faststart',
      '-y',
      videoOnlyFile,
    ]);
    await ffmpeg.deleteFile('concat.txt');
    for (const seg of segments) {
      await ffmpeg.deleteFile(seg);
    }
  }
  // Mux audio back in if it existed in the original
  let blob: Blob;
  if (hasAudio) {
    onProgress?.('Muxing audio', 0, 0);
    await ffmpeg.exec([
      '-i', videoOnlyFile,
      '-i', 'audio_track.m4a',
      '-c:v', 'copy',
      '-c:a', 'copy',
      '-shortest',
      '-movflags', '+faststart',
      '-y',
      'final_with_audio.mp4',
    ]);
    await ffmpeg.deleteFile(videoOnlyFile);
    await ffmpeg.deleteFile('audio_track.m4a');
    const data = await ffmpeg.readFile('final_with_audio.mp4');
    await ffmpeg.deleteFile('final_with_audio.mp4');
    if (typeof data === 'string') throw new Error('Unexpected string output from ffmpeg');
    // NOTE(review): wrapping `.buffer` assumes the view spans its whole
    // ArrayBuffer; an offset or partial view would corrupt the blob — confirm
    // against @ffmpeg/ffmpeg's readFile contract.
    blob = new Blob([(data as unknown as { buffer: ArrayBuffer }).buffer], { type: 'video/mp4' });
  } else {
    const data = await ffmpeg.readFile(videoOnlyFile);
    await ffmpeg.deleteFile(videoOnlyFile);
    if (typeof data === 'string') throw new Error('Unexpected string output from ffmpeg');
    // NOTE(review): same `.buffer` whole-view assumption as above — confirm.
    blob = new Blob([(data as unknown as { buffer: ArrayBuffer }).buffer], { type: 'video/mp4' });
    // Clean up audio file if extraction was attempted
    try { await ffmpeg.deleteFile('audio_track.m4a'); } catch { /* no-op */ }
  }
  const embedTimeMs = performance.now() - t0;
  const pixelsPerSecond = (width * height * totalFrames) / (embedTimeMs / 1000);
  return {
    blob,
    width,
    height,
    fps,
    totalFrames,
    // Mean of the per-frame PSNR values reported by embedWatermark.
    avgPsnr: totalPsnr / totalFrames,
    sampleOriginal,
    sampleWatermarked,
    embedTimeMs,
    pixelsPerSecond,
  };
}
/**
 * Extract frames from a video (used by detect panel).
 * Samples up to `maxFrames` frames spread evenly across the full duration
 * by seeking a <video> element and reading pixels back through a canvas.
 */
export async function extractFrames(
  videoUrl: string,
  maxFrames: number = 30,
  onProgress?: (frame: number, total: number) => void
): Promise<{ frames: ImageData[]; width: number; height: number; fps: number; duration: number }> {
  const video = document.createElement('video');
  video.src = videoUrl;
  video.muted = true;
  video.preload = 'auto';
  await new Promise<void>((resolve, reject) => {
    video.onloadedmetadata = () => resolve();
    video.onerror = () => reject(new Error('Failed to load video'));
  });
  const { videoWidth: rawW, videoHeight: rawH, duration } = video;
  // Force even dimensions (libx264/yuv420p requirement elsewhere in the app).
  const width = rawW - (rawW % 2);
  const height = rawH - (rawH % 2);
  // Cap the sample count at the (assumed ~30 fps) native frame count.
  const nativeFrameCount = Math.ceil(duration * 30);
  const totalFrames = Math.min(maxFrames, nativeFrameCount);
  const interval = duration / totalFrames;
  const cvs = document.createElement('canvas');
  cvs.width = width;
  cvs.height = height;
  const context = cvs.getContext('2d')!;
  const frames: ImageData[] = [];
  for (let frameIdx = 0; frameIdx < totalFrames; frameIdx++) {
    // Seek, then wait for the seek to complete before sampling pixels.
    video.currentTime = frameIdx * interval;
    await new Promise<void>((resolve) => {
      video.onseeked = () => resolve();
    });
    context.drawImage(video, 0, 0, width, height);
    frames.push(context.getImageData(0, 0, width, height));
    onProgress?.(frameIdx + 1, totalFrames);
  }
  return { frames, width, height, fps: totalFrames / duration, duration };
}
// ---------------------------------------------------------------------------
// Attack utilities — apply degradation to watermarked MP4, return Y planes
// ---------------------------------------------------------------------------
/** Result of decoding an attacked video back to Y planes */
export interface AttackYPlanes {
  // One grayscale (luma) plane per decoded frame, width * height bytes each.
  yPlanes: Uint8Array[];
  // Dimensions of the decoded planes (may differ from the input, e.g. crop).
  width: number;
  height: number;
}
/**
 * Write blob to ffmpeg FS, run attack filter, decode output to Y planes.
 * Only processes `maxFrames` frames to keep WASM encoding fast.
 *
 * @param blob         watermarked MP4 to attack
 * @param filterArgs   extra ffmpeg args implementing the attack (e.g. -vf …)
 * @param outputWidth  expected width of the attacked output
 * @param outputHeight expected height of the attacked output
 * @param maxFrames    cap on frames encoded/decoded (default 30)
 */
async function runAttackPipelineOnce(
  blob: Blob,
  filterArgs: string[],
  outputWidth: number,
  outputHeight: number,
  maxFrames: number = 30,
): Promise<AttackYPlanes> {
  const ffmpeg = await getFFmpeg();
  const inputData = new Uint8Array(await blob.arrayBuffer());
  await ffmpeg.writeFile('attack_input.mp4', inputData);
  // Re-encode with the attack filter applied.
  await ffmpeg.exec([
    '-i', 'attack_input.mp4',
    ...filterArgs,
    '-frames:v', String(maxFrames),
    '-c:v', 'libx264',
    '-pix_fmt', 'yuv420p',
    '-preset', 'ultrafast',
    '-an',
    '-y',
    'attack_output.mp4',
  ]);
  // Decode attacked output to raw grayscale
  await ffmpeg.exec([
    '-i', 'attack_output.mp4',
    '-f', 'rawvideo',
    '-pix_fmt', 'gray',
    '-y',
    'attack_raw.raw',
  ]);
  const rawData = await ffmpeg.readFile('attack_raw.raw');
  await ffmpeg.deleteFile('attack_input.mp4');
  await ffmpeg.deleteFile('attack_output.mp4');
  await ffmpeg.deleteFile('attack_raw.raw');
  if (typeof rawData === 'string') throw new Error('Unexpected string output from ffmpeg');
  // BUGFIX: use the returned Uint8Array view directly. The old code did
  // `new Uint8Array(rawData.buffer)`, which wraps the ENTIRE backing
  // ArrayBuffer — if the view were offset into, or shorter than, its buffer
  // that would yield wrong bytes and a wrong frame count.
  const raw = rawData as Uint8Array;
  const frameSize = outputWidth * outputHeight;
  const frameCount = Math.floor(raw.length / frameSize);
  const yPlanes: Uint8Array[] = [];
  for (let i = 0; i < frameCount; i++) {
    // slice() copies, so each plane owns its memory independently.
    yPlanes.push(raw.slice(i * frameSize, (i + 1) * frameSize));
  }
  return { yPlanes, width: outputWidth, height: outputHeight };
}
/**
 * Run the attack pipeline, retrying once with a freshly initialized FFmpeg
 * instance if the WASM runtime crashes mid-run.
 */
async function runAttackPipeline(
  blob: Blob,
  filterArgs: string[],
  outputWidth: number,
  outputHeight: number,
  maxFrames: number = 30,
): Promise<AttackYPlanes> {
  try {
    return await runAttackPipelineOnce(blob, filterArgs, outputWidth, outputHeight, maxFrames);
  } catch (err) {
    console.warn('ffmpeg.wasm crashed, reinitializing and retrying:', err);
    // Discard the crashed singleton; the retry call re-creates it.
    resetFFmpeg();
    return await runAttackPipelineOnce(blob, filterArgs, outputWidth, outputHeight, maxFrames);
  }
}
/** Attack: re-encode the video at a given CRF quality level (higher = lossier). */
export async function attackReencode(blob: Blob, crf: number, width: number, height: number): Promise<AttackYPlanes> {
  const encoderArgs = ['-crf', String(crf)];
  return runAttackPipeline(blob, encoderArgs, width, height);
}
/**
 * Attack: downscale to `scalePct`% of the original size, then scale back up
 * to the original dimensions, destroying high-frequency detail.
 */
export async function attackDownscale(
  blob: Blob,
  scalePct: number,
  width: number,
  height: number,
): Promise<AttackYPlanes> {
  const scaledW = Math.round(width * scalePct / 100);
  const scaledH = Math.round(height * scalePct / 100);
  // libx264 needs even intermediate dimensions — round odd values up by one.
  const sW = scaledW + (scaledW % 2);
  const sH = scaledH + (scaledH % 2);
  const filter = `scale=${sW}:${sH},scale=${width}:${height}`;
  return runAttackPipeline(blob, ['-vf', filter, '-crf', '18'], width, height);
}
/** Attack: adjust brightness by delta (-1.0 to 1.0) via ffmpeg's eq filter. */
export async function attackBrightness(blob: Blob, delta: number, width: number, height: number): Promise<AttackYPlanes> {
  const filter = `eq=brightness=${delta}`;
  return runAttackPipeline(blob, ['-vf', filter, '-crf', '18'], width, height);
}
/** Attack: adjust contrast via ffmpeg's eq filter (1.0 = unchanged, <1 = less, >1 = more). */
export async function attackContrast(blob: Blob, factor: number, width: number, height: number): Promise<AttackYPlanes> {
  const filter = `eq=contrast=${factor}`;
  return runAttackPipeline(blob, ['-vf', filter, '-crf', '18'], width, height);
}
/** Attack: adjust saturation via ffmpeg's eq filter (1.0 = unchanged, 0 = grayscale, >1 = boosted). */
export async function attackSaturation(blob: Blob, factor: number, width: number, height: number): Promise<AttackYPlanes> {
  const filter = `eq=saturation=${factor}`;
  return runAttackPipeline(blob, ['-vf', filter, '-crf', '18'], width, height);
}
/**
 * Attack: crop the given number of pixels off each edge. The detector then
 * sees a smaller frame whose content is shifted relative to the original.
 */
export async function attackCrop(
  blob: Blob,
  cropLeft: number,
  cropTop: number,
  cropRight: number,
  cropBottom: number,
  width: number,
  height: number,
): Promise<AttackYPlanes> {
  const remainingW = width - cropLeft - cropRight;
  const remainingH = height - cropTop - cropBottom;
  // libx264 needs even output dimensions — shave one extra pixel if odd.
  const w = remainingW % 2 === 0 ? remainingW : remainingW - 1;
  const h = remainingH % 2 === 0 ? remainingH : remainingH - 1;
  const filter = `crop=${w}:${h}:${cropLeft}:${cropTop}`;
  return runAttackPipeline(blob, ['-vf', filter, '-crf', '18'], w, h);
}