Spaces:
Running
Running
| /** | |
| * Browser-based video frame extraction and re-encoding | |
| * Uses ffmpeg.wasm for H.264 MP4 output | |
| */ | |
| import { FFmpeg } from '@ffmpeg/ffmpeg'; | |
| import { toBlobURL } from '@ffmpeg/util'; | |
| import type { WatermarkConfig } from '@core/types.js'; | |
| import { embedWatermark } from '@core/embedder.js'; | |
| let ffmpegInstance: FFmpeg | null = null; | |
| let ffmpegLoaded = false; | |
| /** Reset the FFmpeg singleton so the next getFFmpeg() call creates a fresh instance */ | |
| function resetFFmpeg() { | |
| ffmpegInstance = null; | |
| ffmpegLoaded = false; | |
| } | |
| /** Get or initialize the shared FFmpeg instance */ | |
| async function getFFmpeg(onLog?: (msg: string) => void): Promise<FFmpeg> { | |
| if (ffmpegInstance && ffmpegLoaded) return ffmpegInstance; | |
| ffmpegInstance = new FFmpeg(); | |
| if (onLog) { | |
| ffmpegInstance.on('log', ({ message }) => onLog(message)); | |
| } | |
| // Load ffmpeg.wasm from CDN | |
| const baseURL = 'https://unpkg.com/@ffmpeg/core@0.12.6/dist/esm'; | |
| await ffmpegInstance.load({ | |
| coreURL: await toBlobURL(`${baseURL}/ffmpeg-core.js`, 'text/javascript'), | |
| wasmURL: await toBlobURL(`${baseURL}/ffmpeg-core.wasm`, 'application/wasm'), | |
| }); | |
| ffmpegLoaded = true; | |
| return ffmpegInstance; | |
| } | |
| /** Extract Y plane from an RGBA ImageData */ | |
| export function rgbaToY(imageData: ImageData): Uint8Array { | |
| const { data, width, height } = imageData; | |
| const y = new Uint8Array(width * height); | |
| for (let i = 0; i < width * height; i++) { | |
| const r = data[i * 4]; | |
| const g = data[i * 4 + 1]; | |
| const b = data[i * 4 + 2]; | |
| y[i] = Math.round(0.299 * r + 0.587 * g + 0.114 * b); | |
| } | |
| return y; | |
| } | |
| /** Apply Y plane delta to RGBA ImageData (modifies in place) */ | |
| export function applyYDelta(imageData: ImageData, originalY: Uint8Array, watermarkedY: Uint8Array): void { | |
| const { data } = imageData; | |
| for (let i = 0; i < originalY.length; i++) { | |
| const delta = watermarkedY[i] - originalY[i]; | |
| data[i * 4] = Math.max(0, Math.min(255, data[i * 4] + delta)); | |
| data[i * 4 + 1] = Math.max(0, Math.min(255, data[i * 4 + 1] + delta)); | |
| data[i * 4 + 2] = Math.max(0, Math.min(255, data[i * 4 + 2] + delta)); | |
| } | |
| } | |
/** Result of the streaming extract+embed pipeline */
export interface StreamEmbedResult {
  /** Final H.264 MP4 (audio re-muxed when the source had a non-empty audio track) */
  blob: Blob;
  /** Output frame width in pixels (forced even for yuv420p) */
  width: number;
  /** Output frame height in pixels (forced even for yuv420p) */
  height: number;
  /** Encoded frame rate (totalFrames / duration) */
  fps: number;
  /** Total number of frames processed */
  totalFrames: number;
  /** Mean of the per-frame PSNR values reported by embedWatermark */
  avgPsnr: number;
  /** Un-watermarked sample frames kept for side-by-side comparison */
  sampleOriginal: ImageData[];
  /** Watermarked counterparts of sampleOriginal (same indices) */
  sampleWatermarked: ImageData[];
  /** Wall-clock time of the whole pipeline in milliseconds */
  embedTimeMs: number;
  /** Throughput: width * height * totalFrames / elapsed seconds */
  pixelsPerSecond: number;
}
/** Number of frames to accumulate before flushing to H.264 encoder */
const CHUNK_SIZE = 100;
| /** | |
| * Streaming extract → watermark → encode pipeline. | |
| * Processes frames in chunks of CHUNK_SIZE, encoding each chunk to a | |
| * temporary MP4 segment and freeing the raw buffer. This keeps peak | |
| * memory at ~(CHUNK_SIZE * frameSize) instead of (totalFrames * frameSize). | |
| * At the end, all segments are concatenated into the final MP4. | |
| */ | |
| export async function streamExtractAndEmbed( | |
| videoUrl: string, | |
| payload: Uint8Array, | |
| key: string, | |
| config: WatermarkConfig, | |
| comparisonSample: number, | |
| onProgress?: (phase: string, current: number, total: number) => void | |
| ): Promise<StreamEmbedResult> { | |
| try { | |
| return await _streamExtractAndEmbed(videoUrl, payload, key, config, comparisonSample, onProgress); | |
| } catch (e) { | |
| resetFFmpeg(); | |
| throw e; | |
| } | |
| } | |
/**
 * Core implementation of the streaming extract → watermark → encode pipeline.
 * Decodes frames via a hidden <video> + canvas, watermarks the luma plane of
 * each frame, buffers up to CHUNK_SIZE raw RGBA frames, encodes each chunk to
 * an H.264 segment in the ffmpeg.wasm FS, then concatenates segments and
 * re-muxes the original audio track (if any).
 *
 * @param videoUrl - URL of the source video (also fetched again for audio extraction)
 * @param payload - watermark payload bytes passed to embedWatermark
 * @param key - watermark key passed to embedWatermark
 * @param config - embedder configuration
 * @param comparisonSample - max number of frames to keep for before/after comparison
 * @param onProgress - optional progress callback (phase label, current, total)
 */
async function _streamExtractAndEmbed(
  videoUrl: string,
  payload: Uint8Array,
  key: string,
  config: WatermarkConfig,
  comparisonSample: number,
  onProgress?: (phase: string, current: number, total: number) => void
): Promise<StreamEmbedResult> {
  const t0 = performance.now();
  // --- Load the source into an off-DOM <video> element for frame grabbing ---
  const video = document.createElement('video');
  video.src = videoUrl;
  video.muted = true;
  video.preload = 'auto';
  await new Promise<void>((resolve, reject) => {
    video.onloadedmetadata = () => resolve();
    video.onerror = () => reject(new Error('Failed to load video'));
  });
  const { videoWidth: rawW, videoHeight: rawH, duration } = video;
  // x264 with yuv420p requires even dimensions
  const width = rawW % 2 === 0 ? rawW : rawW - 1;
  const height = rawH % 2 === 0 ? rawH : rawH - 1;
  // Resamples to a fixed ~30 fps regardless of the source's native rate.
  // NOTE(review): presumably intentional (frames are seek-sampled, not decoded
  // in stream order) — confirm this matches the detector's expectations.
  const totalFrames = Math.ceil(duration * 30);
  const interval = duration / totalFrames;
  const fps = totalFrames / duration;
  const canvas = document.createElement('canvas');
  canvas.width = width;
  canvas.height = height;
  const ctx = canvas.getContext('2d')!;
  const frameSize = width * height * 4; // RGBA bytes per frame
  // Determine which frame indices to sample for comparison
  const sampleIndices = new Set<number>();
  const sampleStep = Math.max(1, Math.floor(totalFrames / comparisonSample));
  for (let i = 0; i < totalFrames && sampleIndices.size < comparisonSample; i += sampleStep) {
    sampleIndices.add(i);
  }
  const sampleOriginal: ImageData[] = [];
  const sampleWatermarked: ImageData[] = [];
  let totalPsnr = 0;
  const ffmpeg = await getFFmpeg();
  // --- Extract audio from original video (if any) ---
  let hasAudio = false;
  {
    const resp = await fetch(videoUrl);
    const origBytes = new Uint8Array(await resp.arrayBuffer());
    await ffmpeg.writeFile('original_input.mp4', origBytes);
    try {
      // Stream-copy the audio track (-vn drops video, -acodec copy avoids re-encode)
      await ffmpeg.exec([
        '-i', 'original_input.mp4',
        '-vn', '-acodec', 'copy',
        '-y', 'audio_track.m4a',
      ]);
      // Check if audio file was actually produced (non-empty)
      const audioData = await ffmpeg.readFile('audio_track.m4a');
      hasAudio = typeof audioData !== 'string' && (audioData as Uint8Array).length > 0;
    } catch {
      // No audio stream or extraction failed — proceed video-only.
      hasAudio = false;
    }
    await ffmpeg.deleteFile('original_input.mp4');
  }
  const segments: string[] = [];
  // Chunk buffer — reused across chunks
  let chunkBuffer = new Uint8Array(Math.min(CHUNK_SIZE, totalFrames) * frameSize);
  let chunkOffset = 0;
  let framesInChunk = 0;
  // Encode the currently buffered frames into one H.264 segment, then reset
  // the buffer state for the next chunk.
  const flushChunk = async () => {
    if (framesInChunk === 0) return;
    const segName = `seg_${segments.length}.mp4`;
    // slice() copies only the used portion so ffmpeg reads exact frame data
    const usedBytes = chunkBuffer.slice(0, framesInChunk * frameSize);
    await ffmpeg.writeFile('chunk.raw', usedBytes);
    await ffmpeg.exec([
      '-f', 'rawvideo',
      '-pix_fmt', 'rgba',
      '-s', `${width}x${height}`,
      '-r', String(fps),
      '-i', 'chunk.raw',
      '-c:v', 'libx264',
      '-pix_fmt', 'yuv420p',
      '-crf', '20',
      '-preset', 'ultrafast',
      '-an', '-y',
      segName,
    ]);
    await ffmpeg.deleteFile('chunk.raw');
    segments.push(segName);
    chunkOffset = 0;
    framesInChunk = 0;
  };
  for (let i = 0; i < totalFrames; i++) {
    onProgress?.('Embedding', i + 1, totalFrames);
    // Seek and extract frame. Attaching onseeked after setting currentTime is
    // safe within one task, since 'seeked' fires asynchronously.
    // NOTE(review): if the browser treats a seek as a no-op (e.g. seeking to
    // the current position at i = 0), 'seeked' may never fire and this await
    // would hang — TODO confirm across target browsers.
    video.currentTime = i * interval;
    await new Promise<void>((resolve) => {
      video.onseeked = () => resolve();
    });
    ctx.drawImage(video, 0, 0, width, height);
    const frameData = ctx.getImageData(0, 0, width, height);
    // Watermark (luma plane only)
    const y = rgbaToY(frameData);
    const result = embedWatermark(y, width, height, payload, key, config);
    totalPsnr += result.psnr;
    // Apply watermark delta to RGBA
    applyYDelta(frameData, y, result.yPlane);
    // Append to chunk buffer
    chunkBuffer.set(frameData.data, chunkOffset);
    chunkOffset += frameSize;
    framesInChunk++;
    // Keep sample frames for comparison — redraw the video to recover the
    // un-watermarked pixels (frameData was mutated in place above)
    if (sampleIndices.has(i)) {
      ctx.drawImage(video, 0, 0, width, height);
      sampleOriginal.push(ctx.getImageData(0, 0, width, height));
      sampleWatermarked.push(frameData);
    }
    // Flush chunk when full
    if (framesInChunk >= CHUNK_SIZE) {
      onProgress?.('Encoding chunk', segments.length + 1, Math.ceil(totalFrames / CHUNK_SIZE));
      await flushChunk();
    }
  }
  // Flush remaining frames
  if (framesInChunk > 0) {
    onProgress?.('Encoding chunk', segments.length + 1, Math.ceil(totalFrames / CHUNK_SIZE));
    await flushChunk();
  }
  // Free chunk buffer (drops the reference so up to CHUNK_SIZE frames of RGBA
  // become collectable before the memory-hungry concat/mux steps)
  chunkBuffer = null!;
  // Concatenate all segments into video-only file
  const videoOnlyFile = 'video_only.mp4';
  if (segments.length === 1) {
    // Single segment — rename it
    const data = await ffmpeg.readFile(segments[0]);
    await ffmpeg.deleteFile(segments[0]);
    await ffmpeg.writeFile(videoOnlyFile, data);
  } else {
    onProgress?.('Joining segments', 0, 0);
    // ffmpeg concat demuxer with stream copy: joins segments without re-encoding
    const concatList = segments.map((s) => `file '${s}'`).join('\n');
    await ffmpeg.writeFile('concat.txt', concatList);
    await ffmpeg.exec([
      '-f', 'concat',
      '-safe', '0',
      '-i', 'concat.txt',
      '-c', 'copy',
      '-movflags', '+faststart',
      '-y',
      videoOnlyFile,
    ]);
    await ffmpeg.deleteFile('concat.txt');
    for (const seg of segments) {
      await ffmpeg.deleteFile(seg);
    }
  }
  // Mux audio back in if it existed in the original
  let blob: Blob;
  if (hasAudio) {
    onProgress?.('Muxing audio', 0, 0);
    await ffmpeg.exec([
      '-i', videoOnlyFile,
      '-i', 'audio_track.m4a',
      '-c:v', 'copy',
      '-c:a', 'copy',
      '-shortest',
      '-movflags', '+faststart',
      '-y',
      'final_with_audio.mp4',
    ]);
    await ffmpeg.deleteFile(videoOnlyFile);
    await ffmpeg.deleteFile('audio_track.m4a');
    const data = await ffmpeg.readFile('final_with_audio.mp4');
    await ffmpeg.deleteFile('final_with_audio.mp4');
    if (typeof data === 'string') throw new Error('Unexpected string output from ffmpeg');
    blob = new Blob([(data as unknown as { buffer: ArrayBuffer }).buffer], { type: 'video/mp4' });
  } else {
    const data = await ffmpeg.readFile(videoOnlyFile);
    await ffmpeg.deleteFile(videoOnlyFile);
    if (typeof data === 'string') throw new Error('Unexpected string output from ffmpeg');
    blob = new Blob([(data as unknown as { buffer: ArrayBuffer }).buffer], { type: 'video/mp4' });
    // Clean up audio file if extraction was attempted
    try { await ffmpeg.deleteFile('audio_track.m4a'); } catch { /* no-op */ }
  }
  const embedTimeMs = performance.now() - t0;
  const pixelsPerSecond = (width * height * totalFrames) / (embedTimeMs / 1000);
  return {
    blob,
    width,
    height,
    fps,
    totalFrames,
    avgPsnr: totalPsnr / totalFrames,
    sampleOriginal,
    sampleWatermarked,
    embedTimeMs,
    pixelsPerSecond,
  };
}
| /** | |
| * Extract frames from a video (used by detect panel). | |
| * Spreads frames evenly across the full duration. | |
| */ | |
| export async function extractFrames( | |
| videoUrl: string, | |
| maxFrames: number = 30, | |
| onProgress?: (frame: number, total: number) => void | |
| ): Promise<{ frames: ImageData[]; width: number; height: number; fps: number; duration: number }> { | |
| const video = document.createElement('video'); | |
| video.src = videoUrl; | |
| video.muted = true; | |
| video.preload = 'auto'; | |
| await new Promise<void>((resolve, reject) => { | |
| video.onloadedmetadata = () => resolve(); | |
| video.onerror = () => reject(new Error('Failed to load video')); | |
| }); | |
| const { videoWidth: rawW, videoHeight: rawH, duration } = video; | |
| const width = rawW % 2 === 0 ? rawW : rawW - 1; | |
| const height = rawH % 2 === 0 ? rawH : rawH - 1; | |
| const nativeFrameCount = Math.ceil(duration * 30); | |
| const totalFrames = Math.min(maxFrames, nativeFrameCount); | |
| const interval = duration / totalFrames; | |
| const canvas = document.createElement('canvas'); | |
| canvas.width = width; | |
| canvas.height = height; | |
| const ctx = canvas.getContext('2d')!; | |
| const frames: ImageData[] = []; | |
| for (let i = 0; i < totalFrames; i++) { | |
| video.currentTime = i * interval; | |
| await new Promise<void>((resolve) => { | |
| video.onseeked = () => resolve(); | |
| }); | |
| ctx.drawImage(video, 0, 0, width, height); | |
| frames.push(ctx.getImageData(0, 0, width, height)); | |
| onProgress?.(i + 1, totalFrames); | |
| } | |
| const fps = totalFrames / duration; | |
| return { frames, width, height, fps, duration }; | |
| } | |
| // --------------------------------------------------------------------------- | |
| // Attack utilities — apply degradation to watermarked MP4, return Y planes | |
| // --------------------------------------------------------------------------- | |
/** Result of decoding an attacked video back to Y planes */
export interface AttackYPlanes {
  /** One grayscale (luma) plane per decoded frame, row-major, 1 byte per pixel */
  yPlanes: Uint8Array[];
  /** Width of each plane in pixels */
  width: number;
  /** Height of each plane in pixels */
  height: number;
}
| /** | |
| * Write blob to ffmpeg FS, run attack filter, decode output to Y planes. | |
| * Only processes `maxFrames` frames to keep WASM encoding fast. | |
| */ | |
| async function runAttackPipelineOnce( | |
| blob: Blob, | |
| filterArgs: string[], | |
| outputWidth: number, | |
| outputHeight: number, | |
| maxFrames: number = 30, | |
| ): Promise<AttackYPlanes> { | |
| const ffmpeg = await getFFmpeg(); | |
| const inputData = new Uint8Array(await blob.arrayBuffer()); | |
| await ffmpeg.writeFile('attack_input.mp4', inputData); | |
| await ffmpeg.exec([ | |
| '-i', 'attack_input.mp4', | |
| ...filterArgs, | |
| '-frames:v', String(maxFrames), | |
| '-c:v', 'libx264', | |
| '-pix_fmt', 'yuv420p', | |
| '-preset', 'ultrafast', | |
| '-an', | |
| '-y', | |
| 'attack_output.mp4', | |
| ]); | |
| // Decode attacked output to raw grayscale | |
| await ffmpeg.exec([ | |
| '-i', 'attack_output.mp4', | |
| '-f', 'rawvideo', | |
| '-pix_fmt', 'gray', | |
| '-y', | |
| 'attack_raw.raw', | |
| ]); | |
| const rawData = await ffmpeg.readFile('attack_raw.raw'); | |
| await ffmpeg.deleteFile('attack_input.mp4'); | |
| await ffmpeg.deleteFile('attack_output.mp4'); | |
| await ffmpeg.deleteFile('attack_raw.raw'); | |
| if (typeof rawData === 'string') throw new Error('Unexpected string output from ffmpeg'); | |
| const raw = new Uint8Array((rawData as unknown as { buffer: ArrayBuffer }).buffer); | |
| const frameSize = outputWidth * outputHeight; | |
| const frameCount = Math.floor(raw.length / frameSize); | |
| const yPlanes: Uint8Array[] = []; | |
| for (let i = 0; i < frameCount; i++) { | |
| yPlanes.push(raw.slice(i * frameSize, (i + 1) * frameSize)); | |
| } | |
| return { yPlanes, width: outputWidth, height: outputHeight }; | |
| } | |
| /** Run attack pipeline with one retry on WASM crash */ | |
| async function runAttackPipeline( | |
| blob: Blob, | |
| filterArgs: string[], | |
| outputWidth: number, | |
| outputHeight: number, | |
| maxFrames: number = 30, | |
| ): Promise<AttackYPlanes> { | |
| try { | |
| return await runAttackPipelineOnce(blob, filterArgs, outputWidth, outputHeight, maxFrames); | |
| } catch (e) { | |
| console.warn('ffmpeg.wasm crashed, reinitializing and retrying:', e); | |
| resetFFmpeg(); | |
| return await runAttackPipelineOnce(blob, filterArgs, outputWidth, outputHeight, maxFrames); | |
| } | |
| } | |
| /** Attack: re-encode at a given CRF quality level */ | |
| export async function attackReencode(blob: Blob, crf: number, width: number, height: number): Promise<AttackYPlanes> { | |
| return runAttackPipeline(blob, ['-crf', String(crf)], width, height); | |
| } | |
| /** Attack: downscale to scalePct%, then scale back up to original size */ | |
| export async function attackDownscale( | |
| blob: Blob, | |
| scalePct: number, | |
| width: number, | |
| height: number, | |
| ): Promise<AttackYPlanes> { | |
| const scaledW = Math.round(width * scalePct / 100); | |
| const scaledH = Math.round(height * scalePct / 100); | |
| // Ensure even dimensions for libx264 | |
| const sW = scaledW % 2 === 0 ? scaledW : scaledW + 1; | |
| const sH = scaledH % 2 === 0 ? scaledH : scaledH + 1; | |
| return runAttackPipeline( | |
| blob, | |
| ['-vf', `scale=${sW}:${sH},scale=${width}:${height}`, '-crf', '18'], | |
| width, | |
| height, | |
| ); | |
| } | |
| /** Attack: adjust brightness by delta (-1.0 to 1.0) */ | |
| export async function attackBrightness(blob: Blob, delta: number, width: number, height: number): Promise<AttackYPlanes> { | |
| return runAttackPipeline(blob, ['-vf', `eq=brightness=${delta}`, '-crf', '18'], width, height); | |
| } | |
| /** Attack: adjust contrast (1.0 = unchanged, <1 = less, >1 = more) */ | |
| export async function attackContrast(blob: Blob, factor: number, width: number, height: number): Promise<AttackYPlanes> { | |
| return runAttackPipeline(blob, ['-vf', `eq=contrast=${factor}`, '-crf', '18'], width, height); | |
| } | |
| /** Attack: adjust saturation (1.0 = unchanged, 0 = grayscale, >1 = boosted) */ | |
| export async function attackSaturation(blob: Blob, factor: number, width: number, height: number): Promise<AttackYPlanes> { | |
| return runAttackPipeline(blob, ['-vf', `eq=saturation=${factor}`, '-crf', '18'], width, height); | |
| } | |
| /** Attack: crop by given pixel amounts from each edge */ | |
| export async function attackCrop( | |
| blob: Blob, | |
| cropLeft: number, | |
| cropTop: number, | |
| cropRight: number, | |
| cropBottom: number, | |
| width: number, | |
| height: number, | |
| ): Promise<AttackYPlanes> { | |
| const outW = width - cropLeft - cropRight; | |
| const outH = height - cropTop - cropBottom; | |
| // Ensure even dimensions for libx264 | |
| const w = outW % 2 === 0 ? outW : outW - 1; | |
| const h = outH % 2 === 0 ? outH : outH - 1; | |
| return runAttackPipeline( | |
| blob, | |
| ['-vf', `crop=${w}:${h}:${cropLeft}:${cropTop}`, '-crf', '18'], | |
| w, | |
| h, | |
| ); | |
| } | |