deep-encryption-analysis / segmenter.py
multimedia-cryptography-benchmarks's picture
Upload segmenter.py with huggingface_hub
b8a5759 verified
Raw
History Blame Contribute Delete
3.7 kB
import asyncio
import hashlib
import logging
import os
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.primitives import padding
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# NOTE(review): the passphrase is hard-coded in source, so anyone who can read
# this file can derive the key and IV below.
_PASSPHRASE = b"youzerseef92004"
# 32-byte SHA-256 digest of the passphrase; sliced into key and IV below.
_KEY_MATERIAL = hashlib.sha256(_PASSPHRASE).digest()
# First 16 bytes of the digest: used as the AES key in _encrypt.
ENCRYPTION_KEY = _KEY_MATERIAL[:16]
# Last 16 bytes of the digest: used as the CBC IV in _encrypt.
# NOTE(review): the IV is a constant, so every call to _encrypt reuses the
# same key/IV pair.
ENCRYPTION_IV = _KEY_MATERIAL[16:32]
# Segment length in seconds; passed to ffmpeg as -segment_time and recorded
# as each segment's duration in the manifest.
SEGMENT_DURATION = 60
def _encrypt(data: bytes) -> bytes:
    """Return *data* encrypted with AES-CBC after PKCS7 padding.

    The key and IV are the module-level ``ENCRYPTION_KEY`` and
    ``ENCRYPTION_IV``; the input is padded to the 128-bit block size first,
    so the result is always a whole number of blocks.
    """
    # Pad up to a multiple of the 128-bit block size.
    pkcs7 = padding.PKCS7(128).padder()
    block_aligned = pkcs7.update(data) + pkcs7.finalize()

    # A fresh encryptor context is built for every call.
    aes_cbc = Cipher(
        algorithms.AES(ENCRYPTION_KEY),
        modes.CBC(ENCRYPTION_IV),
    ).encryptor()
    return aes_cbc.update(block_aligned) + aes_cbc.finalize()
async def ffmpeg_split(
    input_path: str,
    output_dir: str,
    base_name: str,
) -> list[str] | None:
    """Split *input_path* into stream-copied segments and rename them to ``.bin``.

    Runs ``ffmpeg`` with the ``segment`` muxer (``-c copy``, so nothing is
    re-encoded), writing ``<base_name>_NNNNN.ts`` files into *output_dir*,
    then renames every matching ``.ts`` file to ``.bin``.

    Args:
        input_path: Media file handed to ffmpeg via ``-i``.
        output_dir: Directory for the segments; created if missing.
        base_name: Filename prefix for the generated segments.

    Returns:
        The sorted ``.bin`` file names (not full paths) inside *output_dir*,
        or ``None`` if ffmpeg timed out or no segment file was found.
    """
    os.makedirs(output_dir, exist_ok=True)
    pattern = os.path.join(output_dir, f"{base_name}_%05d.ts")
    cmd = [
        "ffmpeg", "-y",
        "-i", input_path,
        "-c", "copy",
        "-f", "segment",
        "-segment_time", str(SEGMENT_DURATION),
        "-reset_timestamps", "1",
        pattern,
    ]
    # Report the configured duration instead of a hard-coded "60".
    logger.info("Segmenting (%ds): %s", SEGMENT_DURATION, input_path)
    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        _, stderr = await asyncio.wait_for(proc.communicate(), timeout=7200)
    except asyncio.TimeoutError:
        # Kill the process and reap it so no zombie or open pipe is left behind.
        proc.kill()
        await proc.communicate()
        logger.error("FFmpeg timed out: %s", input_path)
        return None

    stderr_text = stderr.decode(errors="replace")
    if proc.returncode != 0:
        # Surface failures that still left some segments on disk; the
        # segments found below are returned as before (best effort).
        logger.warning(
            "FFmpeg exited with code %s: %s", proc.returncode, stderr_text[:500]
        )

    # NOTE: this matches any "<base_name>_*.ts" in output_dir, including files
    # left there by an earlier run.
    prefix = f"{base_name}_"
    files = sorted(
        name for name in os.listdir(output_dir)
        if name.startswith(prefix) and name.endswith(".ts")
    )
    if not files:
        logger.error("FFmpeg produced no segments: %s", stderr_text[:500])
        return None
    logger.info("FFmpeg created %d segments @%ds", len(files), SEGMENT_DURATION)

    # Rename .ts → .bin for ML dataset camouflage.
    # Only the trailing extension is swapped: str.replace would also rewrite
    # a ".ts" occurring inside base_name (e.g. "my.tsfile_00000.ts").
    renamed = []
    for ts_name in files:
        bin_name = ts_name.removesuffix(".ts") + ".bin"
        os.rename(
            os.path.join(output_dir, ts_name),
            os.path.join(output_dir, bin_name),
        )
        renamed.append(bin_name)
    logger.info("Renamed %d segments .ts → .bin for camouflage", len(renamed))
    return renamed
async def prepare_segments(input_path: str, base_name: str, title: str) -> dict | None:
    """Segment *input_path*, encrypt every segment in place, and build a manifest.

    Segments are written to ``<dir of input_path>/segments/<base_name>`` by
    ``ffmpeg_split``; each file is then read, encrypted with ``_encrypt`` and
    overwritten, using the event loop's default executor so the file I/O does
    not block the loop.

    Args:
        input_path: Source media file.
        base_name: Prefix for segment file names and name of the work directory.
        title: Stored verbatim under the manifest's ``"title"`` key.

    Returns:
        A manifest dict with ``version``, ``title``, ``duration``, a public
        ``segments`` list (``id``/``duration``) and the private ``_work_dir``
        and ``_ts_segments`` entries, or ``None`` when segmentation yielded
        nothing.

    Note:
        Every segment is recorded with the nominal ``SEGMENT_DURATION``; the
        real length of each file is not measured, so ``duration`` is simply
        the segment count times ``SEGMENT_DURATION``.
    """
    work_dir = os.path.join(os.path.dirname(input_path), "segments", base_name)
    segments = await ffmpeg_split(input_path, work_dir, base_name)
    if not segments:
        return None

    # Defined once, outside the loop; the path is passed in explicitly.
    def _encrypt_file(p: str) -> int:
        """Encrypt the file at *p* in place and return the encrypted size."""
        with open(p, "rb") as f:
            raw = f.read()
        encrypted = _encrypt(raw)
        with open(p, "wb") as f:
            f.write(encrypted)
        return len(encrypted)

    loop = asyncio.get_running_loop()
    seg_list = []
    for bin_name in segments:
        bin_path = os.path.join(work_dir, bin_name)
        size = await loop.run_in_executor(None, _encrypt_file, bin_path)
        seg_list.append({
            # Strip only the trailing extension: str.replace would also
            # remove a ".bin" occurring inside base_name.
            "id": bin_name.removesuffix(".bin"),
            "name": bin_name,
            "path": bin_path,
            "size": size,
            "duration": SEGMENT_DURATION,
        })

    # Same left-to-right float accumulation as adding one segment at a time.
    total_duration = sum((s["duration"] for s in seg_list), 0.0)

    manifest = {
        "version": 5,
        "title": title,
        "duration": total_duration,
        "segments": [
            {"id": s["id"], "duration": s["duration"]}
            for s in seg_list
        ],
        "_work_dir": work_dir,
        "_ts_segments": seg_list,
    }
    return manifest