PackedAvatar.py · HiMind/Packed-Avatar at main

Upload 2 files

64cfabb verified 5 days ago

59.2 kB

	from __future__ import annotations

	import argparse
	import hashlib
	import importlib.util
	import io
	import json
	import os
	import platform
	import shutil
	import subprocess
	import sys
	import tempfile
	import time
	import uuid
	import zipfile
	from dataclasses import dataclass
	from difflib import get_close_matches
	from pathlib import Path
	from typing import Any, Dict, List, Optional, Tuple, Union

	import cv2
	import numpy as np
	import torch
	import zstandard as zstd
	from PIL import Image
	from pydub import AudioSegment
	from scipy.io import loadmat, savemat


	# ============================================================
	# GENERAL HELPERS
	# ============================================================

	REAL_STYLE_ALIASES = {"real", "realistic", "photo", "photoreal", "liveaction"}


	def ensure_dir(path: Path) -> None:
	    """Create ``path`` as a directory, including any missing parents.

	    An already existing directory is left untouched.
	    """
	    os.makedirs(path, exist_ok=True)


	def utc_now_iso() -> str:
	    """Return the current UTC time as an ISO-8601 string with second precision."""
	    import datetime as _dt

	    now = _dt.datetime.now(_dt.timezone.utc)
	    return now.isoformat(timespec="seconds")


	def sha256_bytes(data: bytes) -> str:
	    """Return the hexadecimal SHA-256 digest of ``data``."""
	    digest = hashlib.sha256()
	    digest.update(data)
	    return digest.hexdigest()


	def sha256_file(path: Path, chunk_size: int = 1024 * 1024) -> str:
	    """Return the hexadecimal SHA-256 digest of the file at ``path``.

	    The file is read in pieces of ``chunk_size`` bytes so it never has to
	    fit in memory at once.
	    """
	    digest = hashlib.sha256()
	    with path.open("rb") as stream:
	        while True:
	            piece = stream.read(chunk_size)
	            if not piece:
	                break
	            digest.update(piece)
	    return digest.hexdigest()


	def tensor_to_bytes(obj: Any) -> bytes:
	    """Convert a tensor or a bytes-like payload into an immutable ``bytes`` object.

	    Raises:
	        TypeError: if ``obj`` is neither ``bytes``/``bytearray`` nor a tensor.
	    """
	    if torch.is_tensor(obj):
	        host_copy = obj.detach().cpu().contiguous()
	        return host_copy.numpy().tobytes()
	    if not isinstance(obj, (bytes, bytearray)):
	        raise TypeError(f"Expected bytes or tensor, got {type(obj)!r}")
	    return bytes(obj)


	def bytes_to_tensor(data: bytes) -> torch.Tensor:
	    """Return a 1-D ``uint8`` tensor holding a copy of ``data``.

	    The buffer-based path is tried first; if it raises for any reason the
	    bytes are converted element by element instead.
	    """
	    try:
	        view = memoryview(data)
	        shared = torch.frombuffer(view, dtype=torch.uint8)
	        # clone() detaches the result from the caller's buffer.
	        return shared.clone()
	    except Exception:
	        values = list(data)
	        return torch.tensor(values, dtype=torch.uint8)


	def decode_png_or_zstd_image(blob: bytes) -> Image.Image:
	    """Decode a preview blob that may be a raw PNG or zstd-compressed PNG bytes.

	    Args:
	        blob: Either PNG bytes or a zstd frame wrapping PNG bytes.

	    Returns:
	        The decoded image converted to RGB.

	    Raises:
	        Whatever ``PIL.Image.open`` raises when the (possibly decompressed)
	        payload is not a readable image.
	    """
	    try:
	        # A decompression object is used instead of the one-shot
	        # ``decompress()`` so that frames written without a content size in
	        # their header (e.g. by streaming compressors) are still accepted.
	        raw = zstd.ZstdDecompressor().decompressobj().decompress(blob)
	    except Exception:
	        # Deliberate best effort: anything that is not valid zstd is assumed
	        # to already be the raw image bytes.
	        raw = blob
	    return Image.open(io.BytesIO(raw)).convert("RGB")


	def pil_to_numpy_rgb(img: Image.Image) -> np.ndarray:
	    """Convert ``img`` to RGB and return it as a ``uint8`` NumPy array."""
	    rgb_image = img.convert("RGB")
	    return np.asarray(rgb_image, dtype=np.uint8)


	def normalize_style_name(style: Optional[str]) -> str:
	    """Return ``style`` stripped and lower-cased; falsy input becomes ``""``."""
	    if not style:
	        return ""
	    return style.strip().lower()


	def normalize_gender_name(gender: Optional[str]) -> str:
	    """Return ``gender`` stripped and lower-cased; falsy input becomes ``""``."""
	    if not gender:
	        return ""
	    return gender.strip().lower()


	def safe_load_bundle(path_or_bundle: Any) -> Optional[Dict[str, Any]]:
	    """Resolve a conditioning input into a dictionary bundle.

	    ``None`` and dictionaries are returned unchanged. A string or path-like
	    value is loaded with ``torch.load`` (``.pt``/``.pth``) or
	    ``scipy.io.loadmat`` (``.mat``) depending on its suffix.

	    Raises:
	        TypeError: for any other input, including paths with another suffix.
	    """
	    if path_or_bundle is None or isinstance(path_or_bundle, dict):
	        return path_or_bundle

	    if isinstance(path_or_bundle, (str, os.PathLike)):
	        bundle_path = Path(path_or_bundle)
	        suffix = bundle_path.suffix.lower()
	        if suffix == ".mat":
	            return loadmat(str(bundle_path))
	        if suffix in {".pt", ".pth"}:
	            # NOTE: weights_only=False unpickles arbitrary objects; only
	            # load files from trusted sources.
	            return torch.load(str(bundle_path), map_location="cpu", weights_only=False)

	    raise TypeError("Conditioning input must be None, a dict, or a .pt/.pth/.mat path")
	def _resolve_checkpoint(self):
	    """Return the first known SadTalker checkpoint found under ``self.checkpoint_path``.

	    NOTE(review): this module-level function takes ``self`` and has the same
	    body as ``SadTalkerRunner._resolve_checkpoint``; it looks like a stray
	    copy - confirm before removing.

	    Raises:
	        FileNotFoundError: if none of the candidate file names exists.
	    """
	    base_dir = Path(self.checkpoint_path)
	    # Probed in priority order: safetensors before .pth, 512 before 256.
	    for file_name in (
	        "SadTalker_V0.0.2_512.safetensors",
	        "SadTalker_V0.0.2_256.safetensors",
	        "SadTalker_V0.0.2_512.pth",
	        "SadTalker_V0.0.2_256.pth",
	    ):
	        candidate = base_dir / file_name
	        if candidate.exists():
	            return str(candidate)

	    raise FileNotFoundError(
	        f"No SadTalker checkpoint found in {self.checkpoint_path}"
	    )

	def composite_alpha_to_rgb(image_path: Path, bg_rgb=(255, 255, 255)) -> Path:
	    """Flatten an image onto a solid background and save it as an RGB PNG.

	    The image is converted to RGBA, composited over an opaque ``bg_rgb``
	    canvas and written next to the input as ``<stem>_rgb.png``.

	    Returns:
	        Path of the newly written PNG.
	    """
	    with Image.open(image_path) as source:
	        rgba = source.convert("RGBA")
	        opaque_fill = (*bg_rgb, 255)
	        canvas = Image.new("RGBA", rgba.size, opaque_fill)
	        flattened = Image.alpha_composite(canvas, rgba).convert("RGB")

	    destination = image_path.with_name(f"{image_path.stem}_rgb.png")
	    flattened.save(destination)
	    return destination


	def prepare_image_for_sadtalker(image_path: Path, remove_background_result: Optional[Path] = None) -> Path:
	    """Return the path of an RGB image suitable as SadTalker input.

	    When a background-removal result is supplied it is always flattened to
	    RGB. Otherwise the original image is flattened only if it is in mode
	    ``RGBA``/``LA`` or carries a ``transparency`` entry; an image without
	    transparency is returned as-is.
	    """
	    if remove_background_result is not None:
	        return composite_alpha_to_rgb(remove_background_result)

	    with Image.open(image_path) as probe:
	        has_alpha_mode = probe.mode in {"RGBA", "LA"}
	        if has_alpha_mode or ("transparency" in probe.info):
	            return composite_alpha_to_rgb(image_path)
	    return image_path


	# ============================================================
	# ARCHIVE EXTRACTION
	# ============================================================

	@dataclass
	class MountedArchive:
	    """Record describing a zip payload that has been extracted to disk."""

	    # Name stored in (or used for) the marker file.
	    name: str
	    # SHA-256 of the zip payload that was extracted.
	    zip_sha256: str
	    # Directory the archive was extracted into.
	    target_dir: Path
	    # JSON marker file that records the completed extraction.
	    marker_path: Path


	def extract_zip_bytes_to_dir(zip_bytes: bytes, dest_dir: Path) -> None:
	    """Extract an in-memory zip archive into ``dest_dir``, creating it if needed."""
	    ensure_dir(dest_dir)
	    buffer = io.BytesIO(zip_bytes)
	    with zipfile.ZipFile(buffer, "r") as archive:
	        archive.extractall(dest_dir)


	def mount_zip_payload(zip_bytes: bytes, zip_sha256: str, target_dir: Path, marker_name: str) -> MountedArchive:
	    """Extract a zip payload into ``target_dir`` unless it is already mounted.

	    A JSON marker file inside ``target_dir`` records the hash of the archive
	    that was last extracted. When the marker matches ``zip_sha256`` the
	    existing extraction is reused; otherwise the directory is emptied and the
	    archive is extracted again.

	    Args:
	        zip_bytes: Raw bytes of the zip archive.
	        zip_sha256: Hash identifying ``zip_bytes``; compared with the marker.
	        target_dir: Directory to extract into (created when missing).
	        marker_name: File name of the marker inside ``target_dir``.

	    Returns:
	        A ``MountedArchive`` describing the mounted directory.
	    """
	    ensure_dir(target_dir)
	    marker_path = target_dir / marker_name

	    if marker_path.exists():
	        try:
	            existing = json.loads(marker_path.read_text(encoding="utf-8"))
	        except (OSError, ValueError):
	            # Unreadable or corrupt marker: treat the directory as stale.
	            existing = None

	        if (
	            isinstance(existing, dict)
	            and existing.get("zip_sha256") == zip_sha256
	            and existing.get("mounted") is True
	        ):
	            return MountedArchive(
	                name=existing.get("name", marker_name),
	                zip_sha256=zip_sha256,
	                target_dir=target_dir,
	                marker_path=marker_path,
	            )

	        # Drop the stale marker BEFORE touching the contents. If extraction
	        # is interrupted, the old marker must not survive and vouch for a
	        # directory that no longer holds the archive it describes.
	        try:
	            marker_path.unlink()
	        except OSError:
	            pass

	    # Clear any stale contents before extracting.
	    for child in list(target_dir.iterdir()):
	        if child == marker_path:
	            continue
	        if child.is_dir():
	            shutil.rmtree(child, ignore_errors=True)
	        else:
	            try:
	                child.unlink()
	            except OSError:
	                # Best effort: a leftover file does not block extraction.
	                pass

	    extract_zip_bytes_to_dir(zip_bytes, target_dir)

	    # The marker is written last so it only exists after a full extraction.
	    marker_payload = {
	        "mounted": True,
	        "zip_sha256": zip_sha256,
	        "name": marker_name,
	        "created_at": utc_now_iso(),
	    }
	    marker_path.write_text(json.dumps(marker_payload, indent=2), encoding="utf-8")

	    return MountedArchive(
	        name=marker_name,
	        zip_sha256=zip_sha256,
	        target_dir=target_dir,
	        marker_path=marker_path,
	    )


	# ============================================================
	# AVATAR BANK RUNTIME
	# ============================================================
	class AvatarBankRuntime:
	    """In-memory view of an avatar bank payload.

	    The payload is a dictionary with three optional sections, each keyed by
	    avatar id: ``index`` (metadata, read here for ``gender`` and ``style``),
	    ``embeddings`` (bundles read here for ``motion_3dmm``, ``full_3dmm``,
	    ``crop_info`` and ``crop_preview``) and ``previews`` (blobs handed to
	    ``decode_png_or_zstd_image``).
	    """

	    def __init__(
	        self,
	        payload: Dict[str, Any],
	        defaults: Optional[Dict[str, Any]] = None,
	    ):
	        """Wrap ``payload``; missing or empty sections become empty dicts."""
	        self.index: Dict[str, Dict[str, Any]] = payload.get("index", {}) or {}
	        self.embeddings: Dict[str, Dict[str, Any]] = payload.get("embeddings", {}) or {}
	        self.previews: Dict[str, Any] = payload.get("previews", {}) or {}
	        self.defaults = defaults or {}

	    @classmethod
	    def load(
	        cls,
	        path: Path,
	        defaults: Optional[Dict[str, Any]] = None,
	    ) -> "AvatarBankRuntime":
	        """Load a bank previously written with ``torch.save``.

	        Raises:
	            ValueError: if the file does not contain a dictionary.
	        """
	        # NOTE: weights_only=False unpickles arbitrary objects; only load
	        # bank files from trusted sources.
	        payload = torch.load(str(path), map_location="cpu", weights_only=False)

	        if not isinstance(payload, dict):
	            raise ValueError(f"Avatar bank file did not contain a dictionary: {path}")

	        return cls(payload, defaults=defaults)

	    def save(self, path: Union[str, Path]) -> None:
	        """Write the three bank sections to ``path`` with ``torch.save``."""
	        torch.save(
	            {
	                "index": self.index,
	                "embeddings": self.embeddings,
	                "previews": self.previews,
	            },
	            str(path),
	        )

	    # --------------------------------------------------------
	    # BASIC ACCESS
	    # --------------------------------------------------------

	    def __contains__(self, avatar_id: str) -> bool:
	        return avatar_id in self.index

	    def exists(self, avatar_id: str) -> bool:
	        """Return True when ``avatar_id`` has an index entry."""
	        return avatar_id in self.index

	    def available_ids(self) -> List[str]:
	        """Return all avatar ids present in the index."""
	        return list(self.index.keys())

	    def list_avatars(self) -> List[str]:
	        """Alias of :meth:`available_ids`."""
	        return self.available_ids()

	    def get_metadata(self, avatar_id: str) -> Dict[str, Any]:
	        """Return a shallow copy of the index entry for ``avatar_id``.

	        Raises:
	            KeyError: if the avatar is not in the index.
	        """
	        if avatar_id not in self.index:
	            raise KeyError(f"Avatar not found: {avatar_id}")

	        return dict(self.index[avatar_id])

	    def get_avatar(self, avatar_id: str) -> Dict[str, Any]:
	        """Alias of :meth:`build_avatar_condition`."""
	        return self.build_avatar_condition(avatar_id)

	    def get_embedding_bundle(self, avatar_id: str) -> Dict[str, Any]:
	        """Return the stored embedding bundle (not a copy).

	        Raises:
	            KeyError: if the avatar has no embeddings entry.
	        """
	        if avatar_id not in self.embeddings:
	            raise KeyError(f"Avatar not found: {avatar_id}")

	        return self.embeddings[avatar_id]

	    # --------------------------------------------------------
	    # FUZZY SEARCH
	    # --------------------------------------------------------

	    def _fuzzy_match_single(
	        self,
	        query: str,
	        choices: set,
	        cutoff: float = 0.6,
	    ):
	        """Return the choice closest to ``query`` (case-insensitive), or None."""
	        if not query:
	            return None

	        # Match on lower-cased text but hand back the original spelling.
	        choice_map = {c.lower(): c for c in choices}

	        matches = get_close_matches(
	            query.lower(),
	            list(choice_map.keys()),
	            n=1,
	            cutoff=cutoff,
	        )

	        return choice_map[matches[0]] if matches else None

	    def fuzzy_search_id(
	        self,
	        query_id: str,
	        n: int = 5,
	        cutoff: float = 0.5,
	    ) -> List[str]:
	        """Return up to ``n`` avatar ids that closely match ``query_id``."""
	        id_map = {aid.lower(): aid for aid in self.index.keys()}

	        matches = get_close_matches(
	            query_id.lower(),
	            list(id_map.keys()),
	            n=n,
	            cutoff=cutoff,
	        )

	        return [id_map[m] for m in matches]

	    # --------------------------------------------------------
	    # QUERY
	    # --------------------------------------------------------

	    def query(
	        self,
	        gender=None,
	        style=None,
	        fuzzy=True,
	        cutoff=0.6,
	    ) -> List[str]:
	        """Return ids whose metadata matches the given gender and/or style.

	        With ``fuzzy`` enabled each filter is first snapped to the closest
	        value actually present in the index; when nothing is close enough
	        the filter is used verbatim. A filter left as ``None`` (or any falsy
	        value) is not applied.
	        """
	        target_gender = gender
	        target_style = style

	        if fuzzy:
	            if gender:
	                known_genders = {
	                    meta["gender"]
	                    for meta in self.index.values()
	                    if meta.get("gender")
	                }
	                target_gender = (
	                    self._fuzzy_match_single(gender, known_genders, cutoff)
	                    or gender
	                )

	            if style:
	                known_styles = {
	                    meta["style"]
	                    for meta in self.index.values()
	                    if meta.get("style")
	                }
	                target_style = (
	                    self._fuzzy_match_single(style, known_styles, cutoff)
	                    or style
	                )

	        results = []

	        for aid, meta in self.index.items():
	            if target_gender and meta.get("gender") != target_gender:
	                continue
	            if target_style and meta.get("style") != target_style:
	                continue
	            results.append(aid)

	        return results

	    # --------------------------------------------------------
	    # PREVIEWS
	    # --------------------------------------------------------

	    def get_preview(self, avatar_id: str):
	        """Decode and return the stored preview image.

	        Raises:
	            KeyError: if the avatar has no preview entry.
	        """
	        if avatar_id not in self.previews:
	            raise KeyError(f"Avatar not found: {avatar_id}")

	        return decode_png_or_zstd_image(self.previews[avatar_id])

	    def get_preview_numpy(
	        self,
	        avatar_id: str,
	    ) -> Optional[np.ndarray]:
	        """Public alias of :meth:`_preview_to_numpy`."""
	        return self._preview_to_numpy(avatar_id)

	    def _preview_to_numpy(
	        self,
	        avatar_id: str,
	    ) -> Optional[np.ndarray]:
	        """Return the preview as an RGB array, or None if missing or undecodable."""
	        blob = self.previews.get(avatar_id)

	        if blob is None:
	            return None

	        try:
	            img = decode_png_or_zstd_image(blob)
	            return pil_to_numpy_rgb(img)
	        except Exception:
	            # Deliberate best effort: a broken preview must not block use
	            # of the avatar itself.
	            return None

	    # --------------------------------------------------------
	    # MUTATION
	    # --------------------------------------------------------

	    def delete_avatar(
	        self,
	        avatar_id: str,
	    ) -> None:
	        """Remove ``avatar_id`` from all three sections; unknown ids are ignored."""
	        self.index.pop(avatar_id, None)
	        self.embeddings.pop(avatar_id, None)
	        self.previews.pop(avatar_id, None)

	    # --------------------------------------------------------
	    # DEFAULTS / CONDITIONING
	    # --------------------------------------------------------

	    def _style_is_real(self, style: Optional[str]) -> bool:
	        """Return True when ``style`` normalizes to one of ``REAL_STYLE_ALIASES``."""
	        return normalize_style_name(style) in REAL_STYLE_ALIASES

	    def resolve_default_avatar_id(self) -> str:
	        """Pick the avatar to use when the caller did not name one.

	        Preference order: the configured ``default_avatar`` (if it is in the
	        index), the first real-style male, any real-style avatar, any male
	        avatar, any avatar with a non-None embedding, then the first index
	        entry.

	        Raises:
	            RuntimeError: if the index is empty.
	        """
	        if not self.index:
	            raise RuntimeError("Avatar bank is empty.")

	        default_id = self.defaults.get("default_avatar")
	        if default_id and default_id in self.index:
	            return default_id

	        # Prefer first real male.
	        for avatar_id, meta in self.index.items():
	            if normalize_gender_name(meta.get("gender")) == "male" and self._style_is_real(meta.get("style")):
	                return avatar_id

	        # Then any real-style avatar.
	        for avatar_id, meta in self.index.items():
	            if self._style_is_real(meta.get("style")):
	                return avatar_id

	        # Then any male avatar.
	        for avatar_id, meta in self.index.items():
	            if normalize_gender_name(meta.get("gender")) == "male":
	                return avatar_id

	        # Then any complete avatar.
	        for avatar_id, emb in self.embeddings.items():
	            if emb is not None:
	                return avatar_id

	        # Finally first available entry.
	        return next(iter(self.index.keys()))

	    def build_avatar_condition(self, avatar_id: str) -> Dict[str, Any]:
	        """Assemble the conditioning dictionary for ``avatar_id``.

	        ``coeff_3dmm`` is taken from ``motion_3dmm`` and falls back to
	        ``full_3dmm``. ``crop_preview`` comes from the embedding bundle or,
	        when absent there, from the stored preview. Tensors are detached and
	        moved to the CPU.

	        Raises:
	            KeyError: if the avatar has no embeddings entry.
	            ValueError: if neither ``motion_3dmm`` nor ``full_3dmm`` is set.
	        """
	        if avatar_id not in self.embeddings:
	            raise KeyError(f"Avatar not found: {avatar_id}")

	        meta = self.index.get(avatar_id, {}) or {}
	        emb = self.embeddings[avatar_id] or {}

	        coeff = emb.get("motion_3dmm")
	        if coeff is None:
	            coeff = emb.get("full_3dmm")
	        if coeff is None:
	            raise ValueError(f"Avatar '{avatar_id}' is missing motion_3dmm/full_3dmm")

	        crop_preview = emb.get("crop_preview")
	        if crop_preview is None:
	            crop_preview = self._preview_to_numpy(avatar_id)
	        elif torch.is_tensor(crop_preview):
	            crop_preview = crop_preview.detach().cpu()
	        elif not isinstance(crop_preview, np.ndarray):
	            crop_preview = np.asarray(crop_preview)

	        out = {
	            "avatar_id": avatar_id,
	            "gender": meta.get("gender"),
	            "style": meta.get("style"),
	            "coeff_3dmm": coeff.detach().cpu() if torch.is_tensor(coeff) else coeff,
	            "motion_3dmm": emb.get("motion_3dmm"),
	            "full_3dmm": emb.get("full_3dmm"),
	            "crop_info": emb.get("crop_info"),
	            "crop_preview": crop_preview,
	        }
	        for key in ("motion_3dmm", "full_3dmm"):
	            if torch.is_tensor(out[key]):
	                out[key] = out[key].detach().cpu()
	        return out


	# ============================================================
	# BRIA RMBG BACKGROUND REMOVER (BEST-EFFORT)
	# ============================================================

	class BriaBackgroundRemover:
	    """
	    Best-effort loader for the packed briaaiRMBG-2.0 directory.

	    It searches for a likely inference script and tries callable or CLI-based
	    execution patterns. If the local folder layout differs, the search list
	    below is the only part that usually needs adjustment.

	    NOTE: the discovered script is imported and executed in-process and as a
	    subprocess; ``root`` must therefore point at trusted code.
	    """

	    def __init__(self, root: Path):
	        self.root = root
	        self.entrypoint = self._discover_entrypoint()

	    def _discover_entrypoint(self) -> Optional[Path]:
	        """Return the script to use for inference, or None when nothing fits."""
	        if not self.root.exists():
	            return None

	        preferred = [
	            "inference.py",
	            "predict.py",
	            "app.py",
	            "main.py",
	            "run.py",
	        ]
	        for name in preferred:
	            # Only the first hit is needed, so do not materialize the walk.
	            hit = next(self.root.rglob(name), None)
	            if hit is not None:
	                return hit

	        # Fall back to any Python file with a likely folder name.
	        for p in self.root.rglob("*.py"):
	            lower = str(p).lower()
	            if "bria" in lower or "rmbg" in lower or "background" in lower:
	                return p
	        return None

	    def _import_module_from_path(self, py_file: Path):
	        """Import ``py_file`` under a name derived from the hash of its path.

	        Raises:
	            RuntimeError: if no import spec/loader can be built for the file.
	        """
	        module_name = f"packed_bria_{sha256_bytes(str(py_file).encode('utf-8'))[:12]}"
	        spec = importlib.util.spec_from_file_location(module_name, str(py_file))
	        if spec is None or spec.loader is None:
	            raise RuntimeError(f"Could not import module from {py_file}")
	        module = importlib.util.module_from_spec(spec)
	        spec.loader.exec_module(module)
	        return module

	    def _call_module_callable(self, module, image_path: Path, output_path: Path) -> bool:
	        """Try well-known function names with several argument shapes.

	        Returns True as soon as one call produced ``output_path`` (directly,
	        or through a returned path, PIL image or HxWx3/4 tensor).
	        """
	        candidates = [
	            "remove_background",
	            "predict_image",
	            "predict",
	            "run",
	            "inference",
	            "main",
	        ]
	        callables = [getattr(module, name, None) for name in candidates]
	        callables = [fn for fn in callables if callable(fn)]
	        for fn in callables:
	            # Load the pixels and close the file right away; the original
	            # lazy ``Image.open`` left one open handle per candidate.
	            with Image.open(image_path) as handle:
	                pil_image = handle.copy()

	            attempts = [
	                (str(image_path), str(output_path)),
	                (str(image_path),),
	                (pil_image,),
	                (),
	            ]
	            for args in attempts:
	                try:
	                    result = fn(*args)
	                    if isinstance(result, (str, os.PathLike)):
	                        result_path = Path(result)
	                        if result_path.exists():
	                            shutil.copy2(result_path, output_path)
	                            return True
	                    elif isinstance(result, Image.Image):
	                        result.save(output_path)
	                        return True
	                    elif torch.is_tensor(result):
	                        arr = result.detach().cpu().numpy()
	                        if arr.ndim == 3 and arr.shape[-1] in (3, 4):
	                            img = Image.fromarray(arr.astype(np.uint8))
	                            img.save(output_path)
	                            return True
	                    elif result is None and output_path.exists():
	                        return True
	                except (Exception, SystemExit):
	                    # Wrong call shape for this candidate. SystemExit is
	                    # included because a CLI-style ``main()`` may call
	                    # ``sys.exit`` on bad arguments, which would otherwise
	                    # terminate the host process.
	                    continue
	        return False

	    def _call_cli_with_patterns(self, image_path: Path, output_path: Path) -> bool:
	        """Run the entrypoint as a script with common argument conventions."""
	        if self.entrypoint is None:
	            return False

	        cmd_patterns = [
	            [str(self.entrypoint), str(image_path), str(output_path)],
	            [str(self.entrypoint), "--input", str(image_path), "--output", str(output_path)],
	            [str(self.entrypoint), "--image", str(image_path), "--output", str(output_path)],
	            [str(self.entrypoint), "--input_path", str(image_path), "--output_path", str(output_path)],
	            [str(self.entrypoint), "-i", str(image_path), "-o", str(output_path)],
	        ]

	        for args in cmd_patterns:
	            try:
	                proc = subprocess.run(
	                    [sys.executable, *args],
	                    cwd=str(self.root),
	                    stdout=subprocess.DEVNULL,
	                    stderr=subprocess.DEVNULL,
	                    check=False,
	                )
	                if proc.returncode == 0 and output_path.exists():
	                    return True
	            except Exception:
	                continue
	        return False

	    def remove_background(self, image_path: Path, output_dir: Path) -> Path:
	        """Remove the background of ``image_path`` and return the result path.

	        The in-process import is tried first, then the CLI patterns. The
	        output is written to ``<output_dir>/<stem>_rmbg.png``.

	        Raises:
	            RuntimeError: if no entrypoint was discovered or every call
	                pattern failed.
	        """
	        if self.entrypoint is None:
	            raise RuntimeError(
	                f"No usable background-removal entrypoint found under {self.root}."
	            )

	        ensure_dir(output_dir)
	        output_path = output_dir / f"{image_path.stem}_rmbg.png"

	        try:
	            module = self._import_module_from_path(self.entrypoint)
	            if self._call_module_callable(module, image_path, output_path):
	                return output_path
	        except Exception:
	            # Deliberate best effort: fall through to the CLI patterns.
	            pass

	        if self._call_cli_with_patterns(image_path, output_path):
	            return output_path

	        raise RuntimeError(
	            f"Could not execute background removal with entrypoint {self.entrypoint}. "
	            f"You may need to adjust the call patterns in BriaBackgroundRemover."
	        )


	# ============================================================
	# SADTALKER CORE RUNTIME
	# ============================================================

	class SadTalkerRunner:
	    """Wrapper that drives the packed SadTalker modules.

	    It loads the SadTalker classes lazily, converts in-memory conditioning
	    bundles into the ``.mat``/``.png`` files those classes consume, and runs
	    the preprocess -> audio-to-coeff -> render sequence.
	    """

	    # Model attributes created by _ensure_models and released after generate().
	    _MODEL_ATTRS = ("preprocess_model", "audio_to_coeff", "animate_from_coeff")

	    def __init__(self, checkpoint_path: str, config_path: str, device: str = "cpu"):
	        self.checkpoint_path = checkpoint_path
	        self.config_path = config_path
	        self.device = device
	        self._mods_loaded = False
	        self._load_modules()

	    def _load_modules(self):
	        """Import the SadTalker entry points once and keep them on ``self``."""
	        if self._mods_loaded:
	            return

	        from SadTalker.src.facerender.pirender_animate import AnimateFromCoeff_PIRender
	        from SadTalker.src.utils.preprocess import CropAndExtract
	        from SadTalker.src.test_audio2coeff import Audio2Coeff
	        from SadTalker.src.facerender.animate import AnimateFromCoeff
	        from SadTalker.src.generate_batch import get_data
	        from SadTalker.src.generate_facerender_batch import get_facerender_data
	        from SadTalker.src.utils.init_path import init_path

	        self.AnimateFromCoeff_PIRender = AnimateFromCoeff_PIRender
	        self.CropAndExtract = CropAndExtract
	        self.Audio2Coeff = Audio2Coeff
	        self.AnimateFromCoeff = AnimateFromCoeff
	        self.get_data = get_data
	        self.get_facerender_data = get_facerender_data
	        self.init_path = init_path
	        self._mods_loaded = True

	    @staticmethod
	    def _mp3_to_wav(mp3_filename: str, wav_filename: str, frame_rate: int):
	        """Re-encode an audio file to WAV at ``frame_rate``."""
	        mp3_file = AudioSegment.from_file(file=mp3_filename)
	        mp3_file.set_frame_rate(frame_rate).export(wav_filename, format="wav")

	    def _to_numpy(self, x):
	        """Return ``x`` as a NumPy array (None stays None, tensors go via CPU)."""
	        if x is None:
	            return None
	        if isinstance(x, np.ndarray):
	            return x
	        if torch.is_tensor(x):
	            return x.detach().cpu().numpy()
	        return np.asarray(x)

	    def _save_png_from_bundle(self, bundle, out_path):
	        """Write the first usable image field of ``bundle`` to ``out_path``.

	        Fields are probed in the order ``crop_preview``, ``aligned_face``,
	        ``image``, ``png``; a field is usable when it is a 3-D array whose
	        last axis has 1, 3 or 4 entries.

	        Raises:
	            ValueError: if no field qualifies.
	        """
	        for key in ("crop_preview", "aligned_face", "image", "png"):
	            if key not in bundle or bundle[key] is None:
	                continue
	            arr = self._to_numpy(bundle[key])
	            if arr.ndim != 3 or arr.shape[-1] not in (1, 3, 4):
	                continue
	            if arr.dtype != np.uint8:
	                arr = np.clip(arr, 0, 255).astype(np.uint8)

	            channels = arr.shape[-1]
	            if channels == 4:
	                img = Image.fromarray(arr, mode="RGBA").convert("RGB")
	            elif channels == 1:
	                # A single-channel array cannot be read as "RGB"; decode it
	                # as grayscale and expand to three channels.
	                img = Image.fromarray(arr[..., 0], mode="L").convert("RGB")
	            else:
	                img = Image.fromarray(arr, mode="RGB")
	            img.save(out_path)
	            return out_path
	        raise ValueError(
	            "Avatar conditioning bundle needs at least one image-like field such as crop_preview or aligned_face."
	        )

	    def _save_mat_from_avatar_bundle(self, bundle, out_path):
	        """Write an avatar bundle as a ``.mat`` file with ``coeff_3dmm``.

	        ``coeff_3dmm`` is taken from the first non-None of ``coeff_3dmm``,
	        ``motion_3dmm``, ``full_3dmm``; ``full_3dmm`` is stored too if set.

	        Raises:
	            ValueError: if none of the three keys holds a value.
	        """
	        coeff_3dmm = None
	        for key in ("coeff_3dmm", "motion_3dmm", "full_3dmm"):
	            coeff_3dmm = bundle.get(key, None)
	            if coeff_3dmm is not None:
	                break
	        if coeff_3dmm is None:
	            raise ValueError("Avatar bundle must contain coeff_3dmm, motion_3dmm, or full_3dmm.")

	        mat_dict = {"coeff_3dmm": self._to_numpy(coeff_3dmm)}
	        full_3dmm = bundle.get("full_3dmm", None)
	        if full_3dmm is not None:
	            mat_dict["full_3dmm"] = self._to_numpy(full_3dmm)

	        savemat(out_path, mat_dict)
	        return out_path

	    def _save_mat_from_motion_bundle(self, bundle, out_path):
	        """Write a motion bundle as a ``.mat`` file with ``coeff_3dmm``.

	        ``coeff_3dmm`` is taken from the first non-None of ``motion_3dmm``,
	        ``coeff_3dmm``, ``full_3dmm_seq``, ``full_3dmm``.

	        Raises:
	            ValueError: if none of those keys holds a value.
	        """
	        motion = None
	        for key in ("motion_3dmm", "coeff_3dmm", "full_3dmm_seq", "full_3dmm"):
	            motion = bundle.get(key, None)
	            if motion is not None:
	                break

	        if motion is None:
	            raise ValueError(
	                "Motion bundle must contain motion_3dmm, coeff_3dmm, full_3dmm_seq, or full_3dmm."
	            )

	        mat_dict = {"coeff_3dmm": self._to_numpy(motion)}

	        if "full_3dmm" in bundle and bundle["full_3dmm"] is not None:
	            mat_dict["full_3dmm"] = self._to_numpy(bundle["full_3dmm"])
	        elif "full_3dmm_seq" in bundle and bundle["full_3dmm_seq"] is not None:
	            seq = self._to_numpy(bundle["full_3dmm_seq"])
	            # For arrays with three or more axes only the first entry is kept.
	            mat_dict["full_3dmm"] = seq[0] if seq.ndim >= 3 else seq

	        savemat(out_path, mat_dict)
	        return out_path

	    def _bundle_from_preprocess_output(
	        self,
	        coeff_path,
	        crop_pic_path,
	        crop_info,
	    ):
	        """Build an in-memory bundle from the files preprocessing produced."""
	        bundle = {}

	        # Load whatever the SadTalker preprocessing wrote to disk.
	        if coeff_path is not None and os.path.isfile(coeff_path):
	            try:
	                raw = loadmat(coeff_path)
	                for key, value in raw.items():
	                    # Skip loadmat's own "__header__"-style bookkeeping keys.
	                    if not key.startswith("__"):
	                        bundle[key] = value
	            except Exception:
	                # Best effort: the paths below are still useful on their own.
	                pass

	        # Preserve the paths used to generate the bundle.
	        if coeff_path is not None:
	            bundle["coeff_path"] = str(coeff_path)
	        if crop_pic_path is not None:
	            bundle["crop_pic_path"] = str(crop_pic_path)
	        if crop_info is not None:
	            bundle["crop_info"] = crop_info

	        # Keep a usable preview in memory.
	        try:
	            if crop_pic_path is not None and os.path.isfile(crop_pic_path):
	                with Image.open(crop_pic_path) as im:
	                    bundle["crop_preview"] = pil_to_numpy_rgb(im)
	        except Exception:
	            pass

	        # Normalize common aliases so downstream code can rely on them.
	        if "coeff_3dmm" in bundle and "motion_3dmm" not in bundle:
	            bundle["motion_3dmm"] = bundle["coeff_3dmm"]
	        if "motion_3dmm" in bundle and "coeff_3dmm" not in bundle:
	            bundle["coeff_3dmm"] = bundle["motion_3dmm"]

	        if "full_3dmm" not in bundle:
	            if "full_3dmm_seq" in bundle:
	                seq = bundle["full_3dmm_seq"]
	                try:
	                    if hasattr(seq, "ndim") and seq.ndim >= 3:
	                        bundle["full_3dmm"] = seq[0]
	                    else:
	                        bundle["full_3dmm"] = seq
	                except Exception:
	                    bundle["full_3dmm"] = seq
	            elif "motion_3dmm" in bundle:
	                bundle["full_3dmm"] = bundle["motion_3dmm"]

	        return bundle

	    def extract_embeddings(
	        self,
	        input_path,
	        crop_or_resize: str = "crop",
	        pic_size: int = 256,
	        save_dir: Optional[str] = None,
	    ):
	        """
	        Public preprocessing helper.

	        Accepts either a source image or a reference video, runs the packed
	        SadTalker preprocessing, and returns the extracted conditioning bundle.

	        Raises:
	            FileNotFoundError: if ``input_path`` does not exist.
	        """
	        self._load_modules()
	        self._ensure_models(size=pic_size, preprocess=crop_or_resize, facerender="facevid2vid")

	        input_path = Path(input_path)
	        if not input_path.exists():
	            raise FileNotFoundError(str(input_path))

	        if save_dir is None:
	            save_dir = tempfile.mkdtemp(prefix="packedavatar_embeddings_")
	        else:
	            ensure_dir(Path(save_dir))

	        work_dir = Path(save_dir)
	        input_ext = input_path.suffix.lower()
	        video_exts = {".mp4", ".mov", ".avi", ".mkv", ".webm", ".flv", ".wmv", ".m4v", ".gif"}

	        if input_ext in video_exts:
	            frame_dir = work_dir / f"{input_path.stem}_frames"
	            ensure_dir(frame_dir)
	            coeff_path, crop_pic_path, crop_info = self.preprocess_model.generate(
	                str(input_path),
	                str(frame_dir),
	                crop_or_resize,
	                source_image_flag=False,
	            )
	        else:
	            staged = work_dir / input_path.name
	            # copy2 raises SameFileError when save_dir is the input's own
	            # directory; in that case the file is already staged.
	            if staged.resolve() != input_path.resolve():
	                shutil.copy2(input_path, staged)

	            first_frame_dir = work_dir / "first_frame_dir"
	            ensure_dir(first_frame_dir)
	            coeff_path, crop_pic_path, crop_info = self.preprocess_model.generate(
	                str(staged),
	                str(first_frame_dir),
	                crop_or_resize,
	                True,
	                pic_size,
	            )

	        return self._bundle_from_preprocess_output(coeff_path, crop_pic_path, crop_info)

	    def ExtractEmbeddings(
	        self,
	        input_path,
	        crop_or_resize: str = "crop",
	        pic_size: int = 256,
	        save_dir: Optional[str] = None,
	    ):
	        """CamelCase alias of :meth:`extract_embeddings`."""
	        return self.extract_embeddings(
	            input_path=input_path,
	            crop_or_resize=crop_or_resize,
	            pic_size=pic_size,
	            save_dir=save_dir,
	        )

	    def _materialize_avatar_condition(self, avatar_condition, save_dir):
	        """Return ``(coeff_path, crop_pic_path, crop_info)`` for an avatar bundle.

	        Files referenced by the bundle are reused when they exist; otherwise
	        they are written into ``save_dir``. A ``None`` condition yields
	        ``(None, None, None)``.
	        """
	        bundle = safe_load_bundle(avatar_condition)
	        if bundle is None:
	            return None, None, None

	        coeff_path = bundle.get("coeff_path", None)
	        crop_pic_path = bundle.get("crop_pic_path", None)
	        crop_info = bundle.get("crop_info", None)

	        if coeff_path is None or not os.path.isfile(coeff_path):
	            coeff_path = os.path.join(save_dir, "avatar_condition.mat")
	            self._save_mat_from_avatar_bundle(bundle, coeff_path)

	        if crop_pic_path is None or not os.path.isfile(crop_pic_path):
	            crop_pic_path = os.path.join(save_dir, "avatar_condition.png")
	            self._save_png_from_bundle(bundle, crop_pic_path)

	        return coeff_path, crop_pic_path, crop_info

	    def _materialize_motion_condition(self, motion_condition, save_dir):
	        """Return the path of a coefficient ``.mat`` file for a motion bundle."""
	        bundle = safe_load_bundle(motion_condition)
	        if bundle is None:
	            return None

	        coeff_path = bundle.get("coeff_path", None)
	        if coeff_path is not None and os.path.isfile(coeff_path):
	            return coeff_path

	        coeff_path = os.path.join(save_dir, "motion_condition.mat")
	        self._save_mat_from_motion_bundle(bundle, coeff_path)
	        return coeff_path

	    def _resolve_checkpoint(self):
	        """Return the first known checkpoint file under ``self.checkpoint_path``.

	        Raises:
	            FileNotFoundError: if none of the candidate names exists.
	        """
	        candidates = [
	            "SadTalker_V0.0.2_512.safetensors",
	            "SadTalker_V0.0.2_256.safetensors",
	            "SadTalker_V0.0.2_512.pth",
	            "SadTalker_V0.0.2_256.pth",
	        ]

	        for name in candidates:
	            p = Path(self.checkpoint_path) / name
	            if p.exists():
	                return str(p)

	        raise FileNotFoundError(
	            f"No SadTalker checkpoint found in {self.checkpoint_path}"
	        )

	    def _ensure_models(self, size: int, preprocess: str, facerender: str):
	        """Instantiate the three SadTalker models for the given settings."""
	        self.sadtalker_paths = self.init_path(
	            self.checkpoint_path,
	            self.config_path,
	            size,
	            False,
	            preprocess,
	        )

	        # override whatever init_path guessed
	        self.sadtalker_paths["checkpoint"] = self._resolve_checkpoint()

	        print("\n[PackedAvatar] Using checkpoint:")
	        print(self.sadtalker_paths["checkpoint"])

	        self.audio_to_coeff = self.Audio2Coeff(
	            self.sadtalker_paths,
	            self.device
	        )

	        self.preprocess_model = self.CropAndExtract(
	            self.sadtalker_paths,
	            self.device
	        )

	        if facerender == "facevid2vid" and self.device != "mps":
	            self.animate_from_coeff = self.AnimateFromCoeff(
	                self.sadtalker_paths,
	                self.device
	            )
	        else:
	            self.animate_from_coeff = self.AnimateFromCoeff_PIRender(
	                self.sadtalker_paths,
	                self.device
	            )

	    def _release_models(self):
	        """Drop the model attributes and free cached accelerator memory.

	        Missing attributes are ignored so this is safe to call after a
	        partially failed ``_ensure_models``.
	        """
	        for attr in self._MODEL_ATTRS:
	            self.__dict__.pop(attr, None)

	        if torch.cuda.is_available():
	            torch.cuda.empty_cache()
	            torch.cuda.synchronize()

	        import gc

	        gc.collect()

	    def _prepare_audio(
	        self,
	        driven_audio,
	        use_idle_mode,
	        length_of_audio,
	        use_ref_video,
	        ref_info,
	        ref_video,
	        save_dir,
	        input_dir,
	    ):
	        """Stage the driving audio and return its path.

	        Raises:
	            AssertionError: if no audio source was specified at all.
	            ValueError: if the reference video is the only audio source but
	                ``ref_video`` is None.
	            RuntimeError: if ffmpeg could not be started.
	        """
	        audio_path = None

	        if driven_audio is not None and os.path.isfile(driven_audio):
	            audio_name = os.path.basename(driven_audio)
	            audio_path = os.path.join(input_dir, audio_name)

	            if audio_name.lower().endswith(".mp3"):
	                wav_path = os.path.splitext(audio_path)[0] + ".wav"
	                self._mp3_to_wav(driven_audio, wav_path, 16000)
	                audio_path = wav_path
	            else:
	                shutil.copy2(driven_audio, audio_path)

	        elif use_idle_mode:
	            audio_path = os.path.join(input_dir, f"idlemode_{str(length_of_audio)}.wav")
	            silence = AudioSegment.silent(duration=1000 * length_of_audio)
	            silence.export(audio_path, format="wav")

	        elif not (use_ref_video is True and ref_info == "all"):
	            # Raised explicitly (instead of ``assert``) so the check survives
	            # ``python -O``; the exception type is kept for compatibility.
	            raise AssertionError(
	                "Either driven_audio, use_idle_mode, or use_ref_video/ref_info='all' must be provided."
	            )

	        if use_ref_video and ref_info == "all" and ref_video is not None:
	            ref_video_videoname = os.path.basename(ref_video)
	            audio_path = os.path.join(save_dir, ref_video_videoname + ".wav")
	            # Argument list instead of a shell string: paths containing
	            # quotes or shell metacharacters are passed through verbatim.
	            ffmpeg_cmd = [
	                "ffmpeg", "-y", "-hide_banner", "-loglevel", "error",
	                "-i", str(ref_video), audio_path,
	            ]
	            try:
	                subprocess.run(ffmpeg_cmd, check=False)
	            except OSError as err:
	                raise RuntimeError("ffmpeg could not be executed to extract the reference audio.") from err

	        if audio_path is None:
	            # Previously this surfaced later as an unbound-variable error.
	            raise ValueError(
	                "ref_video is required when use_ref_video/ref_info='all' is the only audio source."
	            )
	        return audio_path

	    def generate(
	        self,
	        source_image=None,
	        driven_audio=None,
	        preprocess="crop",
	        still_mode=False,
	        use_enhancer=False,
	        batch_size=1,
	        size=256,
	        pose_style=0,
	        facerender="facevid2vid",
	        exp_scale=1.0,
	        use_ref_video=False,
	        ref_video=None,
	        ref_info=None,
	        use_idle_mode=False,
	        length_of_audio=0,
	        use_blink=True,
	        result_dir="./results/",
	        avatar_condition=None,
	        motion_condition=None,
	    ):
	        """Render a talking-head video.

	        The face comes from ``avatar_condition`` (a bundle or bundle path) or
	        from ``source_image``. Motion comes from the audio, optionally
	        combined with ``motion_condition`` or a reference video according to
	        ``ref_info`` (``"pose"``, ``"blink"``, ``"pose+blink"`` or ``"all"``).
	        All work files go to a fresh UUID-named directory under
	        ``result_dir``.

	        Returns:
	            ``(return_path, audio_path, save_dir)`` where ``return_path`` is
	            whatever the renderer's ``generate`` returned.

	        Raises:
	            AttributeError: if no face/coefficients could be obtained.
	            ValueError: on a missing ``source_image``, an unknown
	                ``ref_info`` or a missing reference video.
	        """
	        self._load_modules()
	        try:
	            self._ensure_models(size=size, preprocess=preprocess, facerender=facerender)

	            time_tag = str(uuid.uuid4())
	            save_dir = os.path.join(result_dir, time_tag)
	            os.makedirs(save_dir, exist_ok=True)

	            input_dir = os.path.join(save_dir, "input")
	            os.makedirs(input_dir, exist_ok=True)

	            # -----------------------------
	            # Audio handling
	            # -----------------------------
	            audio_path = self._prepare_audio(
	                driven_audio,
	                use_idle_mode,
	                length_of_audio,
	                use_ref_video,
	                ref_info,
	                ref_video,
	                save_dir,
	                input_dir,
	            )

	            # -----------------------------
	            # Avatar / source conditioning
	            # -----------------------------
	            if avatar_condition is not None:
	                first_coeff_path, crop_pic_path, crop_info = self._materialize_avatar_condition(
	                    avatar_condition, save_dir
	                )
	                if first_coeff_path is None:
	                    raise AttributeError("Invalid avatar_condition bundle.")
	                pic_path = crop_pic_path
	            else:
	                if source_image is None:
	                    raise ValueError("source_image is required when avatar_condition is not provided.")

	                pic_path = os.path.join(input_dir, os.path.basename(source_image))
	                shutil.copy2(source_image, pic_path)

	                first_frame_dir = os.path.join(save_dir, "first_frame_dir")
	                os.makedirs(first_frame_dir, exist_ok=True)

	                first_coeff_path, crop_pic_path, crop_info = self.preprocess_model.generate(
	                    pic_path,
	                    first_frame_dir,
	                    preprocess,
	                    True,
	                    size,
	                )

	                if first_coeff_path is None:
	                    raise AttributeError("No face is detected")

	            # -----------------------------
	            # Motion conditioning / reference video
	            # -----------------------------
	            ref_video_coeff_path = None
	            ref_pose_coeff_path = None
	            ref_eyeblink_coeff_path = None

	            if motion_condition is not None:
	                ref_video_coeff_path = self._materialize_motion_condition(motion_condition, save_dir)
	                ref_pose_coeff_path = ref_video_coeff_path
	                ref_eyeblink_coeff_path = ref_video_coeff_path
	            elif use_ref_video and ref_video is not None:
	                ref_video_videoname = os.path.splitext(os.path.split(ref_video)[-1])[0]
	                ref_video_frame_dir = os.path.join(save_dir, ref_video_videoname)
	                os.makedirs(ref_video_frame_dir, exist_ok=True)

	                print("3DMM Extraction for the reference video providing pose")
	                ref_video_coeff_path, _, _ = self.preprocess_model.generate(
	                    ref_video,
	                    ref_video_frame_dir,
	                    preprocess,
	                    source_image_flag=False,
	                )

	                if ref_info == "pose":
	                    ref_pose_coeff_path = ref_video_coeff_path
	                elif ref_info == "blink":
	                    ref_eyeblink_coeff_path = ref_video_coeff_path
	                elif ref_info == "pose+blink":
	                    ref_pose_coeff_path = ref_video_coeff_path
	                    ref_eyeblink_coeff_path = ref_video_coeff_path
	                elif ref_info != "all":
	                    raise ValueError("error in ref_info")

	            # -----------------------------
	            # Audio -> coeff
	            # -----------------------------
	            if use_ref_video and ref_info == "all" and ref_video_coeff_path is not None:
	                coeff_path = ref_video_coeff_path
	            else:
	                batch = self.get_data(
	                    first_coeff_path,
	                    audio_path,
	                    self.device,
	                    ref_eyeblink_coeff_path=ref_eyeblink_coeff_path,
	                    still=still_mode,
	                    idlemode=use_idle_mode,
	                    length_of_audio=length_of_audio,
	                    use_blink=use_blink,
	                )
	                coeff_path = self.audio_to_coeff.generate(
	                    batch,
	                    save_dir,
	                    pose_style,
	                    ref_pose_coeff_path,
	                )

	            # -----------------------------
	            # coeff -> video
	            # -----------------------------
	            data = self.get_facerender_data(
	                coeff_path,
	                crop_pic_path,
	                first_coeff_path,
	                audio_path,
	                batch_size,
	                still_mode=still_mode,
	                preprocess=preprocess,
	                size=size,
	                expression_scale=exp_scale,
	                facemodel=facerender,
	            )

	            # pic_path equals crop_pic_path when an avatar_condition is used.
	            return_path = self.animate_from_coeff.generate(
	                data,
	                save_dir,
	                pic_path,
	                crop_info,
	                enhancer="gfpgan" if use_enhancer else None,
	                preprocess=preprocess,
	                img_size=size,
	            )

	            video_name = data.get("video_name", "output")
	            print(f"The generated video is named {video_name} in {save_dir}")
	        finally:
	            # Release the models on failure as well as on success.
	            self._release_models()

	        return return_path, audio_path, save_dir


	# ============================================================
	# PACKED AVATAR ORCHESTRATOR
	# ============================================================

	class PackedAvatar:
	    """Facade that loads a packed avatar bundle and drives video generation.

	    Construction loads the ``.pt`` bundle, extracts its ``checkpoints_zip``
	    and ``sadtalker_zip`` assets into a cache directory, prepends the
	    extracted root to ``sys.path`` and builds the avatar bank and the
	    background remover from the extracted checkpoints.
	    """

	    def __init__(
	        self,
	        packed_pt_path: Optional[str] = None,
	        cache_dir: Optional[str] = None,
	        device: Optional[str] = None,
	    ):
	        """Load the bundle and mount its contents.

	        Args:
	            packed_pt_path: Path to the packed bundle. Defaults to
	                ``checkpoints/PackedAvatar.pt`` next to this file.
	            cache_dir: Directory receiving the extracted runtime. Defaults to
	                ``PackedAvatarCache`` inside the system temp directory.
	            device: Torch device string. When omitted, CUDA is preferred,
	                then MPS (only if torch reports it available), then CPU.

	        Raises:
	            FileNotFoundError: If the bundle file does not exist.
	        """
	        default_bundle = Path(__file__).resolve().parent / "checkpoints" / "PackedAvatar.pt"
	        self.packed_pt_path = Path(packed_pt_path or default_bundle)
	        if not self.packed_pt_path.exists():
	            raise FileNotFoundError(f"Packed bundle not found: {self.packed_pt_path}")

	        self.device = device or self._default_device()

	        self.cache_dir = Path(cache_dir) if cache_dir else Path(tempfile.gettempdir()) / "PackedAvatarCache"
	        ensure_dir(self.cache_dir)

	        self.bundle = self._load_bundle(self.packed_pt_path)
	        self.manifest = self.bundle.get("manifest", {}) or {}

	        self._extract_and_mount()
	        self._mount_python_path()

	        self.avatar_bank = self._load_avatar_bank()
	        self.bria_root = self.extracted_root / "checkpoints" / "briaaiRMBG-2.0"
	        self.background_remover = BriaBackgroundRemover(self.bria_root)
	        self._runner_cache: Dict[Tuple[int, str, str], SadTalkerRunner] = {}

	    @staticmethod
	    def _default_device() -> str:
	        """Pick a device string: ``cuda``, then ``mps`` if usable, else ``cpu``."""
	        if torch.cuda.is_available():
	            return "cuda"
	        # Being on macOS is not enough: ask torch whether the MPS backend can
	        # actually be used. ``getattr`` tolerates torch builds without it.
	        mps_backend = getattr(torch.backends, "mps", None)
	        if platform.system() == "Darwin" and mps_backend is not None and mps_backend.is_available():
	            return "mps"
	        return "cpu"

	    @staticmethod
	    def _load_bundle(path: Path) -> Dict[str, Any]:
	        """Deserialize the bundle onto the CPU and require it to be a dict.

	        Raises:
	            ValueError: If the file does not contain a dictionary.
	        """
	        # SECURITY: weights_only=False unpickles arbitrary objects, which can
	        # execute code. Only load bundles obtained from a trusted source.
	        bundle = torch.load(str(path), map_location="cpu", weights_only=False)
	        if not isinstance(bundle, dict):
	            raise ValueError("PackedAvatar.pt did not contain a dictionary bundle.")
	        return bundle

	    def _asset_bytes(self, key: str) -> bytes:
	        """Return the raw bytes of the bundle asset stored under ``key``.

	        Raises:
	            KeyError: If the bundle has no such asset.
	        """
	        asset = self.bundle.get("assets", {}).get(key)
	        if asset is None:
	            raise KeyError(f"Missing asset in bundle: {key}")
	        return tensor_to_bytes(asset)

	    def _archive_sha256(self, archive_key: str, default: Optional[str] = None) -> Optional[str]:
	        """Look up ``manifest["archives"][archive_key]["sha256"]`` or ``default``."""
	        return self.manifest.get("archives", {}).get(archive_key, {}).get("sha256", default)

	    def _bundle_id(self) -> str:
	        """Return a 16-hex-character id derived from the two archive hashes."""
	        ck_hash = self._archive_sha256("checkpoints_zip", "")
	        sd_hash = self._archive_sha256("sadtalker_zip", "")
	        seed = f"{ck_hash}:{sd_hash}".encode("utf-8")
	        return sha256_bytes(seed)[:16]

	    def _mount_is_current(self, marker: Path, expected: Dict[str, Any]) -> bool:
	        """Tell whether a previous extraction can be reused as-is.

	        The marker must parse, match ``expected`` and both extracted folders
	        must still be present on disk.
	        """
	        if not marker.exists():
	            return False
	        try:
	            recorded = json.loads(marker.read_text(encoding="utf-8"))
	        except (OSError, ValueError):
	            # Unreadable or corrupt marker: treat the cache as stale.
	            return False
	        return (
	            recorded == expected
	            and self.checkpoints_dir.exists()
	            and self.sadtalker_dir.exists()
	        )

	    def _extract_and_mount(self) -> None:
	        """Extract both embedded archives into the cache unless already done.

	        Sets ``runtime_root``, ``extracted_root``, ``checkpoints_dir`` and
	        ``sadtalker_dir``. A ``mount.json`` marker records which bundle the
	        extraction belongs to; it is written only after the extracted layout
	        has been validated.

	        Raises:
	            RuntimeError: If either expected folder is missing after
	                extraction.
	        """
	        bundle_id = self._bundle_id()
	        runtime_root = self.cache_dir / f"packedavatar_{bundle_id}"
	        self.runtime_root = runtime_root
	        self.extracted_root = runtime_root / "extracted"
	        self.checkpoints_dir = self.extracted_root / "checkpoints"
	        self.sadtalker_dir = self.extracted_root / "SadTalker"
	        ensure_dir(self.extracted_root)

	        marker = runtime_root / "mount.json"
	        expected = {
	            "bundle_id": bundle_id,
	            "checkpoints_sha256": self._archive_sha256("checkpoints_zip"),
	            "sadtalker_sha256": self._archive_sha256("sadtalker_zip"),
	        }

	        if self._mount_is_current(marker, expected):
	            return

	        # Drop the old marker first so an interrupted re-extraction can never
	        # be mistaken for a valid mount on the next start.
	        if marker.exists():
	            marker.unlink()

	        # Reset stale extraction if the bundle changed (best effort).
	        for child in list(self.extracted_root.iterdir()):
	            if child.is_dir():
	                shutil.rmtree(child, ignore_errors=True)
	            else:
	                try:
	                    child.unlink()
	                except OSError:
	                    pass

	        checkpoints_zip = self._asset_bytes("checkpoints_zip")
	        sadtalker_zip = self._asset_bytes("sadtalker_zip")

	        # Extract both archives into the same extracted root.
	        extract_zip_bytes_to_dir(checkpoints_zip, self.extracted_root)
	        extract_zip_bytes_to_dir(sadtalker_zip, self.extracted_root)

	        if not self.checkpoints_dir.exists():
	            raise RuntimeError(f"checkpoints folder missing after extraction: {self.checkpoints_dir}")
	        if not self.sadtalker_dir.exists():
	            raise RuntimeError(f"SadTalker folder missing after extraction: {self.sadtalker_dir}")

	        # Only a validated extraction gets a marker.
	        marker.write_text(json.dumps(expected, indent=2), encoding="utf-8")

	    def _mount_python_path(self) -> None:
	        """Put the extracted root at the front of ``sys.path`` (once)."""
	        extracted = str(self.extracted_root)
	        if extracted not in sys.path:
	            sys.path.insert(0, extracted)

	    def _load_avatar_bank(self) -> AvatarBankRuntime:
	        """Load ``AvatarBank.pt`` from the extracted checkpoints.

	        Raises:
	            FileNotFoundError: If the bank file is not present.
	        """
	        bank_path = self.checkpoints_dir / "AvatarBank.pt"
	        if not bank_path.exists():
	            raise FileNotFoundError(f"AvatarBank.pt not found inside packed checkpoints: {bank_path}")
	        manifest_defaults = self.manifest.get("defaults", {})
	        defaults = {
	            "default_avatar": manifest_defaults.get("default_avatar", ""),
	            "real_style_aliases": manifest_defaults.get("real_style_aliases", list(REAL_STYLE_ALIASES)),
	        }
	        return AvatarBankRuntime.load(bank_path, defaults=defaults)

	    def _get_runner(self, size: int, preprocess: str, facerender: str) -> SadTalkerRunner:
	        """Return the runner cached for ``(size, preprocess, facerender)``.

	        A new runner is built on a cache miss. The three arguments only form
	        the cache key; they are not passed to the runner constructor.
	        """
	        # NOTE(review): the visible tail of SadTalkerRunner.generate deletes
	        # preprocess_model / audio_to_coeff / animate_from_coeff -- confirm
	        # that a cached runner is safe to reuse for a second generate call.
	        key = (int(size), preprocess, facerender)
	        runner = self._runner_cache.get(key)
	        if runner is None:
	            runner = SadTalkerRunner(
	                checkpoint_path=str(self.checkpoints_dir),
	                config_path=str(self.sadtalker_dir / "src" / "config"),
	                device=self.device,
	            )
	            self._runner_cache[key] = runner
	        return runner

	    def extract_embeddings(
	        self,
	        input_path: str,
	        crop_or_resize: str = "crop",
	        pic_size: int = 256,
	        save_dir: Optional[str] = None,
	    ) -> Dict[str, Any]:
	        """
	        Extract a conditioning bundle from a source image or reference video.

	        The returned dictionary is the same kind of bundle the runtime uses
	        internally for avatar conditioning and motion conditioning.
	        """
	        runner = self._get_runner(size=pic_size, preprocess=crop_or_resize, facerender="facevid2vid")
	        return runner.extract_embeddings(
	            input_path=input_path,
	            crop_or_resize=crop_or_resize,
	            pic_size=pic_size,
	            save_dir=save_dir,
	        )

	    def ExtractEmbeddings(
	        self,
	        input_path: str,
	        crop_or_resize: str = "crop",
	        pic_size: int = 256,
	        save_dir: Optional[str] = None,
	    ) -> Dict[str, Any]:
	        """CamelCase spelling of :meth:`extract_embeddings`; forwards unchanged."""
	        return self.extract_embeddings(
	            input_path=input_path,
	            crop_or_resize=crop_or_resize,
	            pic_size=pic_size,
	            save_dir=save_dir,
	        )

	    def _resolve_avatar_condition_from_bank(self, avatar_id: Optional[str]) -> Dict[str, Any]:
	        """Build a condition from the bank, using its default id when none is given."""
	        if avatar_id is None:
	            avatar_id = self.avatar_bank.resolve_default_avatar_id()
	        return self.avatar_bank.build_avatar_condition(avatar_id)

	    def _normalize_avatar_condition(self, avatar_condition: Any) -> Optional[Dict[str, Any]]:
	        """Load a condition bundle and make sure it has a ``coeff_3dmm`` entry.

	        When ``coeff_3dmm`` is absent it is filled from ``motion_3dmm`` or,
	        failing that, ``full_3dmm``. The entry is written into the dict
	        returned by ``safe_load_bundle`` (presumably the caller's own dict
	        when one was passed in -- verify against that helper).
	        """
	        bundle = safe_load_bundle(avatar_condition)
	        if bundle is None:
	            return None
	        if "coeff_3dmm" not in bundle:
	            if "motion_3dmm" in bundle and bundle["motion_3dmm"] is not None:
	                bundle["coeff_3dmm"] = bundle["motion_3dmm"]
	            elif "full_3dmm" in bundle and bundle["full_3dmm"] is not None:
	                bundle["coeff_3dmm"] = bundle["full_3dmm"]
	        return bundle

	    def _remove_background_if_requested(
	        self,
	        source_image: Optional[str],
	        remove_background: bool,
	        work_dir: Path,
	    ) -> Optional[Path]:
	        """Stage the source image in ``work_dir`` and optionally strip its background.

	        Returns:
	            ``None`` when no source image was given, otherwise whatever
	            ``prepare_image_for_sadtalker`` returns for the staged image.

	        Raises:
	            FileNotFoundError: If ``source_image`` does not exist.
	            RuntimeError: If background removal was requested and failed.
	        """
	        if source_image is None:
	            return None

	        src = Path(source_image)
	        if not src.exists():
	            raise FileNotFoundError(str(src))

	        ensure_dir(work_dir)
	        staged = work_dir / src.name
	        try:
	            shutil.copy2(src, staged)
	        except shutil.SameFileError:
	            # The image already lives at the staging location; nothing to copy.
	            pass

	        if not remove_background:
	            return prepare_image_for_sadtalker(staged)

	        # Background removal uses the packed Bria folder. A failure is not
	        # ignored: it is re-raised as RuntimeError with the original cause.
	        try:
	            removed = self.background_remover.remove_background(staged, work_dir)
	            return prepare_image_for_sadtalker(staged, removed)
	        except Exception as e:
	            raise RuntimeError(
	                f"remove_background=True was requested, but Bria RMBG execution failed: {e}"
	            ) from e

	    def _run_wav2lip_gan(
	        self,
	        face_video: str,
	        audio_path: str,
	        save_dir: str,
	        wav2lip_repo: Optional[str] = None,
	    ) -> str:
	        """Post-process ``face_video`` with Wav2Lip GAN when its code is available.

	        Looks for ``inference.py`` in ``wav2lip_repo`` (if given), then in
	        ``Wav2Lip`` folders under the extracted checkpoints, the extracted
	        SadTalker tree and next to this file.

	        Returns:
	            The output video path inside ``save_dir``, or ``face_video``
	            unchanged when no Wav2Lip code was found.

	        Raises:
	            FileNotFoundError: If the bundled ``wav2lip_gan.pth`` is missing.
	            subprocess.CalledProcessError: If the inference script fails.
	        """
	        wav2lip_checkpoint = self.checkpoints_dir / "wav2lip_gan.pth"
	        if not wav2lip_checkpoint.is_file():
	            raise FileNotFoundError(
	                f"Could not find bundled Wav2Lip GAN checkpoint at: {wav2lip_checkpoint}"
	            )

	        candidate_repos = []
	        if wav2lip_repo:
	            candidate_repos.append(Path(wav2lip_repo))

	        # Prefer packed locations first.
	        candidate_repos.extend([
	            self.checkpoints_dir / "Wav2Lip",
	            self.sadtalker_dir / "Wav2Lip",
	            Path(__file__).resolve().parent / "Wav2Lip",
	        ])

	        repo = next(
	            (candidate for candidate in candidate_repos if (candidate / "inference.py").is_file()),
	            None,
	        )

	        # No error just because wav2lip_repo was not passed.
	        # If we cannot find runnable Wav2Lip code anywhere, fall back gracefully.
	        if repo is None:
	            print(
	                "[PackedAvatar] Wav2Lip inference code was not found; "
	                "skipping Wav2Lip post-processing and returning the SadTalker video."
	            )
	            return face_video

	        out_video = os.path.join(save_dir, f"{Path(face_video).stem}_wav2lip_gan.mp4")

	        # The script runs with the repo as its working directory, so every
	        # path handed to it must be absolute; relative ones would otherwise be
	        # resolved inside the repo instead of the caller's working directory.
	        repo_dir = os.path.abspath(str(repo))
	        cmd = [
	            sys.executable,
	            os.path.join(repo_dir, "inference.py"),
	            "--checkpoint_path",
	            os.path.abspath(str(wav2lip_checkpoint)),
	            "--face",
	            os.path.abspath(str(face_video)),
	            "--audio",
	            os.path.abspath(str(audio_path)),
	            "--outfile",
	            os.path.abspath(out_video),
	        ]
	        subprocess.run(cmd, cwd=repo_dir, check=True)
	        # Return the path in the form the caller supplied it (not absolutized).
	        return out_video

	    def list_avatars(self):
	        """Delegate to ``avatar_bank.list_avatars``."""
	        return self.avatar_bank.list_avatars()

	    def query_avatars(self, *args, **kwargs):
	        """Delegate to ``avatar_bank.query``, forwarding all arguments as given."""
	        return self.avatar_bank.query(*args, **kwargs)

	    def fuzzy_search_avatar(self, query, n=5, cutoff=0.5):
	        """Delegate to ``avatar_bank.fuzzy_search_id(query, n, cutoff)``."""
	        return self.avatar_bank.fuzzy_search_id(query, n, cutoff)

	    def get_avatar(self, avatar_id):
	        """Delegate to ``avatar_bank.get_avatar``."""
	        return self.avatar_bank.get_avatar(avatar_id)

	    def get_avatar_preview(self, avatar_id):
	        """Delegate to ``avatar_bank.get_preview``."""
	        return self.avatar_bank.get_preview(avatar_id)

	    def get_avatar_metadata(self, avatar_id):
	        """Delegate to ``avatar_bank.get_metadata``."""
	        return self.avatar_bank.get_metadata(avatar_id)

	    def delete_avatar(self, avatar_id):
	        """Delegate to ``avatar_bank.delete_avatar``; returns ``None``."""
	        self.avatar_bank.delete_avatar(avatar_id)

	    def save_avatar_bank(self, path):
	        """Delegate to ``avatar_bank.save``; returns ``None``."""
	        self.avatar_bank.save(path)

	    def generate(
	        self,
	        source_image: Optional[str] = None,
	        driven_audio: Optional[str] = None,
	        preprocess: str = "crop",
	        still_mode: bool = False,
	        use_enhancer: bool = False,
	        batch_size: int = 1,
	        size: int = 256,
	        pose_style: int = 0,
	        facerender: str = "facevid2vid",
	        exp_scale: float = 1.0,
	        use_ref_video: bool = False,
	        ref_video: Optional[str] = None,
	        ref_info: Optional[str] = None,
	        use_idle_mode: bool = False,
	        length_of_audio: int = 0,
	        use_blink: bool = True,
	        result_dir: str = "./results/",
	        avatar_id: Optional[str] = None,
	        avatar_condition: Optional[Any] = None,
	        motion_condition: Optional[Any] = None,
	        remove_background: bool = False,
	        use_wav2lip: bool = False,
	        wav2lip_repo: Optional[str] = None,
	    ) -> str:
	        """Generate a video and return the path of the result.

	        The identity source is chosen in this order:

	        1. ``avatar_condition`` when it loads to a bundle; ``source_image``
	           is then not passed to the runner.
	        2. ``source_image`` (staged, optionally background-removed).
	        3. The avatar bank entry for ``avatar_id`` (or the bank default).

	        All remaining options are forwarded to the runner's ``generate``.
	        When ``use_wav2lip`` is true the runner output is passed through
	        :meth:`_run_wav2lip_gan`.

	        Returns:
	            Path of the runner output, or of the Wav2Lip output when that
	            step ran.
	        """
	        runner = self._get_runner(size=size, preprocess=preprocess, facerender=facerender)
	        ensure_dir(Path(result_dir))

	        resolved_avatar_condition = self._normalize_avatar_condition(avatar_condition)
	        source_image_for_runner: Optional[str] = source_image

	        if resolved_avatar_condition is not None:
	            # An explicit avatar_condition supersedes source_image-driven conditioning.
	            source_image_for_runner = None
	        elif source_image_for_runner is None:
	            # Neither a condition nor an image: fall back to the packed bank.
	            resolved_avatar_condition = self._resolve_avatar_condition_from_bank(avatar_id)
	        else:
	            # The image goes straight to the runner; optionally background remove it.
	            source_work_dir = self.runtime_root / "source_work"
	            ensure_dir(source_work_dir)
	            prepared = self._remove_background_if_requested(
	                source_image_for_runner, remove_background, source_work_dir
	            )
	            if prepared is not None:
	                source_image_for_runner = str(prepared)

	        return_path, audio_path, save_dir = runner.generate(
	            source_image=source_image_for_runner,
	            driven_audio=driven_audio,
	            preprocess=preprocess,
	            still_mode=still_mode,
	            use_enhancer=use_enhancer,
	            batch_size=batch_size,
	            size=size,
	            pose_style=pose_style,
	            facerender=facerender,
	            exp_scale=exp_scale,
	            use_ref_video=use_ref_video,
	            ref_video=ref_video,
	            ref_info=ref_info,
	            use_idle_mode=use_idle_mode,
	            length_of_audio=length_of_audio,
	            use_blink=use_blink,
	            result_dir=result_dir,
	            avatar_condition=resolved_avatar_condition,
	            motion_condition=motion_condition,
	        )

	        if use_wav2lip:
	            return_path = self._run_wav2lip_gan(
	                face_video=return_path,
	                audio_path=audio_path,
	                save_dir=save_dir,
	                wav2lip_repo=wav2lip_repo,
	            )

	        return return_path


	# Second module-level name bound to the same class object as PackedAvatar.
	PackedAvatarModel = PackedAvatar


	# ============================================================
	# CLI
	# ============================================================

	def build_parser() -> argparse.ArgumentParser:
	    """Build the command-line parser for running the packed avatar bundle.

	    Boolean features that default to on (``use_blink``, ``use_wav2lip``) come
	    as a ``--use-x`` / ``--no-x`` pair sharing one destination, so they can
	    be switched off from the command line.
	    """
	    parser = argparse.ArgumentParser(description="Run the packed avatar bundle.")

	    # Bundle location and runtime.
	    # NOTE(review): this default sits next to the script, whereas
	    # PackedAvatar's own default is under "checkpoints/" -- confirm which
	    # location is intended.
	    parser.add_argument("--packed-pt", type=Path, default=Path(__file__).resolve().parent / "PackedAvatar.pt")
	    parser.add_argument("--cache-dir", type=Path, default=None)
	    parser.add_argument("--device", type=str, default=None)

	    # Inputs and conditioning.
	    parser.add_argument("--source-image", type=Path, default=None)
	    parser.add_argument("--driven-audio", type=Path, default="speech.wav")
	    parser.add_argument("--avatar-id", type=str, default=None)
	    parser.add_argument("--avatar-condition", type=Path, default=None)
	    parser.add_argument("--motion-condition", type=Path, default=None)
	    parser.add_argument("--remove-background", action="store_true")

	    # Wav2Lip post-processing is on by default; --no-wav2lip turns it off.
	    parser.add_argument("--use-wav2lip", action="store_true", default=True)
	    parser.add_argument(
	        "--no-wav2lip",
	        action="store_false",
	        dest="use_wav2lip",
	        help="Skip the Wav2Lip post-processing step.",
	    )
	    parser.add_argument("--wav2lip-repo", type=Path, default=None)

	    # Generation options.
	    parser.add_argument("--result-dir", type=Path, default=Path("./results"))
	    parser.add_argument("--preprocess", type=str, default="crop")
	    parser.add_argument("--size", type=int, default=256)
	    parser.add_argument("--facerender", type=str, default="facevid2vid")
	    parser.add_argument("--still-mode", action="store_true")
	    parser.add_argument("--use-enhancer", action="store_true")
	    parser.add_argument("--batch-size", type=int, default=1)
	    parser.add_argument("--pose-style", type=int, default=0)
	    parser.add_argument("--exp-scale", type=float, default=1.0)
	    parser.add_argument("--use-ref-video", action="store_true")
	    parser.add_argument("--ref-video", type=Path, default=None)
	    parser.add_argument("--ref-info", type=str, default=None)
	    parser.add_argument("--use-idle-mode", action="store_true")
	    parser.add_argument("--length-of-audio", type=int, default=0)
	    parser.add_argument("--use-blink", action="store_true", default=True)
	    parser.add_argument("--no-blink", action="store_false", dest="use_blink")
	    parser.add_argument("--manual-audio", action="store_true", help="Alias for driven-audio handling; kept for clarity.")
	    return parser


	def main() -> None:
	    """CLI entry point: parse arguments, run one generation, print the result path."""
	    args = build_parser().parse_args()

	    def _opt_str(value):
	        # Optional CLI values are handed on as plain strings, or None when unset.
	        return str(value) if value else None

	    model = PackedAvatar(
	        packed_pt_path=str(args.packed_pt),
	        cache_dir=_opt_str(args.cache_dir),
	        device=args.device,
	    )

	    generate_options = {
	        "source_image": _opt_str(args.source_image),
	        "driven_audio": _opt_str(args.driven_audio),
	        "preprocess": args.preprocess,
	        "still_mode": args.still_mode,
	        "use_enhancer": args.use_enhancer,
	        "batch_size": args.batch_size,
	        "size": args.size,
	        "pose_style": args.pose_style,
	        "facerender": args.facerender,
	        "exp_scale": args.exp_scale,
	        "use_ref_video": args.use_ref_video,
	        "ref_video": _opt_str(args.ref_video),
	        "ref_info": args.ref_info,
	        "use_idle_mode": args.use_idle_mode,
	        "length_of_audio": args.length_of_audio,
	        "use_blink": args.use_blink,
	        "result_dir": str(args.result_dir),
	        "avatar_id": args.avatar_id,
	        "avatar_condition": _opt_str(args.avatar_condition),
	        "motion_condition": _opt_str(args.motion_condition),
	        "remove_background": args.remove_background,
	        "use_wav2lip": args.use_wav2lip,
	        "wav2lip_repo": _opt_str(args.wav2lip_repo),
	    }

	    print(model.generate(**generate_options))


	# Run the CLI only when this file is executed as a script, not when imported.
	if __name__ == "__main__":
	main()