| { | |
| "model_type": "packedavatar", | |
| "library_name": "packed_avatar", | |
| "bundle_file": "PackedAvatar.pt", | |
| "runtime_script": "PackedAvatar.py", | |
| "version": 1, | |
| "architecture": { | |
| "animation_engine": "SadTalker", | |
| "audio_to_coeff": "Audio2Coeff", | |
| "face_renderer": "FaceVid2Vid|PIRender (fallback)", | |
| "wav2lip": "Wav2Lip GAN (optional)", | |
| "rmbg": "Bria RMBG 2.0", | |
| "avatar_bank": "AvatarBank", | |
| "bfm": "BFM face model assets" | |
| }, | |
| "bundle_layout": { | |
| "archives": { | |
| "checkpoints_zip": "checkpoints.zip", | |
| "sadtalker_zip": "sadtalker.zip" | |
| }, | |
| "checkpoints": { | |
| "avatar_bank": "checkpoints/AvatarBank.pt", | |
| "wav2lip_gan": "checkpoints/wav2lip_gan.pth", | |
| "bria_rmbg": "checkpoints/briaaiRMBG-2.0/", | |
| "bfm": "checkpoints/BFM/", | |
| "face_detectors": "checkpoints/face_detectors/" | |
| }, | |
| "sadtalker": { | |
| "source": "SadTalker/", | |
| "hub_checkpoints": "SadTalker/hub/checkpoints/" | |
| }, | |
| "manifest": "manifest.json", | |
| "metadata": "manifest" | |
| }, | |
| "media": { | |
| "audio": { | |
| "sample_rate": 16000, | |
| "accepted_formats": [ | |
| "wav", | |
| "mp3", | |
| "m4a" | |
| ], | |
| "auto_convert_to_wav": true | |
| }, | |
| "video": { | |
| "output_format": "mp4", | |
| "preview_supported": true, | |
| "accepted_reference_formats": [ | |
| "mp4", | |
| "mov", | |
| "mkv", | |
| "webm" | |
| ] | |
| }, | |
| "image": { | |
| "accepted_formats": [ | |
| "png", | |
| "jpg", | |
| "jpeg", | |
| "webp" | |
| ], | |
| "auto_composite_alpha": true | |
| } | |
| }, | |
| "defaults": { | |
| "default_avatar": "", | |
| "device_priority": [ | |
| "cuda", | |
| "mps", | |
| "cpu" | |
| ], | |
| "use_wav2lip_by_default": false, | |
| "remove_background_by_default": false, | |
| "cache_dir": "<system_temp>/PackedAvatarCache", | |
| "cache_validation": "sha256(checkpoints_zip + sadtalker_zip)" | |
| }, | |
| "avatar_bank": { | |
| "included": true, | |
| "avatar_count": 100, | |
| "styles": [ | |
| "anime", | |
| "cyber", | |
| "drawn", | |
| "paint", | |
| "real" | |
| ], | |
| "resolution_previews": true, | |
| "preview_formats": [ | |
| "png", | |
| "zstd-compressed-png" | |
| ] | |
| }, | |
| "conditioning": { | |
| "avatar_condition_formats": [ | |
| ".pt", | |
| ".pth", | |
| ".mat", | |
| "dict" | |
| ], | |
| "motion_condition_formats": [ | |
| ".pt", | |
| ".pth", | |
| ".mat", | |
| "dict" | |
| ], | |
| "legacy_normalization": [ | |
| "motion_3dmm", | |
| "full_3dmm", | |
| "coeff_3dmm" | |
| ] | |
| }, | |
| "runtime_behavior": { | |
| "lazy_module_loading": true, | |
| "cache_invalidation_on_bundle_change": true, | |
| "avatar_resolution_priority": [ | |
| "avatar_condition", | |
| "source_image", | |
| "avatar_id", | |
| "default_avatar" | |
| ], | |
| "background_removal_pipeline": "Input Image -> Bria RMBG -> Foreground -> SadTalker -> MP4", | |
| "post_processing": { | |
| "wav2lip": "optional post-pass", | |
| "enhancer": "gfpgan (optional)" | |
| }, | |
| "memory_cleanup": [ | |
| "del preprocess_model", | |
| "del audio_to_coeff", | |
| "del animate_from_coeff", | |
| "torch.cuda.empty_cache()", | |
| "gc.collect()" | |
| ] | |
| }, | |
| "supported_commands": [ | |
| "generate", | |
| "extract-embeddings", | |
| "list-avatars", | |
| "list-bundle-contents", | |
| "download-bundle", | |
| "validate-bundle", | |
| "cleanup-cache" | |
| ], | |
| "cli_examples": { | |
| "basic": "python PackedAvatar.py --source-image person.jpg --driven-audio speech.wav", | |
| "avatarbank": "python PackedAvatar.py --avatar-id Rebecca --driven-audio speech.wav", | |
| "background_removal": "python PackedAvatar.py --source-image portrait.png --driven-audio speech.wav --remove-background", | |
| "wav2lip": "python PackedAvatar.py --source-image portrait.png --driven-audio speech.wav --use-wav2lip" | |
| }, | |
| "input_files": [ | |
| "PackedAvatar.pt", | |
| "PackedAvatar.py", | |
| "requirements.txt", | |
| "README.md" | |
| ], | |
| "query_file": true, | |
| "recommended_env": { | |
| "python": [ | |
| "3.10", | |
| "3.11" | |
| ], | |
| "pytorch": "2.x (CUDA-enabled for GPU)", | |
| "ffmpeg": "required for reference-video audio extraction" | |
| }, | |
| "performance_guidance": { | |
| "256_resolution_vram": "4-6 GB (recommended)", | |
| "512_resolution_vram": "8-12 GB (recommended)", | |
| "high_quality_vram": "12+ GB", | |
| "notes": "The first run is slower due to extraction; subsequent runs reuse the cache and start faster." | |
| }, | |
| "security_and_ethics": { | |
| "bundle_trust": "Bundle is treated as a trusted runtime artifact", | |
| "ethical_use_note": "Users must obtain consent for generating talking-head videos of real people", | |
| "license": "apache-2.0 (see repo)" | |
| } | |
| } | |