Upload config.json

Browse files

Files changed (1) hide show

config.json +175 -0

config.json ADDED Viewed

	@@ -0,0 +1,175 @@

+{
+  "model_type": "packedavatar",
+  "library_name": "packed_avatar",
+  "bundle_file": "PackedAvatar.pt",
+  "runtime_script": "PackedAvatar.py",
+  "version": 1,
+  "architecture": {
+    "animation_engine": "SadTalker",
+    "audio_to_coeff": "Audio2Coeff",
+    "face_renderer": "FaceVid2Vid|PIRender (fallback)",
+    "wav2lip": "Wav2Lip GAN (optional)",
+    "rmbg": "Bria RMBG 2.0",
+    "avatar_bank": "AvatarBank",
+    "bfm": "BFM face model assets"
+  },
+  "bundle_layout": {
+    "archives": {
+      "checkpoints_zip": "checkpoints.zip",
+      "sadtalker_zip": "sadtalker.zip"
+    },
+    "checkpoints": {
+      "avatar_bank": "checkpoints/AvatarBank.pt",
+      "wav2lip_gan": "checkpoints/wav2lip_gan.pth",
+      "bria_rmbg": "checkpoints/briaaiRMBG-2.0/",
+      "bfm": "checkpoints/BFM/",
+      "face_detectors": "checkpoints/face_detectors/"
+    },
+    "sadtalker": {
+      "source": "SadTalker/",
+      "hub_checkpoints": "SadTalker/hub/checkpoints/"
+    },
+    "manifest": "manifest.json",
+    "metadata": "manifest"
+  },
+  "media": {
+    "audio": {
+      "sample_rate": 16000,
+      "accepted_formats": [
+        "wav",
+        "mp3",
+        "m4a"
+      ],
+      "auto_convert_to_wav": true
+    },
+    "video": {
+      "output_format": "mp4",
+      "preview_supported": true,
+      "accepted_reference_formats": [
+        "mp4",
+        "mov",
+        "mkv",
+        "webm"
+      ]
+    },
+    "image": {
+      "accepted_formats": [
+        "png",
+        "jpg",
+        "jpeg",
+        "webp"
+      ],
+      "auto_composite_alpha": true
+    }
+  },
+  "defaults": {
+    "default_avatar": "",
+    "device_priority": [
+      "cuda",
+      "mps",
+      "cpu"
+    ],
+    "use_wav2lip_by_default": false,
+    "remove_background_by_default": false,
+    "cache_dir": "<system_temp>/PackedAvatarCache",
+    "cache_validation": "sha256(checkpoints_zip + sadtalker_zip)"
+  },
+  "avatar_bank": {
+    "included": true,
+    "avatar_count": 100,
+    "styles": [
+      "anime",
+      "cyber",
+      "drawn",
+      "paint",
+      "real"
+    ],
+    "resolution_previews": true,
+    "preview_formats": [
+      "png",
+      "zstd-compressed-png"
+    ]
+  },
+  "conditioning": {
+    "avatar_condition_formats": [
+      ".pt",
+      ".pth",
+      ".mat",
+      "dict"
+    ],
+    "motion_condition_formats": [
+      ".pt",
+      ".pth",
+      ".mat",
+      "dict"
+    ],
+    "legacy_normalization": [
+      "motion_3dmm",
+      "full_3dmm",
+      "coeff_3dmm"
+    ]
+  },
+  "runtime_behavior": {
+    "lazy_module_loading": true,
+    "cache_invalidation_on_bundle_change": true,
+    "avatar_resolution_priority": [
+      "avatar_condition",
+      "source_image",
+      "avatar_id",
+      "default_avatar"
+    ],
+    "background_removal_pipeline": "Input Image -> Bria RMBG -> Foreground -> SadTalker -> MP4",
+    "post_processing": {
+      "wav2lip": "optional post-pass",
+      "enhancer": "gfpgan (optional)"
+    },
+    "memory_cleanup": [
+      "del preprocess_model",
+      "del audio_to_coeff",
+      "del animate_from_coeff",
+      "torch.cuda.empty_cache()",
+      "gc.collect()"
+    ]
+  },
+  "supported_commands": [
+    "generate",
+    "extract-embeddings",
+    "list-avatars",
+    "list-bundle-contents",
+    "download-bundle",
+    "validate-bundle",
+    "cleanup-cache"
+  ],
+  "cli_examples": {
+    "basic": "python PackedAvatar.py --source-image person.jpg --driven-audio speech.wav",
+    "avatarbank": "python PackedAvatar.py --avatar-id Rebecca --driven-audio speech.wav",
+    "background_removal": "python PackedAvatar.py --source-image portrait.png --driven-audio speech.wav --remove-background",
+    "wav2lip": "python PackedAvatar.py --source-image portrait.png --driven-audio speech.wav --use-wav2lip"
+  },
+  "input_files": [
+    "PackedAvatar.pt",
+    "PackedAvatar.py",
+    "requirements.txt",
+    "README.md"
+  ],
+  "query_file": true,
+  "recommended_env": {
+    "python": [
+      "3.10",
+      "3.11"
+    ],
+    "pytorch": "2.x (CUDA-enabled for GPU)",
+    "ffmpeg": "required for reference-video audio extraction"
+  },
+  "performance_guidance": {
+    "256_resolution_vram": "4-6 GB (recommended)",
+    "512_resolution_vram": "8-12 GB (recommended)",
+    "high_quality_vram": "12+ GB",
+    "notes": "The first run is slower because the bundle archives must be extracted; subsequent runs reuse the cache and start faster."
+  },
+  "security_and_ethics": {
+    "bundle_trust": "Bundle is treated as a trusted runtime artifact",
+    "ethical_use_note": "Users must obtain consent before generating talking-head videos of real people",
+    "license": "apache-2.0 (see repo)"
+  }
+}