config.json · HiMind/Packed-Avatar at main

File size: 4,706 Bytes

4e9190a

{
  "model_type": "packedavatar",
  "library_name": "packed_avatar",
  "bundle_file": "PackedAvatar.pt",
  "runtime_script": "PackedAvatar.py",
  "version": 1,
  "architecture": {
    "animation_engine": "SadTalker",
    "audio_to_coeff": "Audio2Coeff",
    "face_renderer": "FaceVid2Vid|PIRender (fallback)",
    "wav2lip": "Wav2Lip GAN (optional)",
    "rmbg": "Bria RMBG 2.0",
    "avatar_bank": "AvatarBank",
    "bfm": "BFM face model assets"
  },
  "bundle_layout": {
    "archives": {
      "checkpoints_zip": "checkpoints.zip",
      "sadtalker_zip": "sadtalker.zip"
    },
    "checkpoints": {
      "avatar_bank": "checkpoints/AvatarBank.pt",
      "wav2lip_gan": "checkpoints/wav2lip_gan.pth",
      "bria_rmbg": "checkpoints/briaaiRMBG-2.0/",
      "bfm": "checkpoints/BFM/",
      "face_detectors": "checkpoints/face_detectors/"
    },
    "sadtalker": {
      "source": "SadTalker/",
      "hub_checkpoints": "SadTalker/hub/checkpoints/"
    },
    "manifest": "manifest.json",
    "metadata": "manifest"
  },
  "media": {
    "audio": {
      "sample_rate": 16000,
      "accepted_formats": [
        "wav",
        "mp3",
        "m4a"
      ],
      "auto_convert_to_wav": true
    },
    "video": {
      "output_format": "mp4",
      "preview_supported": true,
      "accepted_reference_formats": [
        "mp4",
        "mov",
        "mkv",
        "webm"
      ]
    },
    "image": {
      "accepted_formats": [
        "png",
        "jpg",
        "jpeg",
        "webp"
      ],
      "auto_composite_alpha": true
    }
  },
  "defaults": {
    "default_avatar": "",
    "device_priority": [
      "cuda",
      "mps",
      "cpu"
    ],
    "use_wav2lip_by_default": false,
    "remove_background_by_default": false,
    "cache_dir": "<system_temp>/PackedAvatarCache",
    "cache_validation": "sha256(checkpoints_zip + sadtalker_zip)"
  },
  "avatar_bank": {
    "included": true,
    "avatar_count": 100,
    "styles": [
      "anime",
      "cyber",
      "drawn",
      "paint",
      "real"
    ],
    "resolution_previews": true,
    "preview_formats": [
      "png",
      "zstd-compressed-png"
    ]
  },
  "conditioning": {
    "avatar_condition_formats": [
      ".pt",
      ".pth",
      ".mat",
      "dict"
    ],
    "motion_condition_formats": [
      ".pt",
      ".pth",
      ".mat",
      "dict"
    ],
    "legacy_normalization": [
      "motion_3dmm",
      "full_3dmm",
      "coeff_3dmm"
    ]
  },
  "runtime_behavior": {
    "lazy_module_loading": true,
    "cache_invalidation_on_bundle_change": true,
    "avatar_resolution_priority": [
      "avatar_condition",
      "source_image",
      "avatar_id",
      "default_avatar"
    ],
    "background_removal_pipeline": "Input Image -> Bria RMBG -> Foreground -> SadTalker -> MP4",
    "post_processing": {
      "wav2lip": "optional post-pass",
      "enhancer": "gfpgan (optional)"
    },
    "memory_cleanup": [
      "del preprocess_model",
      "del audio_to_coeff",
      "del animate_from_coeff",
      "torch.cuda.empty_cache()",
      "gc.collect()"
    ]
  },
  "supported_commands": [
    "generate",
    "extract-embeddings",
    "list-avatars",
    "list-bundle-contents",
    "download-bundle",
    "validate-bundle",
    "cleanup-cache"
  ],
  "cli_examples": {
    "basic": "python PackedAvatar.py --source-image person.jpg --driven-audio speech.wav",
    "avatarbank": "python PackedAvatar.py --avatar-id Rebecca --driven-audio speech.wav",
    "background_removal": "python PackedAvatar.py --source-image portrait.png --driven-audio speech.wav --remove-background",
    "wav2lip": "python PackedAvatar.py --source-image portrait.png --driven-audio speech.wav --use-wav2lip"
  },
  "input_files": [
    "PackedAvatar.pt",
    "PackedAvatar.py",
    "requirements.txt",
    "README.md"
  ],
  "query_file": true,
  "recommended_env": {
    "python": [
      "3.10",
      "3.11"
    ],
    "pytorch": "2.x (CUDA-enabled for GPU)",
    "ffmpeg": "required for reference-video audio extraction"
  },
  "performance_guidance": {
    "256_resolution_vram": "4-6 GB (recommended)",
    "512_resolution_vram": "8-12 GB (recommended)",
    "high_quality_vram": "12+ GB",
    "notes": "The first run is slower because the bundle must be extracted into the cache; subsequent runs reuse the cache and start faster."
  },
  "security_and_ethics": {
    "bundle_trust": "The bundle is treated as a trusted runtime artifact; only use bundles obtained from a source you trust",
    "ethical_use_note": "Users must obtain consent before generating talking-head videos of real people",
    "license": "apache-2.0 (see repo)"
  }
}