# Python dependencies for this project, in pip requirements-file format.
# One requirement per line: pip treats everything after a leading '#' on a
# line as a comment, so requirements must not share a line with a header.

# Core dependencies
numpy
einops
librosa
pydub
torch
torchaudio
torchvision
torchcodec
torchdiffeq
transformers>=4.54.0
# NOTE(review): nvidia-ml-py is also listed as a git URL in the Git-based
# section below -- confirm which source is intended and drop the other.
nvidia-ml-py
yt-dlp>=2024.12.23
# NOTE(review): presumably the FFmpeg executable is what is needed here; the
# PyPI package named "ffmpeg" may not provide it -- verify.
ffmpeg

# Audio / multimodal models
audiobox_aesthetics

# Git-based dependencies
git+https://github.com/huggingface/datasets.git
git+https://github.com/huggingface/peft.git
git+https://github.com/facebookresearch/sam3.git
git+https://github.com/huggingface/accelerate.git
dacvae @ git+https://github.com/facebookresearch/dacvae.git
imagebind @ git+https://github.com/facebookresearch/ImageBind.git
laion-clap @ git+https://github.com/lematt1991/CLAP.git
git+https://github.com/wookayin/nvidia-ml-py.git
# Upstream perception_models source is disabled; the fork on the next line is used instead.
#perception-models @ git+https://github.com/facebookresearch/perception_models@unpin-deps
perception-models@git+https://github.com/hx23840/perception_models@unpin-deps