diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..a6344aac8c09253b3b630fb776ae94478aa0275b --- /dev/null +++ b/.gitattributes @@ -0,0 +1,35 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..0cff484033362ac4616c0085b4eb726f0154f53d --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +**/__pycache__ +.ipynb_checkpoints +# ckpts \ No newline at end of file diff --git a/assets/demo/an antique shop.jpg b/assets/demo/an antique shop.jpg new file mode 100644 index 0000000000000000000000000000000000000000..4ac3133dbd043e916b0a79a100f77824e72c43ca Binary files /dev/null and b/assets/demo/an antique shop.jpg differ diff --git a/assets/demo/clock ticking.wav b/assets/demo/clock ticking.wav new file mode 100644 index 0000000000000000000000000000000000000000..07fe53f4f6929e47ec562a8d351e6f1d4fcd3fb3 Binary files /dev/null and b/assets/demo/clock ticking.wav differ diff --git a/ckpts/imagebind_huge.pth b/ckpts/imagebind_huge.pth new file mode 120000 index 0000000000000000000000000000000000000000..b5b417db27cf1245f117359df739c6787b868ab2 --- /dev/null +++ b/ckpts/imagebind_huge.pth @@ -0,0 +1 @@ +/home/jacklishufan/AudioLDM2/.checkpoints/imagebind_huge.pth \ No newline at end of file diff --git a/ckpts/llm b/ckpts/llm new file mode 120000 index 0000000000000000000000000000000000000000..0ca57910c38b981db010dd82c60d96f28361ecae --- /dev/null +++ b/ckpts/llm @@ -0,0 +1 @@ +/home/jacklishufan/LLaVA/outputs/llava-v1.5-7b-pretrain-any2any2align/ \ No newline at end of file diff --git a/ckpts/prior/model.bin b/ckpts/prior/model.bin new file mode 120000 index 0000000000000000000000000000000000000000..00d6daa1dfc9d844b85b601a9b5ce0718b2c50fe --- /dev/null +++ b/ckpts/prior/model.bin @@ -0,0 +1 @@ +/home/jacklishufan/AudioLDM2/diffusion_prior_3.bin \ No newline at end of file diff --git a/ckpts/sdxl b/ckpts/sdxl new file mode 120000 index 0000000000000000000000000000000000000000..c1629b1c3dcaaf42bb25cf2a4dc8b5c787b28ef3 --- /dev/null +++ b/ckpts/sdxl @@ -0,0 +1 @@ +/localhome/data/ckpts/jacklishufan/sdxl/ \ No newline at end of file diff --git a/demo.ipynb b/demo.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..64652058cdc065d4cb177a47ea92b4fe5156cbc8 --- /dev/null +++ b/demo.ipynb @@ -0,0 +1,1638 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "b2ba62eb-031b-4558-9500-77ae6baeb472", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "a1652f17-2f24-4e1f-bc44-9227d4d5207f", + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "from PIL import Image" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "93ee78f8-a730-43ba-a5c8-a9737ee0af18", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/jacklishufan/miniconda3/envs/llava/lib/python3.10/site-packages/torchvision/transforms/functional_tensor.py:5: UserWarning: The torchvision.transforms.functional_tensor module is deprecated in 0.15 and will be **removed in 0.17**. Please don't rely on it. You probably just need to use APIs in torchvision.transforms.functional or in torchvision.transforms.v2.functional.\n", + " warnings.warn(\n", + "/home/jacklishufan/miniconda3/envs/llava/lib/python3.10/site-packages/torchvision/transforms/_functional_video.py:6: UserWarning: The 'torchvision.transforms._functional_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms.functional' module instead.\n", + " warnings.warn(\n", + "/home/jacklishufan/miniconda3/envs/llava/lib/python3.10/site-packages/torchvision/transforms/_transforms_video.py:22: UserWarning: The 'torchvision.transforms._transforms_video' module is deprecated since 0.12 and will be removed in the future. Please use the 'torchvision.transforms' module instead.\n", + " warnings.warn(\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "87fe5c2edac149ab8d9719bbdc23525a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading pipeline components...: 0%| | 0/7 [00:00 to