jaxmetaverse committed on
Commit
aaa9617
·
verified ·
1 Parent(s): 5158d75

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. models/checkpoints/2.5D_V1.0.safetensors +3 -0
  3. models/checkpoints/3Guofeng3_v34.safetensors +3 -0
  4. models/checkpoints/RealisticModel_v2.safetensors +3 -0
  5. models/checkpoints/SUPIR-v0Q_fp16.safetensors +3 -0
  6. models/checkpoints/absolutereality_v181.safetensors +3 -0
  7. models/checkpoints/animerge_v27.safetensors +3 -0
  8. models/checkpoints/anyloraCheckpoint_bakedvaeBlessedFp16.safetensors +3 -0
  9. models/checkpoints/ayu_realistic.safetensors +3 -0
  10. models/checkpoints/cardosAnime_v20.safetensors +3 -0
  11. models/checkpoints/cuteyukimixAdorable_kemiaomiao.safetensors +3 -0
  12. models/checkpoints/cyberrealistic_classicV31.safetensors +3 -0
  13. models/checkpoints/darkSushiMixMix_225D.safetensors +3 -0
  14. models/checkpoints/disneyPixarCartoon_v10.safetensors +3 -0
  15. models/checkpoints/dreamshaper_8.safetensors +3 -0
  16. models/checkpoints/epicdream_lullaby.safetensors +3 -0
  17. models/checkpoints/flat2DAnimerge_v30.safetensors +3 -0
  18. models/checkpoints/juggernautXL_v9Rdphoto2Lightning.safetensors +3 -0
  19. models/checkpoints/juggernaut_aftermath.safetensors +3 -0
  20. models/checkpoints/kantanmixSD15_v10.safetensors +3 -0
  21. models/checkpoints/ltx-video-2b-v0.9.1.safetensors +3 -0
  22. models/checkpoints/ltx-video-2b-v0.9.safetensors +3 -0
  23. models/checkpoints/majicmixRealistic_v7.safetensors +3 -0
  24. models/checkpoints/manmaruMix_v30.safetensors +3 -0
  25. models/checkpoints/maturemalemix_v14.safetensors +3 -0
  26. models/checkpoints/meinamix_meinaV11.safetensors +3 -0
  27. models/checkpoints/memo/.gitattributes +36 -0
  28. models/checkpoints/memo/README.md +77 -0
  29. models/checkpoints/memo/audio_proj/config.json +4 -0
  30. models/checkpoints/memo/audio_proj/diffusion_pytorch_model.safetensors +3 -0
  31. models/checkpoints/memo/diffusion_net/config.json +87 -0
  32. models/checkpoints/memo/diffusion_net/diffusion_pytorch_model.safetensors +3 -0
  33. models/checkpoints/memo/image_proj/config.json +4 -0
  34. models/checkpoints/memo/image_proj/diffusion_pytorch_model.safetensors +3 -0
  35. models/checkpoints/memo/misc/audio_emotion_classifier/config.json +4 -0
  36. models/checkpoints/memo/misc/audio_emotion_classifier/diffusion_pytorch_model.safetensors +3 -0
  37. models/checkpoints/memo/misc/face_analysis/glintr100.onnx +3 -0
  38. models/checkpoints/memo/misc/face_analysis/misc/face_analysis/models/1k3d68.onnx +3 -0
  39. models/checkpoints/memo/misc/face_analysis/misc/face_analysis/models/2d106det.onnx +3 -0
  40. models/checkpoints/memo/misc/face_analysis/misc/face_analysis/models/face_landmarker_v2_with_blendshapes.task +3 -0
  41. models/checkpoints/memo/misc/face_analysis/misc/face_analysis/models/genderage.onnx +3 -0
  42. models/checkpoints/memo/misc/face_analysis/misc/face_analysis/models/glintr100.onnx +3 -0
  43. models/checkpoints/memo/misc/face_analysis/misc/face_analysis/models/scrfd_10g_bnkps.onnx +3 -0
  44. models/checkpoints/memo/misc/face_analysis/models.json +13 -0
  45. models/checkpoints/memo/misc/face_analysis/models/1k3d68.onnx +3 -0
  46. models/checkpoints/memo/misc/face_analysis/models/2d106det.onnx +3 -0
  47. models/checkpoints/memo/misc/face_analysis/models/face_landmarker_v2_with_blendshapes.task +3 -0
  48. models/checkpoints/memo/misc/face_analysis/models/genderage.onnx +3 -0
  49. models/checkpoints/memo/misc/face_analysis/models/scrfd_10g_bnkps.onnx +3 -0
  50. models/checkpoints/memo/misc/vocal_separator/Kim_Vocal_2.onnx +3 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ models/checkpoints/memo/misc/face_analysis/misc/face_analysis/models/face_landmarker_v2_with_blendshapes.task filter=lfs diff=lfs merge=lfs -text
37
+ models/checkpoints/memo/misc/face_analysis/models/face_landmarker_v2_with_blendshapes.task filter=lfs diff=lfs merge=lfs -text
models/checkpoints/2.5D_V1.0.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00c373fc73809d546d72a6b640c864857b5c5ed91be6fa892afa88e66249e384
3
+ size 2132625910
models/checkpoints/3Guofeng3_v34.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a83e25fe5b70bad595fe4dd6733ee35f0e3ddf8ed4041ab360f9573556e8b3e6
3
+ size 2299933688
models/checkpoints/RealisticModel_v2.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaa0bb415a08b4f480ad41659eafa8062cafd3857424571200d9c74cb8be7b86
3
+ size 2132621438
models/checkpoints/SUPIR-v0Q_fp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eef33ec7633122ca23b1e5ef167faa048b5a0845768694d5e8070138ac013ce
3
+ size 2664858464
models/checkpoints/absolutereality_v181.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:463d6a9fe8a4b56a4d69ef3692074c0617428dfd8e8f12f9efe3b1e9a71717ce
3
+ size 2132625432
models/checkpoints/animerge_v27.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd98dd2fe195654f9e17231c42ae96f1431c1f73bc8193601991bb3c2ce021e9
3
+ size 2132626814
models/checkpoints/anyloraCheckpoint_bakedvaeBlessedFp16.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef49fbb25fa908bb54ded95abf81ef4b0e21fa0a8de56c40c3d62e768ef7e49a
3
+ size 3455824250
models/checkpoints/ayu_realistic.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa7308c649f34a7020dfda590834c92424616d4ed104b7dda414b6f01aace2bd
3
+ size 2498580732
models/checkpoints/cardosAnime_v20.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86bd0c547c0ffbbcff680a601264ab9003dece7c9afe5ee7746f3b195cdfb78e
3
+ size 4265096689
models/checkpoints/cuteyukimixAdorable_kemiaomiao.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35dae95c7495f1a28d4d1f916d7276db9ed622cf9364acb40d1a8f042460c01c
3
+ size 2378779158
models/checkpoints/cyberrealistic_classicV31.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a722bd93f40adc093b5d2fdb2aa86710285a4a1960122a181416c6be417464fd
3
+ size 2132651162
models/checkpoints/darkSushiMixMix_225D.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cca17b08da3254aa2f0242a84ab8641c89d1bc71cc1046fc79f8e906581a97ba
3
+ size 2132627182
models/checkpoints/disneyPixarCartoon_v10.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6548414b4947186d0573d1a9f151fa1f1ca0e7c5fbe9e626a5e6f5532f7e654
3
+ size 4244098204
models/checkpoints/dreamshaper_8.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:879db523c30d3b9017143d56705015e15a2cb5628762c11d086fed9538abd7fd
3
+ size 2132625894
models/checkpoints/epicdream_lullaby.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:546fb83e40600d143589890e96fbd10191ccf2b7b1ba4af712a43ea24b77c3b1
3
+ size 2132625790
models/checkpoints/flat2DAnimerge_v30.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dd56bfa12e9ea2f1f3e86a04d0d2cde06a58f2cdf3bfc46cbff6aaa8f00921d
3
+ size 2132626072
models/checkpoints/juggernautXL_v9Rdphoto2Lightning.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8df560d2992ac04299412be6a36fa53a4e7a1b74f27b94867ad3f84f4b425a5
3
+ size 7105348284
models/checkpoints/juggernaut_aftermath.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ed60a2f5842de8e6755965ffbf2a0ee273429939f307233c22f7dffd8d7ae3d
3
+ size 5672794804
models/checkpoints/kantanmixSD15_v10.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6c46570180b4defbf0841cfe4f491f5eaa38f48a61f85f29cf83fdf0dd82ddf
3
+ size 2132626758
models/checkpoints/ltx-video-2b-v0.9.1.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a23200896c5eddf215c7cb9517820c5763a2b054eb62ba86cbce6b871a4577e3
3
+ size 5716863844
models/checkpoints/ltx-video-2b-v0.9.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63111264103e4bb8e7f5f7a653e9417d83c84759a48a945f9fabf7f7ea0d9984
3
+ size 9370440316
models/checkpoints/majicmixRealistic_v7.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c819b6d13663ed720c2254f4fe18373107dfef2448d337913c8fc545640881e
3
+ size 2132625894
models/checkpoints/manmaruMix_v30.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d34a470c17d0361f2eaf06b555d616b358543be0d019984ad3d65be6b161cc5
3
+ size 2299958656
models/checkpoints/maturemalemix_v14.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f5b5d7b9cd029dc12fde669209fdb2446e318c17f019b8e381a37e9743ec7aa
3
+ size 2400040472
models/checkpoints/meinamix_meinaV11.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54ef3e3610eee7f0f6c9f9b82eee26aa404598512d800494d1db344e3bc560b1
3
+ size 2132651818
models/checkpoints/memo/.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ misc/face_analysis/models/face_landmarker_v2_with_blendshapes.task filter=lfs diff=lfs merge=lfs -text
models/checkpoints/memo/README.md ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+
5
+ # MEMO
6
+
7
+ **MEMO: Memory-Guided Diffusion for Expressive Talking Video Generation**
8
+ <br>
9
+ [Longtao Zheng](https://ltzheng.github.io)\*,
10
+ [Yifan Zhang](https://scholar.google.com/citations?user=zuYIUJEAAAAJ)\*,
11
+ [Hanzhong Guo](https://scholar.google.com/citations?user=q3x6KsgAAAAJ),
12
+ [Jiachun Pan](https://scholar.google.com/citations?user=nrOvfb4AAAAJ),
13
+ [Zhenxiong Tan](https://scholar.google.com/citations?user=HP9Be6UAAAAJ),
14
+ [Jiahao Lu](https://scholar.google.com/citations?user=h7rbA-sAAAAJ),
15
+ [Chuanxin Tang](https://scholar.google.com/citations?user=3ZC8B7MAAAAJ),
16
+ [Bo An](https://personal.ntu.edu.sg/boan/index.html),
17
+ [Shuicheng Yan](https://scholar.google.com/citations?user=DNuiPHwAAAAJ)
18
+ <br>
19
+ _[Project Page](https://memoavatar.github.io) | [arXiv](https://arxiv.org/abs/2412.04448) | [Model](https://huggingface.co/memoavatar/memo)_
20
+
21
+ This repository contains the example inference script for the MEMO-preview model. The GIF demo below is compressed. See our [project page](https://memoavatar.github.io) for full videos.
22
+
23
+ <div style="width: 100%; text-align: center;">
24
+ <img src="https://github.com/memoavatar/memo/raw/main/assets/demo.gif" alt="Demo GIF" style="width: 100%; height: auto;">
25
+ </div>
26
+
27
+ ## Installation
28
+
29
+ ```bash
30
+ conda create -n memo python=3.10 -y
31
+ conda activate memo
32
+ conda install -c conda-forge ffmpeg -y
33
+ pip install -e .
34
+ ```
35
+
36
+ > Our code downloads the checkpoint from Hugging Face automatically, and the models for face analysis and vocal separation are downloaded to the directory set by `misc_model_dir` in `configs/inference.yaml`. If you want to download the models manually, please download the checkpoint from [here](https://huggingface.co/memoavatar/memo) and specify its path in `model_name_or_path` in `configs/inference.yaml`.
37
+
38
+ ## Inference
39
+
40
+ ```bash
41
+ python inference.py --config configs/inference.yaml --input_image <IMAGE_PATH> --input_audio <AUDIO_PATH> --output_dir <SAVE_PATH>
42
+ ```
43
+
44
+ For example:
45
+
46
+ ```bash
47
+ python inference.py --config configs/inference.yaml --input_image assets/examples/dicaprio.jpg --input_audio assets/examples/speech.wav --output_dir outputs
48
+ ```
49
+
50
+ > We tested the code on H100 and RTX 4090 GPUs using CUDA 12. Under the default settings (fps=30, inference_steps=20), the inference time is around 1 second per frame on H100 and 2 seconds per frame on RTX 4090. We welcome community contributions to improve the inference speed or interfaces like ComfyUI.
51
+
52
+ ## Acknowledgement
53
+
54
+ Our work is made possible thanks to high-quality open-source talking video datasets (including [HDTF](https://github.com/MRzzm/HDTF), [VFHQ](https://liangbinxie.github.io/projects/vfhq), [CelebV-HQ](https://celebv-hq.github.io), [MultiTalk](https://multi-talk.github.io), and [MEAD](https://wywu.github.io/projects/MEAD/MEAD.html)) and some pioneering works (such as [EMO](https://humanaigc.github.io/emote-portrait-alive) and [Hallo](https://github.com/fudan-generative-vision/hallo)).
55
+
56
+ ## Ethics Statement
57
+
58
+ We acknowledge the potential of AI in generating talking videos, with applications spanning education, virtual assistants, and entertainment. However, we are equally aware of the ethical, legal, and societal challenges that misuse of this technology could pose.
59
+
60
+ To reduce potential risks, we have only open-sourced a preview model for research purposes. Demos on our website use publicly available materials. We welcome copyright concerns—please contact us if needed, and we will address issues promptly. Users are required to ensure that their actions align with legal regulations, cultural norms, and ethical standards.
61
+
62
+ It is strictly prohibited to use the model for creating malicious, misleading, defamatory, or privacy-infringing content, such as deepfake videos for political misinformation, impersonation, harassment, or fraud. We strongly encourage users to review generated content carefully, ensuring it meets ethical guidelines and respects the rights of all parties involved. Users must also ensure that their inputs (e.g., audio and reference images) and outputs are used with proper authorization. Unauthorized use of third-party intellectual property is strictly forbidden.
63
+
64
+ While users may claim ownership of content generated by the model, they must ensure compliance with copyright laws, particularly when involving public figures' likeness, voice, or other aspects protected under personality rights.
65
+
66
+ ## Citation
67
+
68
+ If you find our work useful, please use the following citation:
69
+
70
+ ```bibtex
71
+ @article{zheng2024memo,
72
+ title={MEMO: Memory-Guided Diffusion for Expressive Talking Video Generation},
73
+ author={Longtao Zheng and Yifan Zhang and Hanzhong Guo and Jiachun Pan and Zhenxiong Tan and Jiahao Lu and Chuanxin Tang and Bo An and Shuicheng Yan},
74
+ journal={arXiv preprint arXiv:2412.04448},
75
+ year={2024}
76
+ }
77
+ ```
models/checkpoints/memo/audio_proj/config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "_class_name": "AudioProjModel",
3
+ "_diffusers_version": "0.31.0"
4
+ }
models/checkpoints/memo/audio_proj/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb32c18e7890c6550e2312964a48f447b5e30cd449c25cfe06324842d6146f6e
3
+ size 145861272
models/checkpoints/memo/diffusion_net/config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_center_input_sample": false,
3
+ "_class_name": "UNet3DConditionModel",
4
+ "_diffusers_version": "0.31.0",
5
+ "_out_channels": 4,
6
+ "act_fn": "silu",
7
+ "addition_embed_type": null,
8
+ "addition_embed_type_num_heads": 64,
9
+ "addition_time_embed_dim": null,
10
+ "attention_head_dim": 8,
11
+ "attention_type": "default",
12
+ "audio_attention_dim": 768,
13
+ "block_out_channels": [
14
+ 320,
15
+ 640,
16
+ 1280,
17
+ 1280
18
+ ],
19
+ "center_input_sample": false,
20
+ "class_embed_type": null,
21
+ "class_embeddings_concat": false,
22
+ "conv_in_kernel": 3,
23
+ "cross_attention_dim": 768,
24
+ "down_block_types": [
25
+ "CrossAttnDownBlock3D",
26
+ "CrossAttnDownBlock3D",
27
+ "CrossAttnDownBlock3D",
28
+ "DownBlock3D"
29
+ ],
30
+ "downsample_padding": 1,
31
+ "dropout": 0.0,
32
+ "dual_cross_attention": false,
33
+ "emo_drop_rate": 0.05,
34
+ "encoder_hid_dim": null,
35
+ "encoder_hid_dim_type": null,
36
+ "flip_sin_to_cos": true,
37
+ "freq_shift": 0,
38
+ "in_channels": 4,
39
+ "layers_per_block": 2,
40
+ "mid_block_only_cross_attention": null,
41
+ "mid_block_scale_factor": 1,
42
+ "mid_block_type": "UNetMidBlock3DCrossAttn",
43
+ "motion_module_kwargs": {
44
+ "attention_block_types": [
45
+ "Temporal_Self",
46
+ "Temporal_Self"
47
+ ],
48
+ "num_attention_heads": 8,
49
+ "num_transformer_block": 1,
50
+ "temporal_attention_dim_div": 1,
51
+ "temporal_position_encoding": true,
52
+ "temporal_position_encoding_max_len": 32
53
+ },
54
+ "motion_module_resolutions": [
55
+ 1,
56
+ 2,
57
+ 4,
58
+ 8
59
+ ],
60
+ "norm_eps": 1e-05,
61
+ "norm_num_groups": 32,
62
+ "num_attention_heads": null,
63
+ "num_class_embeds": null,
64
+ "only_cross_attention": false,
65
+ "out_channels": 4,
66
+ "projection_class_embeddings_input_dim": null,
67
+ "resnet_time_scale_shift": "default",
68
+ "reverse_transformer_layers_per_block": null,
69
+ "sample_size": 64,
70
+ "time_cond_proj_dim": null,
71
+ "time_embedding_act_fn": null,
72
+ "time_embedding_dim": null,
73
+ "time_embedding_type": "positional",
74
+ "timestep_post_act": null,
75
+ "transformer_layers_per_block": 1,
76
+ "unet_use_cross_frame_attention": false,
77
+ "unet_use_temporal_attention": false,
78
+ "up_block_types": [
79
+ "UpBlock3D",
80
+ "CrossAttnUpBlock3D",
81
+ "CrossAttnUpBlock3D",
82
+ "CrossAttnUpBlock3D"
83
+ ],
84
+ "upcast_attention": false,
85
+ "use_inflated_groupnorm": true,
86
+ "use_linear_projection": false
87
+ }
models/checkpoints/memo/diffusion_net/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7581d7e0663fd27a3c7b2b242a7af5eda89e57c67e3259017f8b77d83b930479
3
+ size 6712434824
models/checkpoints/memo/image_proj/config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "_class_name": "ImageProjModel",
3
+ "_diffusers_version": "0.31.0"
4
+ }
models/checkpoints/memo/image_proj/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eac89d81e17f120f752548d028b4a9a9ad4abca9401590436b5c8c26d8cd8537
3
+ size 6310216
models/checkpoints/memo/misc/audio_emotion_classifier/config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "_class_name": "AudioEmotionClassifierModel",
3
+ "_diffusers_version": "0.31.0"
4
+ }
models/checkpoints/memo/misc/audio_emotion_classifier/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7c8ca4bcfd1695efcf80398d13e4a5f365ebba0d70052f24a8c232ee50ee76d
3
+ size 58827684
models/checkpoints/memo/misc/face_analysis/glintr100.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ab1d6435d639628a6f3e5008dd4f929edf4c4124b1a7169e1048f9fef534cdf
3
+ size 260665334
models/checkpoints/memo/misc/face_analysis/misc/face_analysis/models/1k3d68.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df5c06b8a0c12e422b2ed8947b8869faa4105387f199c477af038aa01f9a45cc
3
+ size 143607619
models/checkpoints/memo/misc/face_analysis/misc/face_analysis/models/2d106det.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf
3
+ size 5030888
models/checkpoints/memo/misc/face_analysis/misc/face_analysis/models/face_landmarker_v2_with_blendshapes.task ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64184e229b263107bc2b804c6625db1341ff2bb731874b0bcc2fe6544e0bc9ff
3
+ size 3758596
models/checkpoints/memo/misc/face_analysis/misc/face_analysis/models/genderage.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb
3
+ size 1322532
models/checkpoints/memo/misc/face_analysis/misc/face_analysis/models/glintr100.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ab1d6435d639628a6f3e5008dd4f929edf4c4124b1a7169e1048f9fef534cdf
3
+ size 260665334
models/checkpoints/memo/misc/face_analysis/misc/face_analysis/models/scrfd_10g_bnkps.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91
3
+ size 16923827
models/checkpoints/memo/misc/face_analysis/models.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "detection": [
3
+ "scrfd_10g_bnkps"
4
+ ],
5
+ "recognition": [
6
+ "glintr100"
7
+ ],
8
+ "analysis": [
9
+ "genderage",
10
+ "2d106det",
11
+ "1k3d68"
12
+ ]
13
+ }
models/checkpoints/memo/misc/face_analysis/models/1k3d68.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df5c06b8a0c12e422b2ed8947b8869faa4105387f199c477af038aa01f9a45cc
3
+ size 143607619
models/checkpoints/memo/misc/face_analysis/models/2d106det.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f001b856447c413801ef5c42091ed0cd516fcd21f2d6b79635b1e733a7109dbf
3
+ size 5030888
models/checkpoints/memo/misc/face_analysis/models/face_landmarker_v2_with_blendshapes.task ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64184e229b263107bc2b804c6625db1341ff2bb731874b0bcc2fe6544e0bc9ff
3
+ size 3758596
models/checkpoints/memo/misc/face_analysis/models/genderage.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fde69b1c810857b88c64a335084f1c3fe8f01246c9a191b48c7bb756d6652fb
3
+ size 1322532
models/checkpoints/memo/misc/face_analysis/models/scrfd_10g_bnkps.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5838f7fe053675b1c7a08b633df49e7af5495cee0493c7dcf6697200b85b5b91
3
+ size 16923827
models/checkpoints/memo/misc/vocal_separator/Kim_Vocal_2.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce74ef3b6a6024ce44211a07be9cf8bc6d87728cc852a68ab34eb8e58cde9c8b
3
+ size 66759214