kimwint committed on
Commit
7f69cfe
·
verified ·
1 Parent(s): 9e53aa2

Upload RMBG folder recursively

Browse files
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. SDPose_OOD/SDPose-Wholebody/.gitattributes +36 -0
  3. SDPose_OOD/SDPose-Wholebody/README.md +137 -0
  4. SDPose_OOD/SDPose-Wholebody/assets/wholebody_anno.png +3 -0
  5. SDPose_OOD/SDPose-Wholebody/decoder/decoder.safetensors +3 -0
  6. SDPose_OOD/SDPose-Wholebody/huggingface/.gitignore +1 -0
  7. SDPose_OOD/SDPose-Wholebody/huggingface/download/.gitattributes.lock +0 -0
  8. SDPose_OOD/SDPose-Wholebody/huggingface/download/.gitattributes.metadata +3 -0
  9. SDPose_OOD/SDPose-Wholebody/huggingface/download/README.md.lock +0 -0
  10. SDPose_OOD/SDPose-Wholebody/huggingface/download/README.md.metadata +3 -0
  11. SDPose_OOD/SDPose-Wholebody/huggingface/download/assets/wholebody_anno.png.lock +0 -0
  12. SDPose_OOD/SDPose-Wholebody/huggingface/download/assets/wholebody_anno.png.metadata +3 -0
  13. SDPose_OOD/SDPose-Wholebody/huggingface/download/decoder/decoder.safetensors.lock +0 -0
  14. SDPose_OOD/SDPose-Wholebody/huggingface/download/decoder/decoder.safetensors.metadata +3 -0
  15. SDPose_OOD/SDPose-Wholebody/huggingface/download/scheduler/scheduler_config.json.lock +0 -0
  16. SDPose_OOD/SDPose-Wholebody/huggingface/download/scheduler/scheduler_config.json.metadata +3 -0
  17. SDPose_OOD/SDPose-Wholebody/huggingface/download/text_encoder/config.json.lock +0 -0
  18. SDPose_OOD/SDPose-Wholebody/huggingface/download/text_encoder/config.json.metadata +3 -0
  19. SDPose_OOD/SDPose-Wholebody/huggingface/download/text_encoder/model.safetensors.lock +0 -0
  20. SDPose_OOD/SDPose-Wholebody/huggingface/download/text_encoder/model.safetensors.metadata +3 -0
  21. SDPose_OOD/SDPose-Wholebody/huggingface/download/tokenizer/merges.txt.lock +0 -0
  22. SDPose_OOD/SDPose-Wholebody/huggingface/download/tokenizer/merges.txt.metadata +3 -0
  23. SDPose_OOD/SDPose-Wholebody/huggingface/download/tokenizer/special_tokens_map.json.lock +0 -0
  24. SDPose_OOD/SDPose-Wholebody/huggingface/download/tokenizer/special_tokens_map.json.metadata +3 -0
  25. SDPose_OOD/SDPose-Wholebody/huggingface/download/tokenizer/tokenizer_config.json.lock +0 -0
  26. SDPose_OOD/SDPose-Wholebody/huggingface/download/tokenizer/tokenizer_config.json.metadata +3 -0
  27. SDPose_OOD/SDPose-Wholebody/huggingface/download/tokenizer/vocab.json.lock +0 -0
  28. SDPose_OOD/SDPose-Wholebody/huggingface/download/tokenizer/vocab.json.metadata +3 -0
  29. SDPose_OOD/SDPose-Wholebody/huggingface/download/unet/config.json.lock +0 -0
  30. SDPose_OOD/SDPose-Wholebody/huggingface/download/unet/config.json.metadata +3 -0
  31. SDPose_OOD/SDPose-Wholebody/huggingface/download/unet/diffusion_pytorch_model.safetensors.lock +0 -0
  32. SDPose_OOD/SDPose-Wholebody/huggingface/download/unet/diffusion_pytorch_model.safetensors.metadata +3 -0
  33. SDPose_OOD/SDPose-Wholebody/huggingface/download/vae/config.json.lock +0 -0
  34. SDPose_OOD/SDPose-Wholebody/huggingface/download/vae/config.json.metadata +3 -0
  35. SDPose_OOD/SDPose-Wholebody/huggingface/download/vae/diffusion_pytorch_model.safetensors.lock +0 -0
  36. SDPose_OOD/SDPose-Wholebody/huggingface/download/vae/diffusion_pytorch_model.safetensors.metadata +3 -0
  37. SDPose_OOD/SDPose-Wholebody/huggingface/download/yolo11x.pt.lock +0 -0
  38. SDPose_OOD/SDPose-Wholebody/huggingface/download/yolo11x.pt.metadata +3 -0
  39. SDPose_OOD/SDPose-Wholebody/scheduler/scheduler_config.json +14 -0
  40. SDPose_OOD/SDPose-Wholebody/text_encoder/config.json +25 -0
  41. SDPose_OOD/SDPose-Wholebody/text_encoder/model.safetensors +3 -0
  42. SDPose_OOD/SDPose-Wholebody/tokenizer/merges.txt +0 -0
  43. SDPose_OOD/SDPose-Wholebody/tokenizer/special_tokens_map.json +24 -0
  44. SDPose_OOD/SDPose-Wholebody/tokenizer/tokenizer_config.json +34 -0
  45. SDPose_OOD/SDPose-Wholebody/tokenizer/vocab.json +0 -0
  46. SDPose_OOD/SDPose-Wholebody/unet/config.json +73 -0
  47. SDPose_OOD/SDPose-Wholebody/unet/diffusion_pytorch_model.safetensors +3 -0
  48. SDPose_OOD/SDPose-Wholebody/vae/config.json +30 -0
  49. SDPose_OOD/SDPose-Wholebody/vae/diffusion_pytorch_model.safetensors +3 -0
  50. SDPose_OOD/SDPose-Wholebody/yolo11x.pt +3 -0
.gitattributes CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  RMBG/BiRefNet/__pycache__/birefnet.cpython-312.pyc filter=lfs diff=lfs merge=lfs -text
37
  RMBG/RMBG-2.0/__pycache__/birefnet.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 
 
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  RMBG/BiRefNet/__pycache__/birefnet.cpython-312.pyc filter=lfs diff=lfs merge=lfs -text
37
  RMBG/RMBG-2.0/__pycache__/birefnet.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
38
+ SDPose_OOD/SDPose-Wholebody/assets/wholebody_anno.png filter=lfs diff=lfs merge=lfs -text
SDPose_OOD/SDPose-Wholebody/.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/wholebody_anno.png filter=lfs diff=lfs merge=lfs -text
SDPose_OOD/SDPose-Wholebody/README.md ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ license: mit
4
+ tags:
5
+ - pose-estimation
6
+ - computer-vision
7
+ - keypoint-detection
8
+ - diffusion-models
9
+ - stable-diffusion
10
+ - out-of-distribution
11
+ - human-pose
12
+ - top-down-pose-estimation
13
+ - coco
14
+ - mmpose
15
+ library_name: pytorch
16
+ ---
17
+
18
+ # SDPose: Exploiting Diffusion Priors for Out-of-Domain and Robust Pose Estimation (WholeBody - 133 Keypoints)
19
+
20
+ <div align="center">
21
+
22
+ [![Paper](https://img.shields.io/badge/arXiv-Paper-b31b1b?logo=arxiv&logoColor=white)](https://arxiv.org/abs/2509.24980)
23
+ [![Project Page](https://img.shields.io/badge/Project-Website-pink?logo=googlechrome&logoColor=white)](https://t-s-liang.github.io/SDPose)
24
+ [![HuggingFace Demo](https://img.shields.io/badge/🤗%20HuggingFace-Demo-yellow)](https://huggingface.co/spaces/teemosliang/SDPose-Body)
25
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
26
+
27
+ </div>
28
+
29
+ ## Model Description
30
+
31
+ **SDPose** is a state-of-the-art human pose estimation model that leverages the powerful visual priors from **Stable Diffusion** to achieve exceptional performance in out-of-distribution (OOD) scenarios. This model variant estimates **133 wholebody keypoints**, including the body, hands, face, and feet.
32
+
33
+ ### Model Architecture
34
+
35
+ SDPose employs a **U-Net backbone** initialized with Stable Diffusion v2 weights, combined with a specialized heatmap head for keypoint prediction. The model operates in a top-down manner:
36
+
37
+ 1. **Person Detection**: Detect human bounding boxes using an object detector (e.g., YOLO11-x)
38
+ 2. **Pose Estimation**: Crop and estimate 133 wholebody keypoints for each detected person
39
+ 3. **Heatmap Generation**: Produce confidence heatmaps for precise keypoint estimation
40
+
41
+ **Model Specifications:**
42
+ - **Backbone**: Stable Diffusion v2 U-Net (fine-tuned; minimal architectural changes)
43
+ - **Head**: Custom heatmap prediction head
44
+ - **Input Resolution**: 1024×768 (H×W)
45
+ - **Output**: 133 keypoint heatmaps + coordinates with confidence scores
46
+ - **Framework**: MMPose
47
+
48
+ ## Supported Keypoints (COCO Wholebody Format)
49
+
50
+ The model predicts 133 wholebody keypoints following the COCO Wholebody keypoint format.
51
+
52
+ <p align="center">
53
+ <img src="assets/wholebody_anno.png" width="600"/>
54
+ </p>
55
+
56
+ ## Intended Use
57
+
58
+ - Human pose estimation in natural images
59
+ - Pose estimation in artistic and stylized domains (paintings, anime, sketches)
60
+ - Animation and video pose tracking
61
+ - Cross-domain pose analysis and research
62
+ - Applications requiring robust pose estimation under distribution shifts
63
+
64
+ ## How to Use
65
+
66
+ ### Installation
67
+
68
+ ```bash
69
+ # Clone the repository
70
+ git clone https://github.com/t-s-liang/SDPose-OOD.git
71
+ cd SDPose-OOD
72
+
73
+ # Install dependencies
74
+ pip install -r requirements.txt
75
+ # Download YOLO11-x for human detection
76
+ wget https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11x.pt -P models/
77
+
78
+ # Launch Gradio interface
79
+ cd gradio_app
80
+ bash launch_gradio.sh
81
+ ```
82
+
83
+ ## Training Data
84
+
85
+ ### Datasets
86
+
87
+ Trained exclusively on COCO-2017 train2017 (no extra data).
88
+
89
+ - **COCO-Wholebody (Common Objects in Context)**: 200K+ images with 133 wholebody keypoints
90
+
91
+ ### Preprocessing
92
+
93
+ - Images are resized and cropped to 1024×768 resolution
94
+ - Augmentation: random horizontal flip, half-body & bbox transforms, UDP affine; Albumentations (Gaussian/Median blur, coarse dropout).
95
+ - Heatmaps: UDP codec (MMPose style).
96
+
97
+ ### Comparison with Baselines
98
+
99
+ SDPose significantly outperforms traditional pose estimation models (e.g., Sapiens) on out-of-distribution benchmarks while maintaining competitive performance on in-domain data.
100
+
101
+ See our [paper](https://arxiv.org/abs/2509.24980) for comprehensive evaluation results.
102
+
103
+ ## Citation
104
+
105
+ If you use SDPose in your research, please cite our paper:
106
+
107
+ ```bibtex
108
+ @misc{liang2025sdposeexploitingdiffusionpriors,
109
+ title={SDPose: Exploiting Diffusion Priors for Out-of-Domain and Robust Pose Estimation},
110
+ author={Shuang Liang and Jing He and Chuanmeizhi Wang and Lejun Liao and Guo Zhang and Yingcong Chen and Yuan Yuan},
111
+ year={2025},
112
+ eprint={2509.24980},
113
+ archivePrefix={arXiv},
114
+ primaryClass={cs.CV},
115
+ url={https://arxiv.org/abs/2509.24980},
116
+ }
117
+ ```
118
+
119
+ ## License
120
+
121
+ This model is released under the [MIT License](https://opensource.org/licenses/MIT).
122
+
123
+ ## Additional Resources
124
+
125
+ - 🌐 **Project Website**: [https://t-s-liang.github.io/SDPose](https://t-s-liang.github.io/SDPose)
126
+ - 📄 **Paper**: [arXiv:2509.24980](https://arxiv.org/abs/2509.24980)
127
+ - 💻 **Code Repository**: [GitHub](https://github.com/t-s-liang/SDPose-OOD)
128
+ - 🤗 **Demo**: [HuggingFace Space](https://huggingface.co/spaces/teemosliang/SDPose-Body)
129
+ - 📧 **Contact**: tsliang2001@gmail.com
130
+
131
+ ---
132
+
133
+ <div align="center">
134
+
135
+ **⭐ Star us on GitHub — it motivates us a lot!**
136
+
137
+ </div>
SDPose_OOD/SDPose-Wholebody/assets/wholebody_anno.png ADDED

Git LFS Details

  • SHA256: 5ab1654bde9ccd55d2eb32e5728e6c909a8fc7041f548eb76e1594f3b455d891
  • Pointer size: 132 Bytes
  • Size of remote file: 7.07 MB
SDPose_OOD/SDPose-Wholebody/decoder/decoder.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ee297f62ade11c405ad56ae490aa1bef7be881df794f145ceb8e12e38a853fc
3
+ size 28196828
SDPose_OOD/SDPose-Wholebody/huggingface/.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ *
SDPose_OOD/SDPose-Wholebody/huggingface/download/.gitattributes.lock ADDED
File without changes
SDPose_OOD/SDPose-Wholebody/huggingface/download/.gitattributes.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 0c8153e39458bc8ae87fb68790c141bf0f03c11b
2
+ 6ecc7d0c41955e6c984d4ca14753fe548c961e68
3
+ 1771726775.832536
SDPose_OOD/SDPose-Wholebody/huggingface/download/README.md.lock ADDED
File without changes
SDPose_OOD/SDPose-Wholebody/huggingface/download/README.md.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 0c8153e39458bc8ae87fb68790c141bf0f03c11b
2
+ e376a63f46becfd4ac7005225a69fbb1b07f2dbd
3
+ 1771726775.9407606
SDPose_OOD/SDPose-Wholebody/huggingface/download/assets/wholebody_anno.png.lock ADDED
File without changes
SDPose_OOD/SDPose-Wholebody/huggingface/download/assets/wholebody_anno.png.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 0c8153e39458bc8ae87fb68790c141bf0f03c11b
2
+ 5ab1654bde9ccd55d2eb32e5728e6c909a8fc7041f548eb76e1594f3b455d891
3
+ 1771726776.1980946
SDPose_OOD/SDPose-Wholebody/huggingface/download/decoder/decoder.safetensors.lock ADDED
File without changes
SDPose_OOD/SDPose-Wholebody/huggingface/download/decoder/decoder.safetensors.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 0c8153e39458bc8ae87fb68790c141bf0f03c11b
2
+ 3ee297f62ade11c405ad56ae490aa1bef7be881df794f145ceb8e12e38a853fc
3
+ 1771726777.472846
SDPose_OOD/SDPose-Wholebody/huggingface/download/scheduler/scheduler_config.json.lock ADDED
File without changes
SDPose_OOD/SDPose-Wholebody/huggingface/download/scheduler/scheduler_config.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 0c8153e39458bc8ae87fb68790c141bf0f03c11b
2
+ ab4573d2098886789be83ac32d48b9737edb2830
3
+ 1771726775.8193738
SDPose_OOD/SDPose-Wholebody/huggingface/download/text_encoder/config.json.lock ADDED
File without changes
SDPose_OOD/SDPose-Wholebody/huggingface/download/text_encoder/config.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 0c8153e39458bc8ae87fb68790c141bf0f03c11b
2
+ 9c60528fdcb99a7caf834426a94ea13c56cf422b
3
+ 1771726775.8028762
SDPose_OOD/SDPose-Wholebody/huggingface/download/text_encoder/model.safetensors.lock ADDED
File without changes
SDPose_OOD/SDPose-Wholebody/huggingface/download/text_encoder/model.safetensors.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 0c8153e39458bc8ae87fb68790c141bf0f03c11b
2
+ cce6febb0b6d876ee5eb24af35e27e764eb4f9b1d0b7c026c8c3333d4cfc916c
3
+ 1771726847.5179892
SDPose_OOD/SDPose-Wholebody/huggingface/download/tokenizer/merges.txt.lock ADDED
File without changes
SDPose_OOD/SDPose-Wholebody/huggingface/download/tokenizer/merges.txt.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 0c8153e39458bc8ae87fb68790c141bf0f03c11b
2
+ 76e821f1b6f0a9709293c3b6b51ed90980b3166b
3
+ 1771726776.1691403
SDPose_OOD/SDPose-Wholebody/huggingface/download/tokenizer/special_tokens_map.json.lock ADDED
File without changes
SDPose_OOD/SDPose-Wholebody/huggingface/download/tokenizer/special_tokens_map.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 0c8153e39458bc8ae87fb68790c141bf0f03c11b
2
+ ae0c5be6f35217e51c4c000fd325d8de0294e99c
3
+ 1771726776.6460805
SDPose_OOD/SDPose-Wholebody/huggingface/download/tokenizer/tokenizer_config.json.lock ADDED
File without changes
SDPose_OOD/SDPose-Wholebody/huggingface/download/tokenizer/tokenizer_config.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 0c8153e39458bc8ae87fb68790c141bf0f03c11b
2
+ f4fe219b936c0e171504b4bba0c33c7bef6ea211
3
+ 1771726776.650374
SDPose_OOD/SDPose-Wholebody/huggingface/download/tokenizer/vocab.json.lock ADDED
File without changes
SDPose_OOD/SDPose-Wholebody/huggingface/download/tokenizer/vocab.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 0c8153e39458bc8ae87fb68790c141bf0f03c11b
2
+ 469be27c5c010538f845f518c4f5e8574c78f7c8
3
+ 1771726777.2009902
SDPose_OOD/SDPose-Wholebody/huggingface/download/unet/config.json.lock ADDED
File without changes
SDPose_OOD/SDPose-Wholebody/huggingface/download/unet/config.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 0c8153e39458bc8ae87fb68790c141bf0f03c11b
2
+ 466532dc49c3a8bc2fe21ed4e640ca124278b00b
3
+ 1771726776.8096793
SDPose_OOD/SDPose-Wholebody/huggingface/download/unet/diffusion_pytorch_model.safetensors.lock ADDED
File without changes
SDPose_OOD/SDPose-Wholebody/huggingface/download/unet/diffusion_pytorch_model.safetensors.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 0c8153e39458bc8ae87fb68790c141bf0f03c11b
2
+ e951ac1802f004243bbbea6f0d86abfa08360776424f2d228c62ed63c5b1ab66
3
+ 1771726911.1530993
SDPose_OOD/SDPose-Wholebody/huggingface/download/vae/config.json.lock ADDED
File without changes
SDPose_OOD/SDPose-Wholebody/huggingface/download/vae/config.json.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 0c8153e39458bc8ae87fb68790c141bf0f03c11b
2
+ f97af6a6a8235236b1346312f328569ce2d70f81
3
+ 1771726777.0636213
SDPose_OOD/SDPose-Wholebody/huggingface/download/vae/diffusion_pytorch_model.safetensors.lock ADDED
File without changes
SDPose_OOD/SDPose-Wholebody/huggingface/download/vae/diffusion_pytorch_model.safetensors.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 0c8153e39458bc8ae87fb68790c141bf0f03c11b
2
+ a1d993488569e928462932c8c38a0760b874d166399b14414135bd9c42df5815
3
+ 1771726805.619076
SDPose_OOD/SDPose-Wholebody/huggingface/download/yolo11x.pt.lock ADDED
File without changes
SDPose_OOD/SDPose-Wholebody/huggingface/download/yolo11x.pt.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 0c8153e39458bc8ae87fb68790c141bf0f03c11b
2
+ 7bc158aa95c0ebfdd87f70f01653c1131b93e92522dbe15c228bcd742e773a24
3
+ 1771726788.7594335
SDPose_OOD/SDPose-Wholebody/scheduler/scheduler_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "DDPMScheduler",
3
+ "_diffusers_version": "0.28.0.dev0",
4
+ "beta_end": 0.012,
5
+ "beta_schedule": "scaled_linear",
6
+ "beta_start": 0.00085,
7
+ "clip_sample": false,
8
+ "num_train_timesteps": 1000,
9
+ "prediction_type": "sample",
10
+ "set_alpha_to_one": false,
11
+ "skip_prk_steps": true,
12
+ "steps_offset": 1,
13
+ "trained_betas": null
14
+ }
SDPose_OOD/SDPose-Wholebody/text_encoder/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "hf-models/stable-diffusion-v2-768x768/text_encoder",
3
+ "architectures": [
4
+ "CLIPTextModel"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "dropout": 0.0,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_size": 1024,
12
+ "initializer_factor": 1.0,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 4096,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 77,
17
+ "model_type": "clip_text_model",
18
+ "num_attention_heads": 16,
19
+ "num_hidden_layers": 23,
20
+ "pad_token_id": 1,
21
+ "projection_dim": 512,
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.25.0.dev0",
24
+ "vocab_size": 49408
25
+ }
SDPose_OOD/SDPose-Wholebody/text_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cce6febb0b6d876ee5eb24af35e27e764eb4f9b1d0b7c026c8c3333d4cfc916c
3
+ size 1361597018
SDPose_OOD/SDPose-Wholebody/tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
SDPose_OOD/SDPose-Wholebody/tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|startoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "!",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": true,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
SDPose_OOD/SDPose-Wholebody/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": {
4
+ "__type": "AddedToken",
5
+ "content": "<|startoftext|>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false
10
+ },
11
+ "do_lower_case": true,
12
+ "eos_token": {
13
+ "__type": "AddedToken",
14
+ "content": "<|endoftext|>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "errors": "replace",
21
+ "model_max_length": 77,
22
+ "name_or_path": "hf-models/stable-diffusion-v2-768x768/tokenizer",
23
+ "pad_token": "<|endoftext|>",
24
+ "special_tokens_map_file": "./special_tokens_map.json",
25
+ "tokenizer_class": "CLIPTokenizer",
26
+ "unk_token": {
27
+ "__type": "AddedToken",
28
+ "content": "<|endoftext|>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ }
34
+ }
SDPose_OOD/SDPose-Wholebody/tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
SDPose_OOD/SDPose-Wholebody/unet/config.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "UNet2DConditionModel",
3
+ "_diffusers_version": "0.28.0",
4
+ "_name_or_path": "/data/coding/model",
5
+ "act_fn": "silu",
6
+ "addition_embed_type": null,
7
+ "addition_embed_type_num_heads": 64,
8
+ "addition_time_embed_dim": null,
9
+ "attention_head_dim": [
10
+ 5,
11
+ 10,
12
+ 20,
13
+ 20
14
+ ],
15
+ "attention_type": "default",
16
+ "block_out_channels": [
17
+ 320,
18
+ 640,
19
+ 1280,
20
+ 1280
21
+ ],
22
+ "center_input_sample": false,
23
+ "class_embed_type": "projection",
24
+ "class_embeddings_concat": false,
25
+ "conv_in_kernel": 3,
26
+ "conv_out_kernel": 3,
27
+ "cross_attention_dim": 1024,
28
+ "cross_attention_norm": null,
29
+ "down_block_types": [
30
+ "CrossAttnDownBlock2D",
31
+ "CrossAttnDownBlock2D",
32
+ "CrossAttnDownBlock2D",
33
+ "DownBlock2D"
34
+ ],
35
+ "downsample_padding": 1,
36
+ "dropout": 0.0,
37
+ "dual_cross_attention": false,
38
+ "encoder_hid_dim": null,
39
+ "encoder_hid_dim_type": null,
40
+ "flip_sin_to_cos": true,
41
+ "freq_shift": 0,
42
+ "in_channels": 4,
43
+ "layers_per_block": 2,
44
+ "mid_block_only_cross_attention": null,
45
+ "mid_block_scale_factor": 1,
46
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
47
+ "norm_eps": 1e-05,
48
+ "norm_num_groups": 32,
49
+ "num_attention_heads": null,
50
+ "num_class_embeds": null,
51
+ "only_cross_attention": false,
52
+ "out_channels": 4,
53
+ "projection_class_embeddings_input_dim": 4,
54
+ "resnet_out_scale_factor": 1.0,
55
+ "resnet_skip_time_act": false,
56
+ "resnet_time_scale_shift": "default",
57
+ "reverse_transformer_layers_per_block": null,
58
+ "sample_size": 96,
59
+ "time_cond_proj_dim": null,
60
+ "time_embedding_act_fn": null,
61
+ "time_embedding_dim": null,
62
+ "time_embedding_type": "positional",
63
+ "timestep_post_act": null,
64
+ "transformer_layers_per_block": 1,
65
+ "up_block_types": [
66
+ "UpBlock2D",
67
+ "CrossAttnUpBlock2D",
68
+ "CrossAttnUpBlock2D",
69
+ "CrossAttnUpBlock2D"
70
+ ],
71
+ "upcast_attention": false,
72
+ "use_linear_projection": true
73
+ }
SDPose_OOD/SDPose-Wholebody/unet/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e951ac1802f004243bbbea6f0d86abfa08360776424f2d228c62ed63c5b1ab66
3
+ size 3470311272
SDPose_OOD/SDPose-Wholebody/vae/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderKL",
3
+ "_diffusers_version": "0.8.0",
4
+ "_name_or_path": "hf-models/stable-diffusion-v2-768x768/vae",
5
+ "act_fn": "silu",
6
+ "block_out_channels": [
7
+ 128,
8
+ 256,
9
+ 512,
10
+ 512
11
+ ],
12
+ "down_block_types": [
13
+ "DownEncoderBlock2D",
14
+ "DownEncoderBlock2D",
15
+ "DownEncoderBlock2D",
16
+ "DownEncoderBlock2D"
17
+ ],
18
+ "in_channels": 3,
19
+ "latent_channels": 4,
20
+ "layers_per_block": 2,
21
+ "norm_num_groups": 32,
22
+ "out_channels": 3,
23
+ "sample_size": 768,
24
+ "up_block_types": [
25
+ "UpDecoderBlock2D",
26
+ "UpDecoderBlock2D",
27
+ "UpDecoderBlock2D",
28
+ "UpDecoderBlock2D"
29
+ ]
30
+ }
SDPose_OOD/SDPose-Wholebody/vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1d993488569e928462932c8c38a0760b874d166399b14414135bd9c42df5815
3
+ size 334643276
SDPose_OOD/SDPose-Wholebody/yolo11x.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bc158aa95c0ebfdd87f70f01653c1131b93e92522dbe15c228bcd742e773a24
3
+ size 114636239