Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +82 -0
- README.md +73 -0
- README_en.md +66 -0
- app.py +230 -0
- config/config.ini +23 -0
- download.sh +32 -0
- example/audio.wav +3 -0
- example/video.mp4 +3 -0
- face_attr_detect/.DS_Store +0 -0
- face_attr_detect/__init__.py +1 -0
- face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so +3 -0
- face_attr_detect/face_attr_epoch_12_220318.onnx +3 -0
- face_detect_utils/__init__.py +0 -0
- face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so +3 -0
- face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so +3 -0
- face_detect_utils/resources/model_float32.onnx +3 -0
- face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx +3 -0
- face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx +3 -0
- face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so +3 -0
- face_lib/__init__.py +0 -0
- face_lib/face_detect_and_align/__init__.py +3 -0
- face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so +3 -0
- face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so +3 -0
- face_lib/face_detect_and_align/scrfd_insightface/__init__.py +5 -0
- face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so +3 -0
- face_lib/face_parsing/__init__.py +6 -0
- face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so +3 -0
- face_lib/face_restore/__init__.py +2 -0
- face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so +3 -0
- h_utils/__init__.py +9 -0
- h_utils/custom.cpython-38-x86_64-linux-gnu.so +3 -0
- h_utils/obs_client.cpython-38-x86_64-linux-gnu.so +3 -0
- h_utils/request_utils.cpython-38-x86_64-linux-gnu.so +3 -0
- h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so +3 -0
- h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so +3 -0
- landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so +3 -0
- landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth +3 -0
- landmark2face_wy/checkpoints/test/opt.txt +74 -0
- landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so +3 -0
- landmark2face_wy/data/Facereala3dmmexp512_dataset.py +212 -0
- landmark2face_wy/data/Facereala3dmmexpwenet512_dataset.py +222 -0
- landmark2face_wy/data/__init__.py +99 -0
- landmark2face_wy/data/base_dataset.cpython-38-x86_64-linux-gnu.so +3 -0
- landmark2face_wy/data/image_folder.cpython-38-x86_64-linux-gnu.so +3 -0
- landmark2face_wy/data/l2faceaudio512_dataset.py +189 -0
- landmark2face_wy/data/l2faceaudio_dataset.py +166 -0
- landmark2face_wy/digitalhuman_interface.cpython-38-x86_64-linux-gnu.so +3 -0
- landmark2face_wy/loss/__init__.py +0 -0
- landmark2face_wy/loss/perceptual.cpython-38-x86_64-linux-gnu.so +3 -0
- landmark2face_wy/models/DINet.cpython-38-x86_64-linux-gnu.so +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,85 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
example/audio.wav filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
example/video.mp4 filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
h_utils/custom.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
h_utils/obs_client.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
h_utils/request_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
landmark2face_wy/data/base_dataset.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
landmark2face_wy/data/image_folder.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
landmark2face_wy/digitalhuman_interface.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
landmark2face_wy/loss/perceptual.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
landmark2face_wy/models/DINet.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
landmark2face_wy/models/base_function.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
landmark2face_wy/models/base_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
landmark2face_wy/models/face3d2face_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
landmark2face_wy/models/face_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
landmark2face_wy/models/l2faceaudio_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
landmark2face_wy/models/networks.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
landmark2face_wy/models/networks_HD.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
landmark2face_wy/models/networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
landmark2face_wy/models/pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
landmark2face_wy/models/pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
landmark2face_wy/options/base_options.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
landmark2face_wy/options/test_options.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
landmark2face_wy/options/train_options.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
landmark2face_wy/sync_batchnorm/batchnorm.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
landmark2face_wy/sync_batchnorm/batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
landmark2face_wy/sync_batchnorm/comm.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
landmark2face_wy/sync_batchnorm/replicate.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
landmark2face_wy/sync_batchnorm/unittest.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
landmark2face_wy/test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
landmark2face_wy/test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
landmark2face_wy/util/flow_util.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
landmark2face_wy/util/get_data.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
landmark2face_wy/util/html.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
landmark2face_wy/util/image_pool.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
landmark2face_wy/util/util.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
landmark2face_wy/util/visualizer.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
model_lib/base_wrapper/onnx_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
service/server.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
service/trans_dh_service.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
wenet/compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
wenet/transformer/asr_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
wenet/transformer/attention.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
wenet/transformer/cmvn.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
wenet/transformer/convolution.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
wenet/transformer/ctc.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
wenet/transformer/decoder.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
wenet/transformer/decoder_layer.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
wenet/transformer/embedding.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
wenet/transformer/encoder.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
wenet/transformer/encoder_layer.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
wenet/transformer/label_smoothing_loss.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
wenet/transformer/positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
wenet/transformer/subsampling.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 103 |
+
wenet/transformer/swish.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 104 |
+
wenet/utils/checkpoint.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 105 |
+
wenet/utils/common.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 106 |
+
wenet/utils/ctc_util.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 107 |
+
wenet/utils/executor.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 108 |
+
wenet/utils/mask.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 109 |
+
wenet/utils/scheduler.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 110 |
+
xseg/dfl_xseg_api.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 111 |
+
y_utils/config.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 112 |
+
y_utils/lcr.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 113 |
+
y_utils/liblcr.so filter=lfs diff=lfs merge=lfs -text
|
| 114 |
+
y_utils/logger.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 115 |
+
y_utils/md5.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 116 |
+
y_utils/time_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 117 |
+
y_utils/tools.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
[](https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE)
|
| 3 |
+

|
| 4 |
+

|
| 5 |
+
|
| 6 |
+
**[中文](#chinese-version)** | **[English](README_en.md)**
|
| 7 |
+
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
<a name="chinese-version"></a>
|
| 11 |
+
|
| 12 |
+
# HeyGem-Linux-Python-Hack
|
| 13 |
+
|
| 14 |
+
## 项目简介
|
| 15 |
+
|
| 16 |
+
[HeyGem-Linux-Python-Hack] 是一个基于 Python 的数字人项目,它从 [HeyGem.ai](https://github.com/GuijiAI/HeyGem.ai) 中提取出来,它能够直接在 Linux 系统上运行,摆脱了对 Docker 和 Windows 系统的依赖。我们的目标是提供一个更易于部署和使用的数字人解决方案。
|
| 17 |
+
|
| 18 |
+
**如果你觉得这个项目对你有帮助,欢迎给我们 Star!**
|
| 19 |
+
**如果运行过程中遇到问题,在查阅已有 Issue 后,在查阅 Google/baidu/ai 后,欢迎提交 Issues!**
|
| 20 |
+
|
| 21 |
+
## 主要特性
|
| 22 |
+
|
| 23 |
+
* 无需 Docker: 直接在 Linux 系统上运行,简化部署流程。
|
| 24 |
+
* 无需 Windows: 完全基于 Linux 开发和测试。
|
| 25 |
+
* Python 驱动: 使用 Python 语言开发,易于理解和扩展。
|
| 26 |
+
* 开发者友好: 易于使用和扩展。
|
| 27 |
+
* 完全离线。
|
| 28 |
+
|
| 29 |
+
## 开始使用
|
| 30 |
+
|
| 31 |
+
### 安装
|
| 32 |
+
本项目**支持且仅支持 Linux & python3.8 环境**
|
| 33 |
+
请确保你的 Linux 系统上已经安装了 **Python 3.8**。然后,使用 pip 安装项目依赖项
|
| 34 |
+
同时也提供一个备用的环境 [requirements_0.txt](requirements_0.txt),遇到问题的话,你可以参考它来建立一个新的环境。
|
| 35 |
+
**具体的 onnxruntime-gpu / torch 等需要结合你的机器上的 cuda 版本去尝试一些组合,否则仍旧可能遇到问题。**
|
| 36 |
+
**请尽量不要询问任何关于 pip 的问题,感谢合作**
|
| 37 |
+
|
| 38 |
+
```bash
|
| 39 |
+
# 直接安装整个 requirements.txt 不一定成功,更建议跑代码观察报错信息,然后根据报错信息结合 requirements 去尝试安装,祝你顺利。
|
| 40 |
+
# pip install -r requirements.txt
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
### 使用
|
| 44 |
+
把项目克隆到本地
|
| 45 |
+
```bash
|
| 46 |
+
git clone https://github.com/Holasyb918/HeyGem-Linux-Python-Hack
|
| 47 |
+
cd HeyGem-Linux-Python-Hack
|
| 48 |
+
bash download.sh
|
| 49 |
+
```
|
| 50 |
+
#### 开始使用
|
| 51 |
+
* repo 中已提供可以用于 demo 的音视频样例,代码可以直接运行。
|
| 52 |
+
#### command:
|
| 53 |
+
```bash
|
| 54 |
+
python run.py
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
* 如果要使用自己的数据,可以外部传入参数,请注意,**path 是本地文件,且仅支持相对路径**.
|
| 58 |
+
|
| 59 |
+
#### command:
|
| 60 |
+
```bash
|
| 61 |
+
python run.py --audio_path example/audio.wav --video_path example/video.mp4
|
| 62 |
+
```
|
| 63 |
+
#### gradio:
|
| 64 |
+
```bash
|
| 65 |
+
python app.py
|
| 66 |
+
# 请等待模型初始化完成后提交任务
|
| 67 |
+
```
|
| 68 |
+
|
| 69 |
+
## Contributing
|
| 70 |
+
欢迎贡献!
|
| 71 |
+
|
| 72 |
+
## License
|
| 73 |
+
参考 heyGem.ai 的协议.
|
README_en.md
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
[](https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE)
|
| 3 |
+

|
| 4 |
+

|
| 5 |
+
|
| 6 |
+
**[中文](./README.md)** | **[English](#english-version)**
|
| 7 |
+
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
<a name="english-version"></a>
|
| 11 |
+
|
| 12 |
+
# HeyGem-Linux-Python-Hack
|
| 13 |
+
|
| 14 |
+
## Introduction
|
| 15 |
+
|
| 16 |
+
[HeyGem-Linux-Python-Hack] is a Python-based digital human project extracted from HeyGem.ai. It is designed to run directly on Linux systems, eliminating the need for Docker and Windows. Our goal is to provide an easier-to-deploy and user-friendly digital human solution.
|
| 17 |
+
|
| 18 |
+
**Feel free to Star us if you find this project useful!**
|
| 19 |
+
**Please submit an Issue if you run into any problems!**
|
| 20 |
+
|
| 21 |
+
## Key Features
|
| 22 |
+
|
| 23 |
+
* No Docker Required: Runs directly on Linux systems, simplifying the deployment process.
|
| 24 |
+
* No Windows Required: Fully developed and tested on Linux.
|
| 25 |
+
* Python Powered: Developed using the Python language, making it easy to understand and extend.
|
| 26 |
+
* Developer-Friendly: Easy to use, and easy to extend.
|
| 27 |
+
|
| 28 |
+
## Getting Started
|
| 29 |
+
|
| 30 |
+
### Installation
|
| 31 |
+
|
| 32 |
+
Please ensure that **Python 3.8** is installed on your Linux system. Then, you can install the project dependencies using pip:
|
| 33 |
+
|
| 34 |
+
```bash
|
| 35 |
+
pip install -r requirements.txt
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
### Usage
|
| 39 |
+
Clone this repository to your local machine:
|
| 40 |
+
```bash
|
| 41 |
+
git clone https://github.com/Holasyb918/HeyGem-Linux-Python-Hack
|
| 42 |
+
cd HeyGem-Linux-Python-Hack
|
| 43 |
+
bash download.sh
|
| 44 |
+
```
|
| 45 |
+
#### Getting Started
|
| 46 |
+
* Audio and video examples that can be used for the demo are already provided in the repo, and the code can be run directly.
|
| 47 |
+
#### Command:
|
| 48 |
+
```bash
|
| 49 |
+
python run.py
|
| 50 |
+
```
|
| 51 |
+
* If you want to use your own data, you can pass parameters externally. **Please note that the path is a local file and only supports relative paths.**
|
| 52 |
+
#### command:
|
| 53 |
+
```bash
|
| 54 |
+
python run.py --audio_path example/audio.wav --video_path example/video.mp4
|
| 55 |
+
```
|
| 56 |
+
#### gradio:
|
| 57 |
+
```bash
|
| 58 |
+
python app.py
|
| 59 |
+
# Please wait until processor init done.
|
| 60 |
+
```
|
| 61 |
+
|
| 62 |
+
## Contributing
|
| 63 |
+
Contributions are welcome!
|
| 64 |
+
|
| 65 |
+
## License
|
| 66 |
+
This project is licensed under the HeyGem.ai License.
|
app.py
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import gc
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
|
| 7 |
+
import subprocess
|
| 8 |
+
import threading
|
| 9 |
+
import time
|
| 10 |
+
import traceback
|
| 11 |
+
import uuid
|
| 12 |
+
from enum import Enum
|
| 13 |
+
import queue
|
| 14 |
+
import shutil
|
| 15 |
+
from functools import partial
|
| 16 |
+
|
| 17 |
+
import cv2
|
| 18 |
+
import gradio as gr
|
| 19 |
+
from flask import Flask, request
|
| 20 |
+
|
| 21 |
+
import service.trans_dh_service
|
| 22 |
+
from h_utils.custom import CustomError
|
| 23 |
+
from y_utils.config import GlobalConfig
|
| 24 |
+
from y_utils.logger import logger
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def write_video_gradio(
    output_imgs_queue,
    temp_dir,
    result_dir,
    work_id,
    audio_path,
    result_queue,
    width,
    height,
    fps,
    watermark_switch=0,
    digital_auth=0,
    temp_queue=None,
):
    """Drain rendered frames from ``output_imgs_queue`` into a silent mp4,
    then mux the audio (plus optional watermark / digital-human badge
    overlays) onto it with ffmpeg, reporting the outcome via ``result_queue``.

    Queue protocol (as consumed below): each item is ``(state, reason, frames)``
    where ``state is True`` signals normal end-of-stream, ``state is False``
    signals an upstream failure (``reason`` carries the cause), and any
    non-bool ``state`` means ``frames`` is an iterable of frames to write.

    On success ``result_queue`` receives ``[True, result_path]``; on failure
    ``[False, <error message>]``. ``temp_queue`` is accepted but unused
    (kept for signature compatibility with the patched callers).
    """
    output_mp4 = os.path.join(temp_dir, "{}-t.mp4".format(work_id))
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    result_path = os.path.join(result_dir, "{}-r.mp4".format(work_id))
    video_write = cv2.VideoWriter(output_mp4, fourcc, fps, (width, height))
    print("Custom VideoWriter init done")
    try:
        while True:
            state, reason, value_ = output_imgs_queue.get()
            if isinstance(state, bool) and state:
                logger.info(
                    "Custom VideoWriter [{}]视频帧队列处理已结束".format(work_id)
                )
                logger.info(
                    "Custom VideoWriter Silence Video saved in {}".format(
                        os.path.realpath(output_mp4)
                    )
                )
                video_write.release()
                break
            if isinstance(state, bool) and not state:
                logger.error(
                    "Custom VideoWriter [{}]任务视频帧队列 -> 异常原因:[{}]".format(
                        work_id, reason
                    )
                )
                raise CustomError(reason)
            for result_img in value_:
                video_write.write(result_img)
        if video_write is not None:
            # Releasing an already-released writer is a safe no-op in OpenCV.
            video_write.release()
        # Build the ffmpeg mux command. NOTE(review): the original code had
        # byte-identical commands in both arms of a `width > height` check in
        # the watermark+auth case, so the orientation branch is collapsed here
        # without changing behavior.
        if watermark_switch == 1 and digital_auth == 1:
            logger.info(
                "Custom VideoWriter [{}]任务需要水印和数字人标识".format(work_id)
            )
            command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
                audio_path,
                output_mp4,
                GlobalConfig.instance().watermark_path,
                GlobalConfig.instance().digital_auth_path,
                result_path,
            )
            logger.info("command:{}".format(command))
        elif watermark_switch == 1 and digital_auth == 0:
            logger.info("Custom VideoWriter [{}]任务需要水印".format(work_id))
            command = 'ffmpeg -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10" -c:a aac -crf 15 -strict -2 {}'.format(
                audio_path,
                output_mp4,
                GlobalConfig.instance().watermark_path,
                result_path,
            )
            logger.info("command:{}".format(command))
        elif watermark_switch == 0 and digital_auth == 1:
            logger.info("Custom VideoWriter [{}]任务需要数字人标识".format(work_id))
            # Same collapse as above: both orientation arms were identical.
            command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
                audio_path,
                output_mp4,
                GlobalConfig.instance().digital_auth_path,
                result_path,
            )
            logger.info("command:{}".format(command))
        else:
            command = "ffmpeg -loglevel warning -y -i {} -i {} -c:a aac -c:v libx264 -crf 15 -strict -2 {}".format(
                audio_path, output_mp4, result_path
            )
            logger.info("Custom command:{}".format(command))
        # SECURITY NOTE(review): shell=True with interpolated paths is
        # injection-prone if audio_path/work_id ever come from untrusted
        # input — confirm upstream sanitization.
        subprocess.call(command, shell=True)
        print("###### Custom Video Writer write over")
        print(f"###### Video result saved in {os.path.realpath(result_path)}")
        result_queue.put([True, result_path])
        # temp_queue.put([True, result_path])
    except Exception as e:
        logger.error(
            "Custom VideoWriter [{}]视频帧队列处理异常结束,异常原因:[{}]".format(
                work_id, e.__str__()
            )
        )
        result_queue.put(
            [
                False,
                "[{}]视频帧队列处理异常结束,异常原因:[{}]".format(
                    work_id, e.__str__()
                ),
            ]
        )
    logger.info("Custom VideoWriter 后处理进程结束")
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
service.trans_dh_service.write_video = write_video_gradio
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
class VideoProcessor:
    """Thin orchestration wrapper around ``service.trans_dh_service.TransDhTask``.

    Probes the input video, submits one synchronous digital-human generation
    job per call, and relocates the finished artifact under ``result/<job-id>``.
    """

    def __init__(self):
        self.task = service.trans_dh_service.TransDhTask()
        self.basedir = GlobalConfig.instance().result_dir
        self.is_initialized = False
        self._initialize_service()
        print("VideoProcessor init done")

    def _initialize_service(self):
        """Wait for the underlying service to warm up, then mark ready.

        NOTE(review): the 5-second sleep is a fixed grace period, not a real
        readiness probe — confirm whether TransDhTask exposes one.
        """
        logger.info("开始初始化 trans_dh_service...")
        try:
            time.sleep(5)
            logger.info("trans_dh_service 初始化完成。")
            self.is_initialized = True
        except Exception as e:
            logger.error(f"初始化 trans_dh_service 失败: {e}")

    def process_video(
        self, audio_file, video_file, watermark=False, digital_auth=False
    ):
        """Run one generation job and return the absolute path of the result.

        Args:
            audio_file: path to the driving audio (local file).
            video_file: path to the template video (local file).
            watermark / digital_auth: accepted for interface compatibility;
                not forwarded to the task here.

        Raises:
            gr.Error: wrapping any failure, so Gradio shows it to the user.
        """
        while not self.is_initialized:
            logger.info("服务尚未完成初始化,等待 1 秒...")
            time.sleep(1)
        work_id = str(uuid.uuid1())
        code = work_id
        temp_dir = os.path.join(GlobalConfig.instance().temp_dir, work_id)
        result_dir = GlobalConfig.instance().result_dir

        try:
            # Probe the template video; the values also validate readability.
            cap = cv2.VideoCapture(video_file)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            cap.release()

            audio_path = audio_file
            video_path = video_file

            self.task.task_dic[code] = ""
            # Synchronous: blocks until the task finishes and fills task_dic.
            self.task.work(audio_path, video_path, code, 0, 0, 0, 0)

            # task_dic[code][2] holds the produced file path — set by work().
            result_path = self.task.task_dic[code][2]
            final_result_dir = os.path.join("result", code)
            os.makedirs(final_result_dir, exist_ok=True)
            # shutil.move instead of `os.system("mv …")`: no shell involved,
            # works across filesystems, and raises on failure instead of
            # silently returning a non-zero exit code.
            shutil.move(result_path, final_result_dir)
            # Best-effort cleanup of intermediate files next to the result.
            os.system(
                f"rm -rf {os.path.join(os.path.dirname(result_path), code + '*.*')}"
            )
            result_path = os.path.realpath(
                os.path.join(final_result_dir, os.path.basename(result_path))
            )
            return result_path

        except Exception as e:
            logger.error(f"处理视频时发生错误: {e}")
            raise gr.Error(str(e))
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
if __name__ == "__main__":
    # Build the worker first so model warm-up happens before the UI opens.
    processor = VideoProcessor()

    # Gradio interface: two file inputs -> one generated video output.
    demo = gr.Interface(
        fn=processor.process_video,
        inputs=[
            gr.File(label="上传音频文件/upload audio file"),
            gr.File(label="上传视频文件/upload video file"),
        ],
        outputs=gr.Video(label="生成的视频/Generated video"),
        title="数字人视频生成/Digital Human Video Generation",
        description="上传音频和视频文件,即可生成数字人视频。/Upload audio and video files to generate digital human videos.",
    )
    demo.queue().launch()
|
config/config.ini
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[log]
|
| 2 |
+
log_dir = ./log
|
| 3 |
+
log_file = dh.log
|
| 4 |
+
|
| 5 |
+
[http_server]
|
| 6 |
+
server_ip = 0.0.0.0
|
| 7 |
+
server_port = 8383
|
| 8 |
+
|
| 9 |
+
[temp]
|
| 10 |
+
temp_dir = ./
|
| 11 |
+
clean_switch = 1
|
| 12 |
+
|
| 13 |
+
[result]
|
| 14 |
+
result_dir = ./result
|
| 15 |
+
clean_switch = 0
|
| 16 |
+
|
| 17 |
+
[digital]
|
| 18 |
+
batch_size = 4
|
| 19 |
+
|
| 20 |
+
[register]
|
| 21 |
+
url = http://172.16.160.51:12120
|
| 22 |
+
report_interval = 10
|
| 23 |
+
enable=0
|
download.sh
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
set -e
set -u

# All model assets live under one GitHub release; fetch() downloads a single
# asset into an explicit destination path.
BASE_URL=https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx
fetch() {
    wget "${BASE_URL}/$1" -O "$2"
}

# face attr
mkdir -p face_attr_detect
fetch face_attr_epoch_12_220318.onnx face_attr_detect/face_attr_epoch_12_220318.onnx

# face detect
mkdir -p face_detect_utils/resources
fetch pfpld_robust_sim_bs1_8003.onnx face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx
fetch scrfd_500m_bnkps_shape640x640.onnx face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx
fetch model_float32.onnx face_detect_utils/resources/model_float32.onnx

# dh model
mkdir -p landmark2face_wy/checkpoints/anylang
fetch dinet_v1_20240131.pth landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth

# face parsing
mkdir -p pretrain_models/face_lib/face_parsing
fetch 79999_iter.onnx pretrain_models/face_lib/face_parsing/79999_iter.onnx

# gfpgan
mkdir -p pretrain_models/face_lib/face_restore/gfpgan
fetch GFPGANv1.4.onnx pretrain_models/face_lib/face_restore/gfpgan/GFPGANv1.4.onnx

# xseg
mkdir -p xseg
fetch xseg_211104_4790000.onnx xseg/xseg_211104_4790000.onnx

# wenet
mkdir -p wenet/examples/aishell/aidata/exp/conformer
fetch wenetmodel.pt wenet/examples/aishell/aidata/exp/conformer/wenetmodel.pt
|
example/audio.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:886f4e5cd90b79c8575c8bb18c93d41543b2619272f75841dac095a65c8f85dd
|
| 3 |
+
size 192044
|
example/video.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a11e32bda4b3d15777ed8d481e66859805e71c5168221d0098eac2b31b3f4e7b
|
| 3 |
+
size 7068410
|
face_attr_detect/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
face_attr_detect/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .face_attr import FaceAttr
|
face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0aa56c5cbaddc1bc7204823fd2252cf54d056365795737c846f876236a3e5056
|
| 3 |
+
size 441864
|
face_attr_detect/face_attr_epoch_12_220318.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0fa6604beacd1e560ffc4cae6fa1537591d5f1a765a9f55473a295a1d22da3af
|
| 3 |
+
size 3723167
|
face_detect_utils/__init__.py
ADDED
|
File without changes
|
face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68b5fd825eebc9421090c1daf3e940833b7bf5712ecee16deef937c87bbe666e
|
| 3 |
+
size 1363368
|
face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1897346bf14dbbac7328a43598ba0c6d3f4db9ab6628dbebb381d4139aca179e
|
| 3 |
+
size 1158712
|
face_detect_utils/resources/model_float32.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e5dc9dd52836b2029a599e74134f1a0f03e416db3e40e932f69609adb0238ad
|
| 3 |
+
size 17315434
|
face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8bd9913817152831562cccde7e51ed431d1cf4547d8f21e0876b58a0d82baa55
|
| 3 |
+
size 6889235
|
face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b467f878e26ff1e7ee05cd9936fdff63fa2a5af5d732ed17ee231f2dd5cc07ae
|
| 3 |
+
size 2524648
|
face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7fa07e8146248e1b4deaafdfa0cc6c1e1b7a9d641db536aa3ae9b9ee10ab3b01
|
| 3 |
+
size 3178688
|
face_lib/__init__.py
ADDED
|
File without changes
|
face_lib/face_detect_and_align/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .face_align_5_landmarks import FaceDetect5Landmarks
|
| 2 |
+
from .face_align_utils import estimate_norm
|
| 3 |
+
|
face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f0fd0fff60f8fb1fe6cbb1b549d5c43ae9bfaef1e5f4ee4edb27085d3023d22
|
| 3 |
+
size 1321904
|
face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da29cd727e8bf8f7107c322d5d40ef8596b29e2a858cad779362dbf2516c38a0
|
| 3 |
+
size 1611632
|
face_lib/face_detect_and_align/scrfd_insightface/__init__.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -- coding: utf-8 --
|
| 2 |
+
# @Time : 2021/11/10
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
from .scrfd import SCRFD
|
face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a8b9f64f8ef1c198e7d240ba6f1c9e4ff333c48f0259b71e74ca466d5ea274bb
|
| 3 |
+
size 2529880
|
face_lib/face_parsing/__init__.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -- coding: utf-8 --
|
| 2 |
+
# @Time : 2022/3/29
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
from .face_parsing_api import FaceParsing
|
| 6 |
+
# from .dfl_xseg_net import XsegNet
|
face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04931709d9c22f909e7ead81acb06cae05b70162fbcb6d2055ac7315b61834d3
|
| 3 |
+
size 1053792
|
face_lib/face_restore/__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
from .gfpgan_onnx.gfpgan_onnx_api import GFPGAN
|
face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2088f84d33b0a442e7dcb04135939e56d918b8edefd1de0b06340cb38573d1e
|
| 3 |
+
size 567104
|
h_utils/__init__.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/user/bin/env python
|
| 2 |
+
# coding=utf-8
|
| 3 |
+
"""
|
| 4 |
+
@project : dhp-service
|
| 5 |
+
@author : huyi
|
| 6 |
+
@file : __init__.py.py
|
| 7 |
+
@ide : PyCharm
|
| 8 |
+
@time : 2021-08-18 15:45:13
|
| 9 |
+
"""
|
h_utils/custom.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:37ecc1ed06eb9b804f9de3470dbe6780976514d120bde8fed20d67c4cd26fe0e
|
| 3 |
+
size 259136
|
h_utils/obs_client.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d881bc9a1303697bd912c005c71ccc74bab724f1ade6e1156c2d9ca0277e344
|
| 3 |
+
size 982656
|
h_utils/request_utils.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:215847205ce3a0f416ee3f5d07b4406b88ac0815cdd3e671c0d317c649cf2420
|
| 3 |
+
size 1304616
|
h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2fcfcce02dcbc5f9740329c10ab5fd0bfb157f3e6eb2fa4622adb901ac1feab5
|
| 3 |
+
size 607848
|
h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4baf454a9940278b6696977b71a489ccf7c920faf37340b9968dcddfa582c508
|
| 3 |
+
size 594864
|
landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ced0b512524155f205d2b4c6a46a1a63c2d347387b30550a1fd99ccd53df172
|
| 3 |
+
size 1749648
|
landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c4568b1f1f2890b4a92edc3f9457af63f908065961233af2125d060cb56fd9e
|
| 3 |
+
size 392392973
|
landmark2face_wy/checkpoints/test/opt.txt
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
----------------- Options ---------------
|
| 2 |
+
aspect_ratio: 1.0
|
| 3 |
+
audio_feature: 3dmm
|
| 4 |
+
batch_size: 16
|
| 5 |
+
checkpoints_dir: ./landmark2face_wy/checkpoints
|
| 6 |
+
crop_size: 256
|
| 7 |
+
dataroot: ./data
|
| 8 |
+
dataset_mode: Facereala3dmm
|
| 9 |
+
direction: AtoB
|
| 10 |
+
display_winsize: 256
|
| 11 |
+
distributed: False
|
| 12 |
+
epoch: latest
|
| 13 |
+
eval: False
|
| 14 |
+
feat_num: 3
|
| 15 |
+
feature_path: ../AnnI_deep3dface_256_contains_id/
|
| 16 |
+
fp16: False
|
| 17 |
+
gpu_ids: 0
|
| 18 |
+
img_size: 256
|
| 19 |
+
init_gain: 0.02
|
| 20 |
+
init_type: normal
|
| 21 |
+
input_nc: 3
|
| 22 |
+
instance_feat: False
|
| 23 |
+
isTrain: False [default: None]
|
| 24 |
+
label_feat: False
|
| 25 |
+
lan_size: 1
|
| 26 |
+
load_features: False
|
| 27 |
+
load_iter: 0 [default: 0]
|
| 28 |
+
load_size: 286
|
| 29 |
+
local_rank: -1
|
| 30 |
+
max_dataset_size: inf
|
| 31 |
+
mfcc0_rate: 0.2
|
| 32 |
+
model: pirender_3dmm_mouth_hd
|
| 33 |
+
model_path: ./landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth
|
| 34 |
+
n_blocks: 9
|
| 35 |
+
n_blocks_global: 9
|
| 36 |
+
n_blocks_local: 3
|
| 37 |
+
n_clusters: 10
|
| 38 |
+
n_downsample_E: 4
|
| 39 |
+
n_downsample_global: 4
|
| 40 |
+
n_layers_D: 3
|
| 41 |
+
n_local_enhancers: 1
|
| 42 |
+
name: test
|
| 43 |
+
ndf: 64
|
| 44 |
+
nef: 16
|
| 45 |
+
netD: basic
|
| 46 |
+
netG: pirender
|
| 47 |
+
ngf: 64
|
| 48 |
+
niter_fix_global: 0
|
| 49 |
+
no_dropout: True
|
| 50 |
+
no_flip: False
|
| 51 |
+
no_ganFeat_loss: False
|
| 52 |
+
no_instance: False
|
| 53 |
+
norm: instance
|
| 54 |
+
ntest: inf
|
| 55 |
+
num_D: 2
|
| 56 |
+
num_test: 50
|
| 57 |
+
num_threads: 4
|
| 58 |
+
output_nc: 3
|
| 59 |
+
perceptual_layers: ['relu_1_1', 'relu_2_1', 'relu_3_1', 'relu_4_1', 'relu_5_1']
|
| 60 |
+
perceptual_network: vgg19
|
| 61 |
+
perceptual_num_scales: 4
|
| 62 |
+
perceptual_use_style_loss: True
|
| 63 |
+
perceptual_weights: [4, 4, 4, 4, 4]
|
| 64 |
+
phase: test
|
| 65 |
+
preprocess: resize_and_crop
|
| 66 |
+
resize_size: 512
|
| 67 |
+
results_dir: ./results/
|
| 68 |
+
serial_batches: False
|
| 69 |
+
suffix:
|
| 70 |
+
test_audio_path: None
|
| 71 |
+
test_muban: None
|
| 72 |
+
verbose: False
|
| 73 |
+
weight_style_to_perceptual: 250
|
| 74 |
+
----------------- End -------------------
|
landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bfb97b75c48cdbf3cde9f0d6cb9c2e0665b8a0f8f6870a78480263638f8b2bd9
|
| 3 |
+
size 3479728
|
landmark2face_wy/data/Facereala3dmmexp512_dataset.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os.path
|
| 2 |
+
import random
|
| 3 |
+
from data.base_dataset import BaseDataset, get_params, get_transform
|
| 4 |
+
import torchvision.transforms as transforms
|
| 5 |
+
from data.image_folder import make_dataset
|
| 6 |
+
from PIL import Image, ImageEnhance
|
| 7 |
+
import numpy as np
|
| 8 |
+
import cv2
|
| 9 |
+
import torch
|
| 10 |
+
import time
|
| 11 |
+
|
| 12 |
+
def get_idts(config_name):
|
| 13 |
+
idts = list()
|
| 14 |
+
with open(os.path.join('../config', config_name + '.txt')) as f:
|
| 15 |
+
for line in f:
|
| 16 |
+
line = line.strip()
|
| 17 |
+
video_name = line.split(':')[0]
|
| 18 |
+
idts.append(video_name)
|
| 19 |
+
return idts
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def obtain_seq_index(index, num_frames):
|
| 23 |
+
seq = list(range(index - 13, index + 13 + 1))
|
| 24 |
+
seq = [min(max(item, 0), num_frames - 1) for item in seq]
|
| 25 |
+
return seq
|
| 26 |
+
|
| 27 |
+
def get_3dmm_feature(img_path, idx, new_dict):
|
| 28 |
+
id = img_path.split('/')[-3]
|
| 29 |
+
features = new_dict[id]
|
| 30 |
+
idx_list = obtain_seq_index(idx, features.shape[0])
|
| 31 |
+
feature = features[idx_list, 80:144]
|
| 32 |
+
# feature[:, -1] = 50
|
| 33 |
+
return np.transpose(feature, (1, 0))
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class Facereala3dmmexp512Dataset(BaseDataset):
|
| 38 |
+
def __init__(self, opt, mode=None):
|
| 39 |
+
BaseDataset.__init__(self, opt)
|
| 40 |
+
img_size = opt.img_size
|
| 41 |
+
idts = get_idts(opt.name.split('_')[0])
|
| 42 |
+
print("---------load data list--------: ", idts)
|
| 43 |
+
self.new_dict = {}
|
| 44 |
+
if mode == 'train':
|
| 45 |
+
self.labels = []
|
| 46 |
+
self.label_starts = []
|
| 47 |
+
self.label_ends = []
|
| 48 |
+
count = 0
|
| 49 |
+
for idt_name in idts:
|
| 50 |
+
# root = '../AnnVI/feature/{}'.format(idt_name)
|
| 51 |
+
root = os.path.join(opt.feature_path, idt_name)
|
| 52 |
+
feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
|
| 53 |
+
self.new_dict[idt_name] = feature
|
| 54 |
+
if opt.audio_feature == "3dmm":
|
| 55 |
+
training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
|
| 56 |
+
else:
|
| 57 |
+
training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
|
| 58 |
+
training_data = torch.load(training_data_path)
|
| 59 |
+
img_paths = training_data['img_paths']
|
| 60 |
+
features_3dmm = training_data['features_3dmm']
|
| 61 |
+
index = [i[0].split('/')[-1] for i in img_paths]
|
| 62 |
+
|
| 63 |
+
image_dir = '{}/{}_dlib_crop'.format(root, img_size)
|
| 64 |
+
self.label_starts.append(count)
|
| 65 |
+
for img in range(len(index)):
|
| 66 |
+
img_path = os.path.join(image_dir, index[img])
|
| 67 |
+
# idx_list = obtain_seq_index(img, feature.shape[0])
|
| 68 |
+
# self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
|
| 69 |
+
self.labels.append([img_path, features_3dmm[img]])
|
| 70 |
+
count = count + 1
|
| 71 |
+
self.label_ends.append(count)
|
| 72 |
+
|
| 73 |
+
self.label_starts = np.array(self.label_starts)
|
| 74 |
+
self.label_ends = np.array(self.label_ends)
|
| 75 |
+
self.transforms_image = transforms.Compose([transforms.ToTensor(),
|
| 76 |
+
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
|
| 77 |
+
|
| 78 |
+
self.transforms_label = transforms.Compose([transforms.ToTensor(),
|
| 79 |
+
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
|
| 80 |
+
self.shuffle()
|
| 81 |
+
elif mode == 'test':
|
| 82 |
+
self.labels = []
|
| 83 |
+
self.label_starts = []
|
| 84 |
+
self.label_ends = []
|
| 85 |
+
count = 0
|
| 86 |
+
for idt_name in idts:
|
| 87 |
+
# root = '../AnnVI/feature/{}'.format(idt_name)
|
| 88 |
+
root = os.path.join(opt.feature_path, idt_name)
|
| 89 |
+
feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
|
| 90 |
+
self.new_dict[idt_name] = feature
|
| 91 |
+
if opt.audio_feature == "3dmm":
|
| 92 |
+
training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
|
| 93 |
+
else:
|
| 94 |
+
training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
|
| 95 |
+
training_data = torch.load(training_data_path)
|
| 96 |
+
img_paths = training_data['img_paths']
|
| 97 |
+
features_3dmm = training_data['features_3dmm']
|
| 98 |
+
index = [i[0].split('/')[-1] for i in img_paths]
|
| 99 |
+
|
| 100 |
+
image_dir = '{}/{}_dlib_crop'.format(root, img_size)
|
| 101 |
+
self.label_starts.append(count)
|
| 102 |
+
for img in range(len(index)):
|
| 103 |
+
img_path = os.path.join(image_dir, index[img])
|
| 104 |
+
# idx_list = obtain_seq_index(img, feature.shape[0])
|
| 105 |
+
# self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
|
| 106 |
+
self.labels.append([img_path, features_3dmm[img]])
|
| 107 |
+
count = count + 1
|
| 108 |
+
self.label_ends.append(count)
|
| 109 |
+
|
| 110 |
+
self.label_starts = np.array(self.label_starts)
|
| 111 |
+
self.label_ends = np.array(self.label_ends)
|
| 112 |
+
self.transforms_image = transforms.Compose([transforms.ToTensor(),
|
| 113 |
+
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
|
| 114 |
+
|
| 115 |
+
self.transforms_label = transforms.Compose([transforms.ToTensor(),
|
| 116 |
+
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
|
| 117 |
+
self.shuffle()
|
| 118 |
+
|
| 119 |
+
def shuffle(self):
|
| 120 |
+
self.labels_index = list(range(len(self.labels)))
|
| 121 |
+
random.shuffle(self.labels_index)
|
| 122 |
+
|
| 123 |
+
def add_mouth_mask2(self, img):
|
| 124 |
+
mask = np.ones_like(img)
|
| 125 |
+
rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30]
|
| 126 |
+
mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
|
| 127 |
+
x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
|
| 128 |
+
x = np.flip(x, 0)
|
| 129 |
+
y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
|
| 130 |
+
zz1 = -y - x + 88 > 0
|
| 131 |
+
zz2 = np.flip(zz1, 1)
|
| 132 |
+
zz = (zz1 + zz2) > 0
|
| 133 |
+
mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
|
| 134 |
+
imgm = img * mask
|
| 135 |
+
return imgm
|
| 136 |
+
|
| 137 |
+
def __getitem__(self, index):
|
| 138 |
+
# s1= time.time()
|
| 139 |
+
idx = self.labels_index[index]
|
| 140 |
+
img_path, feature_3dmm_idx= self.labels[idx]
|
| 141 |
+
# print(img_path, feature_3dmm_idx)
|
| 142 |
+
feature_3dmm = get_3dmm_feature(img_path, feature_3dmm_idx, self.new_dict)
|
| 143 |
+
#print(img_path, feature_3dmm_idx, feature_3dmm.shape)
|
| 144 |
+
|
| 145 |
+
img = np.array(Image.open(img_path).convert('RGB'))
|
| 146 |
+
img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
|
| 147 |
+
cut_pad1 = np.random.randint(0, 20)
|
| 148 |
+
cut_pad2 = np.random.randint(0, 20)
|
| 149 |
+
img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
|
| 150 |
+
# s2 =time.time()
|
| 151 |
+
# print('get data and read data ', s2-s1)
|
| 152 |
+
mask_B = img.copy()
|
| 153 |
+
# mask_end = np.random.randint(236*2, 250*2)
|
| 154 |
+
# index = np.random.randint(80, 90)
|
| 155 |
+
# mask_B[mask_B.shape[1] // 2 - index:mask_end, 30:-30] = 0
|
| 156 |
+
mask_end = np.random.randint(480, 500)
|
| 157 |
+
index = np.random.randint(15, 30)
|
| 158 |
+
mask_B[index:mask_end, 70:-70] = 0
|
| 159 |
+
img = Image.fromarray(img)
|
| 160 |
+
|
| 161 |
+
mask_B = Image.fromarray(mask_B)
|
| 162 |
+
img = self.transforms_image(img)
|
| 163 |
+
mask_B = self.transforms_image(mask_B)
|
| 164 |
+
|
| 165 |
+
x = np.where((idx >= self.label_starts) * (idx < self.label_ends))[0]
|
| 166 |
+
|
| 167 |
+
audio = torch.tensor(feature_3dmm)
|
| 168 |
+
# s3 = time.time()
|
| 169 |
+
# print('get 3dmm and mask ', s3 - s2)
|
| 170 |
+
# 保证real_A_index不是idx
|
| 171 |
+
max_i = 0
|
| 172 |
+
real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
|
| 173 |
+
while real_A_index == idx:
|
| 174 |
+
max_i += 1
|
| 175 |
+
real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
|
| 176 |
+
if max_i > 5:
|
| 177 |
+
break
|
| 178 |
+
|
| 179 |
+
imgA_path, _ = self.labels[real_A_index]
|
| 180 |
+
imgA = np.array(Image.open(imgA_path).convert('RGB'))
|
| 181 |
+
cut_pad1 = np.random.randint(0, 20)
|
| 182 |
+
cut_pad2 = np.random.randint(0, 20)
|
| 183 |
+
imgA = imgA[cut_pad1:256*2 + cut_pad1, cut_pad2:256*2 + cut_pad2]
|
| 184 |
+
|
| 185 |
+
########椭圆##########
|
| 186 |
+
# mask = np.zeros(imgA.shape, dtype=np.uint8)
|
| 187 |
+
# cv2.ellipse(mask, (imgA.shape[1] // 2, imgA.shape[0] // 2 - 165 - cut_pad1),
|
| 188 |
+
# (imgA.shape[1] // 2 + 25, imgA.shape[0]), 0, 0, 360, (255, 255, 255), -1)
|
| 189 |
+
# ROI = cv2.bitwise_and(imgA, mask)
|
| 190 |
+
# imgA = Image.fromarray(ROI)
|
| 191 |
+
#############################
|
| 192 |
+
# imgA[:imgA.shape[1] // 2 - 40 - index2, :] = 0
|
| 193 |
+
imgA = Image.fromarray(imgA)
|
| 194 |
+
imgA = self.transforms_image(imgA)
|
| 195 |
+
# s4 = time.time()
|
| 196 |
+
# print('end time reala ', s4 - s3)
|
| 197 |
+
return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
|
| 198 |
+
|
| 199 |
+
def __len__(self):
|
| 200 |
+
"""Return the total number of images in the dataset."""
|
| 201 |
+
return len(self.labels)
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
if __name__ == '__main__':
|
| 205 |
+
from options.train_options import TrainOptions
|
| 206 |
+
|
| 207 |
+
opt = TrainOptions().parse()
|
| 208 |
+
dataset = Facereala3dmmDataset(opt)
|
| 209 |
+
dataset_size = len(dataset)
|
| 210 |
+
print(dataset_size)
|
| 211 |
+
for i, data in enumerate(dataset):
|
| 212 |
+
print(data)
|
landmark2face_wy/data/Facereala3dmmexpwenet512_dataset.py
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os.path
|
| 2 |
+
import random
|
| 3 |
+
from data.base_dataset import BaseDataset, get_params, get_transform
|
| 4 |
+
import torchvision.transforms as transforms
|
| 5 |
+
from data.image_folder import make_dataset
|
| 6 |
+
from PIL import Image, ImageEnhance
|
| 7 |
+
import numpy as np
|
| 8 |
+
import cv2
|
| 9 |
+
import torch
|
| 10 |
+
import time
|
| 11 |
+
|
| 12 |
+
def get_idts(config_name):
|
| 13 |
+
idts = list()
|
| 14 |
+
with open(os.path.join('../config', config_name + '.txt')) as f:
|
| 15 |
+
for line in f:
|
| 16 |
+
line = line.strip()
|
| 17 |
+
video_name = line.split(':')[0]
|
| 18 |
+
idts.append(video_name)
|
| 19 |
+
return idts
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def obtain_seq_index(index, num_frames):
|
| 23 |
+
seq = list(range(index - 10, index + 9 + 1))
|
| 24 |
+
seq = [min(max(item, 0), num_frames - 1) for item in seq]
|
| 25 |
+
return seq
|
| 26 |
+
|
| 27 |
+
def get_3dmm_feature(img_path, idx, audio_feature, new_dict):
|
| 28 |
+
id = img_path.split('/')[-3]
|
| 29 |
+
features, features1, features1 = new_dict[id]
|
| 30 |
+
idx_list = obtain_seq_index(idx, features.shape[0])
|
| 31 |
+
feature = features[idx_list, 80:144]
|
| 32 |
+
feature1 = features1[:,audio_feature[0]:audio_feature[1]]
|
| 33 |
+
feature = np.concatenate([feature, features[idx_list, -3:], np.transpose(feature1, (1, 0))], 1)
|
| 34 |
+
# print(feature.shape)
|
| 35 |
+
return np.transpose(feature, (1, 0))
|
| 36 |
+
# return feature
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class Facereala3dmmexpwenet512Dataset(BaseDataset):
|
| 41 |
+
def __init__(self, opt, mode=None):
|
| 42 |
+
BaseDataset.__init__(self, opt)
|
| 43 |
+
img_size = opt.img_size
|
| 44 |
+
idts = get_idts(opt.name.split('_')[0])
|
| 45 |
+
print("---------load data list--------: ", idts)
|
| 46 |
+
self.new_dict = {}
|
| 47 |
+
if mode == 'train':
|
| 48 |
+
self.labels = []
|
| 49 |
+
self.label_starts = []
|
| 50 |
+
self.label_ends = []
|
| 51 |
+
count = 0
|
| 52 |
+
for idt_name in idts:
|
| 53 |
+
# root = '../AnnVI/feature/{}'.format(idt_name)
|
| 54 |
+
root = os.path.join(opt.feature_path, idt_name)
|
| 55 |
+
feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
|
| 56 |
+
feature1 = np.load(os.path.join(root,'audio_wenet_feature.npy'))
|
| 57 |
+
self.new_dict[idt_name] = [feature, feature1, feature1]
|
| 58 |
+
if opt.audio_feature == "3dmm":
|
| 59 |
+
training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
|
| 60 |
+
else:
|
| 61 |
+
training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
|
| 62 |
+
training_data = torch.load(training_data_path)
|
| 63 |
+
img_paths = training_data['img_paths']
|
| 64 |
+
features_3dmm = training_data['features_3dmm']
|
| 65 |
+
audio_features = np.load(os.path.join(root, 'audio_data.npy'), allow_pickle=True)
|
| 66 |
+
audio_features = audio_features.tolist()
|
| 67 |
+
index = [i[0].split('/')[-1] for i in img_paths]
|
| 68 |
+
|
| 69 |
+
image_dir = '{}/{}_dlib_crop'.format(root, img_size)
|
| 70 |
+
self.label_starts.append(count)
|
| 71 |
+
for img in range(len(index)):
|
| 72 |
+
img_path = os.path.join(image_dir, index[img])
|
| 73 |
+
# idx_list = obtain_seq_index(img, feature.shape[0])
|
| 74 |
+
# self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
|
| 75 |
+
if type(features_3dmm[img]) != int:
|
| 76 |
+
print(img_path)
|
| 77 |
+
audio_feature = audio_features[img]
|
| 78 |
+
self.labels.append([img_path, features_3dmm[img], audio_feature])
|
| 79 |
+
count = count + 1
|
| 80 |
+
self.label_ends.append(count)
|
| 81 |
+
|
| 82 |
+
self.label_starts = np.array(self.label_starts)
|
| 83 |
+
self.label_ends = np.array(self.label_ends)
|
| 84 |
+
self.transforms_image = transforms.Compose([transforms.ToTensor(),
|
| 85 |
+
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
|
| 86 |
+
|
| 87 |
+
self.transforms_label = transforms.Compose([transforms.ToTensor(),
|
| 88 |
+
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
|
| 89 |
+
self.shuffle()
|
| 90 |
+
elif mode == 'test':
|
| 91 |
+
self.labels = []
|
| 92 |
+
self.label_starts = []
|
| 93 |
+
self.label_ends = []
|
| 94 |
+
count = 0
|
| 95 |
+
for idt_name in idts:
|
| 96 |
+
# root = '../AnnVI/feature/{}'.format(idt_name)
|
| 97 |
+
root = os.path.join(opt.feature_path, idt_name)
|
| 98 |
+
feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
|
| 99 |
+
self.new_dict[idt_name] = feature
|
| 100 |
+
if opt.audio_feature == "3dmm":
|
| 101 |
+
training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
|
| 102 |
+
else:
|
| 103 |
+
training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
|
| 104 |
+
training_data = torch.load(training_data_path)
|
| 105 |
+
img_paths = training_data['img_paths']
|
| 106 |
+
features_3dmm = training_data['features_3dmm']
|
| 107 |
+
index = [i[0].split('/')[-1] for i in img_paths]
|
| 108 |
+
|
| 109 |
+
image_dir = '{}/{}_dlib_crop'.format(root, img_size)
|
| 110 |
+
self.label_starts.append(count)
|
| 111 |
+
for img in range(len(index)):
|
| 112 |
+
img_path = os.path.join(image_dir, index[img])
|
| 113 |
+
# idx_list = obtain_seq_index(img, feature.shape[0])
|
| 114 |
+
# self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
|
| 115 |
+
self.labels.append([img_path, features_3dmm[img]])
|
| 116 |
+
count = count + 1
|
| 117 |
+
self.label_ends.append(count)
|
| 118 |
+
|
| 119 |
+
self.label_starts = np.array(self.label_starts)
|
| 120 |
+
self.label_ends = np.array(self.label_ends)
|
| 121 |
+
self.transforms_image = transforms.Compose([transforms.ToTensor(),
|
| 122 |
+
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
|
| 123 |
+
|
| 124 |
+
self.transforms_label = transforms.Compose([transforms.ToTensor(),
|
| 125 |
+
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
|
| 126 |
+
self.shuffle()
|
| 127 |
+
|
| 128 |
+
def shuffle(self):
|
| 129 |
+
self.labels_index = list(range(len(self.labels)))
|
| 130 |
+
random.shuffle(self.labels_index)
|
| 131 |
+
|
| 132 |
+
def add_mouth_mask2(self, img):
|
| 133 |
+
mask = np.ones_like(img)
|
| 134 |
+
rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30]
|
| 135 |
+
mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
|
| 136 |
+
x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
|
| 137 |
+
x = np.flip(x, 0)
|
| 138 |
+
y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
|
| 139 |
+
zz1 = -y - x + 88 > 0
|
| 140 |
+
zz2 = np.flip(zz1, 1)
|
| 141 |
+
zz = (zz1 + zz2) > 0
|
| 142 |
+
mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
|
| 143 |
+
imgm = img * mask
|
| 144 |
+
return imgm
|
| 145 |
+
|
| 146 |
+
def __getitem__(self, index):
|
| 147 |
+
# s1= time.time()
|
| 148 |
+
idx = self.labels_index[index]
|
| 149 |
+
img_path, feature_3dmm_idx, audio_feature= self.labels[idx]
|
| 150 |
+
# print(img_path, feature_3dmm_idx)
|
| 151 |
+
feature_3dmm = get_3dmm_feature(img_path, feature_3dmm_idx, audio_feature, self.new_dict)
|
| 152 |
+
#print(img_path, feature_3dmm_idx, feature_3dmm.shape)
|
| 153 |
+
|
| 154 |
+
img = np.array(Image.open(img_path).convert('RGB'))
|
| 155 |
+
img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
|
| 156 |
+
cut_pad1 = np.random.randint(0, 20)
|
| 157 |
+
cut_pad2 = np.random.randint(0, 20)
|
| 158 |
+
img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
|
| 159 |
+
# s2 =time.time()
|
| 160 |
+
# print('get data and read data ', s2-s1)
|
| 161 |
+
mask_B = img.copy()
|
| 162 |
+
# mask_end = np.random.randint(236*2, 250*2)
|
| 163 |
+
# index = np.random.randint(80, 90)
|
| 164 |
+
# mask_B[mask_B.shape[1] // 2 - index:mask_end, 30:-30] = 0
|
| 165 |
+
mask_end = np.random.randint(480, 500)
|
| 166 |
+
index = np.random.randint(15, 30)
|
| 167 |
+
# index = np.random.randint(90, 100)
|
| 168 |
+
mask_B[index:mask_end, 70:-70] = 0
|
| 169 |
+
img = Image.fromarray(img)
|
| 170 |
+
|
| 171 |
+
mask_B = Image.fromarray(mask_B)
|
| 172 |
+
img = self.transforms_image(img)
|
| 173 |
+
mask_B = self.transforms_image(mask_B)
|
| 174 |
+
|
| 175 |
+
x = np.where((idx >= self.label_starts) * (idx < self.label_ends))[0]
|
| 176 |
+
|
| 177 |
+
audio = torch.tensor(feature_3dmm)
|
| 178 |
+
# s3 = time.time()
|
| 179 |
+
# print('get 3dmm and mask ', s3 - s2)
|
| 180 |
+
# 保证real_A_index不是idx
|
| 181 |
+
max_i = 0
|
| 182 |
+
real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
|
| 183 |
+
while real_A_index == idx:
|
| 184 |
+
max_i += 1
|
| 185 |
+
real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
|
| 186 |
+
if max_i > 5:
|
| 187 |
+
break
|
| 188 |
+
|
| 189 |
+
imgA_path, _, _ = self.labels[real_A_index]
|
| 190 |
+
imgA = np.array(Image.open(imgA_path).convert('RGB'))
|
| 191 |
+
cut_pad1 = np.random.randint(0, 20)
|
| 192 |
+
cut_pad2 = np.random.randint(0, 20)
|
| 193 |
+
imgA = imgA[cut_pad1:256*2 + cut_pad1, cut_pad2:256*2 + cut_pad2]
|
| 194 |
+
|
| 195 |
+
########椭圆##########
|
| 196 |
+
# mask = np.zeros(imgA.shape, dtype=np.uint8)
|
| 197 |
+
# cv2.ellipse(mask, (imgA.shape[1] // 2, imgA.shape[0] // 2 - 165 - cut_pad1),
|
| 198 |
+
# (imgA.shape[1] // 2 + 25, imgA.shape[0]), 0, 0, 360, (255, 255, 255), -1)
|
| 199 |
+
# ROI = cv2.bitwise_and(imgA, mask)
|
| 200 |
+
# imgA = Image.fromarray(ROI)
|
| 201 |
+
#############################
|
| 202 |
+
# imgA[:imgA.shape[1] // 2 - 40 - index2, :] = 0
|
| 203 |
+
imgA = Image.fromarray(imgA)
|
| 204 |
+
imgA = self.transforms_image(imgA)
|
| 205 |
+
# s4 = time.time()
|
| 206 |
+
# print('end time reala ', s4 - s3)
|
| 207 |
+
return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
|
| 208 |
+
|
| 209 |
+
def __len__(self) -> int:
    """Return the total number of images in the dataset."""
    # One entry per labeled image sample (self.labels is built in __init__).
    return len(self.labels)
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
if __name__ == '__main__':
    from options.train_options import TrainOptions

    # Smoke test: parse the training options, build the dataset, and iterate
    # once over every sample, printing each returned dict.
    opt = TrainOptions().parse()
    dataset = Facereala3dmmDataset(opt)
    dataset_size = len(dataset)
    print(dataset_size)
    for i, data in enumerate(dataset):
        print(data)
|
landmark2face_wy/data/__init__.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""This package includes all the modules related to data loading and preprocessing
|
| 2 |
+
|
| 3 |
+
To add a custom dataset class called 'dummy', you need to add a file called 'dummy_dataset.py' and define a subclass 'DummyDataset' inherited from BaseDataset.
|
| 4 |
+
You need to implement four functions:
|
| 5 |
+
-- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt).
|
| 6 |
+
-- <__len__>: return the size of dataset.
|
| 7 |
+
-- <__getitem__>: get a data point from data loader.
|
| 8 |
+
-- <modify_commandline_options>: (optionally) add dataset-specific options and set default options.
|
| 9 |
+
|
| 10 |
+
Now you can use the dataset class by specifying flag '--dataset_mode dummy'.
|
| 11 |
+
See our template dataset class 'template_dataset.py' for more details.
|
| 12 |
+
"""
|
| 13 |
+
import importlib
|
| 14 |
+
import torch.utils.data
|
| 15 |
+
from landmark2face_wy.data.base_dataset import BaseDataset
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def find_dataset_using_name(dataset_name):
    """Import the module "landmark2face_wy/data/[dataset_name]_dataset.py" and
    return the dataset class it defines.

    The module must contain a class whose name equals <dataset_name> with
    underscores removed plus the suffix 'dataset' (case-insensitive), and that
    class must be a subclass of BaseDataset.

    Args:
        dataset_name: value of the --dataset_mode option, e.g. 'l2faceaudio'.

    Returns:
        The matching dataset class object.

    Raises:
        NotImplementedError: if no matching BaseDataset subclass is found.
    """
    dataset_filename = "landmark2face_wy.data." + dataset_name + "_dataset"
    datasetlib = importlib.import_module(dataset_filename)

    dataset = None
    target_dataset_name = dataset_name.replace('_', '') + 'dataset'
    for name, cls in datasetlib.__dict__.items():
        # Guard with isinstance(cls, type): issubclass() raises TypeError when
        # the matching module attribute is not a class (e.g. a function or
        # constant that happens to carry the target name).
        if name.lower() == target_dataset_name.lower() \
                and isinstance(cls, type) and issubclass(cls, BaseDataset):
            dataset = cls

    if dataset is None:
        raise NotImplementedError("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." % (dataset_filename, target_dataset_name))

    return dataset
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def get_option_setter(dataset_name):
    """Return the static method <modify_commandline_options> of the dataset class."""
    return find_dataset_using_name(dataset_name).modify_commandline_options
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def create_dataset(opt, mode='train'):
    """Create a dataset given the option.

    This function wraps the class CustomDatasetDataLoader and is the main
    interface between this package and 'train.py'/'test.py'.

    Example:
        >>> from data import create_dataset
        >>> dataset = create_dataset(opt)
    """
    loader = CustomDatasetDataLoader(opt, mode)
    return loader.load_data()
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
class CustomDatasetDataLoader():
    """Wrapper class of Dataset class that performs multi-threaded data loading."""

    def __init__(self, opt, mode):
        """Initialize this class.

        Step 1: create a dataset instance given the name [dataset_mode].
        Step 2: create a multi-threaded data loader around it.
        """
        self.opt = opt
        dataset_class = find_dataset_using_name(opt.dataset_mode)
        self.dataset = dataset_class(opt, mode)
        print("dataset [%s] was created" % type(self.dataset).__name__)
        # Test mode halves the batch size.
        batchsize = opt.batch_size // 2 if mode == 'test' else opt.batch_size
        print(opt.batch_size)
        if opt.distributed:
            # Shard the dataset across processes for distributed training.
            self.train_sampler = torch.utils.data.distributed.DistributedSampler(self.dataset)
            self.dataloader = torch.utils.data.DataLoader(
                self.dataset,
                batch_size=batchsize,
                sampler=self.train_sampler,
                num_workers=int(opt.num_threads),
                pin_memory=True)
        else:
            self.dataloader = torch.utils.data.DataLoader(
                self.dataset,
                batch_size=batchsize,
                shuffle=not opt.serial_batches,
                num_workers=int(opt.num_threads))

    def load_data(self):
        """Return this wrapper itself; batches are obtained via __iter__."""
        return self

    def __len__(self):
        """Return the number of samples, capped at opt.max_dataset_size."""
        return min(len(self.dataset), self.opt.max_dataset_size)

    def __iter__(self):
        """Yield batches until max_dataset_size samples have been served."""
        for batch_idx, batch in enumerate(self.dataloader):
            if batch_idx * self.opt.batch_size >= self.opt.max_dataset_size:
                break
            yield batch
|
landmark2face_wy/data/base_dataset.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b9f9eca68615a251926ce113af4594a8dd1f50644c66be50ff5ab27020569c89
|
| 3 |
+
size 1093920
|
landmark2face_wy/data/image_folder.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:180bb0b0dc195aa073049a4c7630e071577f5607bbb3bd2c8247468ec84c7f6c
|
| 3 |
+
size 860856
|
landmark2face_wy/data/l2faceaudio512_dataset.py
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os.path
|
| 2 |
+
import random
|
| 3 |
+
from data.base_dataset import BaseDataset, get_params, get_transform
|
| 4 |
+
import torchvision.transforms as transforms
|
| 5 |
+
from data.image_folder import make_dataset
|
| 6 |
+
from PIL import Image, ImageEnhance
|
| 7 |
+
import numpy as np
|
| 8 |
+
import cv2
|
| 9 |
+
import torch
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def get_idts(config_name):
    """Read identity names (one per line, stripped) from ../config/<config_name>.txt."""
    config_path = os.path.join('../config', config_name + '.txt')
    with open(config_path) as fh:
        return [row.strip() for row in fh]
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class L2FaceAudio512Dataset(BaseDataset):
    """Paired face/audio dataset at 512x512 resolution.

    Each sample couples a masked, randomly-cropped face image with its
    (padded) audio feature map, plus a randomly chosen reference face
    taken from the same label list.
    """

    def __init__(self, opt, mode=None):
        """Load (image path, audio feature) pairs for 'train' or 'test' mode.

        Args:
            opt: option namespace; uses img_size, name, feature_path,
                audio_feature plus the BaseDataset options.
            mode: 'train' or 'test'. Any other value leaves the dataset
                uninitialized (self.labels is never created) — preserved
                from the original behavior.
        """
        BaseDataset.__init__(self, opt)
        img_size = opt.img_size
        idts = get_idts(opt.name.split('_')[0])
        print("---------load data list--------: ", idts)
        # The original train/test branches were byte-for-byte identical (the
        # `mode` variable already parameterizes every path), so they are
        # merged into a single branch.
        if mode in ('train', 'test'):
            self.labels = []
            for idt_name in idts:
                root = os.path.join(opt.feature_path, idt_name)
                # mfcc features use the legacy file name without a suffix.
                if opt.audio_feature == "mfcc":
                    training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
                else:
                    training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
                training_data = torch.load(training_data_path)
                img_paths = training_data['img_paths']
                audio_features = training_data['audio_features']
                # Keep only the file name component of each stored path.
                index = [i[0].split('/')[-1] for i in img_paths]

                image_dir = '{}/{}_dlib_crop'.format(root, img_size)
                for img in range(len(index)):
                    img_path = os.path.join(image_dir, index[img])
                    audio_feature = audio_features[img]
                    self.labels.append([img_path, audio_feature])
            self.transforms_image = transforms.Compose([transforms.ToTensor(),
                                                        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
            self.transforms_label = transforms.Compose([transforms.ToTensor(),
                                                        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
            self.shuffle()

    def shuffle(self):
        """Shuffle the (image, audio) pair list in place."""
        random.shuffle(self.labels)

    def add_mouth_mask2(self, img):
        """Zero out a triangular mouth region of *img* (alternative mask, unused).

        Builds a rectangle in the lower face area, carves two mirrored
        diagonal half-planes out of it and multiplies the image by the
        resulting binary mask.
        """
        mask = np.ones_like(img)
        # [top, bottom, left, right] of the mouth rectangle; top/bottom are
        # jittered randomly per call.
        rect_area = [img.shape[1] // 2 - np.random.randint(50, 60), np.random.randint(226, 246), 30, 256 - 30]
        x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
        x = np.flip(x, 0)
        y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
        zz1 = -y - x + 88 > 0
        zz2 = np.flip(zz1, 1)
        zz = (zz1 + zz2) > 0
        mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
        imgm = img * mask
        return imgm

    def __getitem__(self, index):
        """Return one training sample.

        Returns a dict with:
            'A':       reference face tensor (randomly sampled identity frame),
            'A_label': audio feature tensor (1, 256, 256),
            'B':       target face tensor,
            'B_label': same audio feature tensor,
            'mask_B':  target face downscaled to 256 with the mouth area zeroed.
        """
        # Avoid OpenCV spawning threads inside DataLoader workers.
        cv2.setNumThreads(0)
        img_path, audio_feature = self.labels[index]
        img = np.array(Image.open(img_path).convert('RGB'))
        # Per-channel brightness jitter, clipped back to uint8 range.
        img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
        # Random 512x512 crop out of the (slightly larger) source frame.
        cut_pad1 = np.random.randint(0, 20)
        cut_pad2 = np.random.randint(0, 20)
        img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]

        # Mask covering the eye region (coordinates follow the crop offset).
        mask = np.ones(img.shape, dtype=np.uint8) * 255
        mask[40 - cut_pad1:140 - cut_pad1, 110 - cut_pad2:-110 - cut_pad2] = 0
        img = cv2.bitwise_and(img, mask)

        mask_B = img.copy()
        mask_B = cv2.resize(mask_B, (256, 256))
        # Rectangular mouth mask with randomized lower edge (older variants —
        # ellipse / neck-segmentation / triangle masks — were removed as dead
        # commented-out code).
        mask_end = np.random.randint(236, 256)
        mask_B[mask_B.shape[1] // 2 - np.random.randint(40, 50):mask_end, 30:-30] = 0
        img = Image.fromarray(img)
        mask_B = Image.fromarray(mask_B)
        img = self.transforms_image(img)
        mask_B = self.transforms_image(mask_B)
        # Pad the audio feature into a fixed 256x256 plane.
        audio = np.zeros((256, 256), dtype=np.float32)
        audio_feature = np.array(audio_feature)
        audio[:audio_feature.shape[0], :audio_feature.shape[1]] = audio_feature
        audio = torch.tensor([audio])

        # Random reference frame of the same identity pool.
        imgA_path, _ = random.sample(self.labels, 1)[0]
        imgA = np.array(Image.open(imgA_path).convert('RGB'))
        cut_pad1 = np.random.randint(0, 20)
        cut_pad2 = np.random.randint(0, 20)
        imgA = imgA[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
        # NOTE(review): the eye mask `mask` was built for the *target* crop
        # offsets, so its eye window is slightly misaligned with this second
        # crop — preserved as-is; confirm whether this is intentional jitter.
        imgA = cv2.bitwise_and(imgA, mask)
        imgA = Image.fromarray(imgA)
        imgA = self.transforms_image(imgA)
        return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}

    def __len__(self):
        """Return the total number of images in the dataset."""
        return len(self.labels)
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
if __name__ == '__main__':
    from options.train_options import TrainOptions

    opt = TrainOptions().parse()
    # Bug fix: this module defines L2FaceAudio512Dataset — the original
    # `L2FaceDataset(opt)` raised NameError. Pass mode='train' so that
    # __init__ actually builds self.labels.
    dataset = L2FaceAudio512Dataset(opt, mode='train')
    dataset_size = len(dataset)
    print(dataset_size)
    for i, data in enumerate(dataset):
        print(data)
|
landmark2face_wy/data/l2faceaudio_dataset.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os.path
|
| 2 |
+
import random
|
| 3 |
+
from data.base_dataset import BaseDataset, get_params, get_transform
|
| 4 |
+
import torchvision.transforms as transforms
|
| 5 |
+
from data.image_folder import make_dataset
|
| 6 |
+
from PIL import Image, ImageEnhance
|
| 7 |
+
import numpy as np
|
| 8 |
+
import cv2
|
| 9 |
+
import torch
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def get_idts(config_name):
    """Read identity names (one per line, stripped) from ../config/<config_name>.txt."""
    config_path = os.path.join('../config', config_name + '.txt')
    with open(config_path) as fh:
        return [row.strip() for row in fh]
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class L2FaceAudioDataset(BaseDataset):
    """Paired face/audio dataset at 256x256 resolution.

    Each sample couples a masked, randomly-cropped face image with its
    (padded) audio feature map, plus a randomly chosen reference face
    taken from the same label list.
    """

    def __init__(self, opt, mode=None):
        """Load (image path, audio feature) pairs for 'train' or 'test' mode.

        Args:
            opt: option namespace; uses img_size, name, feature_path,
                audio_feature plus the BaseDataset options.
            mode: 'train' or 'test'. Any other value leaves the dataset
                uninitialized (self.labels is never created) — preserved
                from the original behavior.
        """
        BaseDataset.__init__(self, opt)
        img_size = opt.img_size
        idts = get_idts(opt.name.split('_')[0])
        print("---------load data list--------: ", idts)
        # The original train/test branches were byte-for-byte identical (the
        # `mode` variable already parameterizes every path), so they are
        # merged into a single branch.
        if mode in ('train', 'test'):
            self.labels = []
            for idt_name in idts:
                root = os.path.join(opt.feature_path, idt_name)
                # mfcc features use the legacy file name without a suffix.
                if opt.audio_feature == "mfcc":
                    training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
                else:
                    training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
                training_data = torch.load(training_data_path)
                img_paths = training_data['img_paths']
                audio_features = training_data['audio_features']
                # Keep only the file name component of each stored path.
                index = [i[0].split('/')[-1] for i in img_paths]

                image_dir = '{}/{}_dlib_crop'.format(root, img_size)
                for img in range(len(index)):
                    img_path = os.path.join(image_dir, index[img])
                    audio_feature = audio_features[img]
                    self.labels.append([img_path, audio_feature])
            self.transforms_image = transforms.Compose([transforms.ToTensor(),
                                                        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
            self.transforms_label = transforms.Compose([transforms.ToTensor(),
                                                        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
            self.shuffle()

    def shuffle(self):
        """Shuffle the (image, audio) pair list in place."""
        random.shuffle(self.labels)

    def add_mouth_mask2(self, img):
        """Zero out a triangular mouth region of *img* (alternative mask, unused).

        Builds a rectangle in the lower face area, carves two mirrored
        diagonal half-planes out of it and multiplies the image by the
        resulting binary mask.
        """
        mask = np.ones_like(img)
        # [top, bottom, left, right] of the mouth rectangle; bottom is
        # jittered randomly per call.
        rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30]
        x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
        x = np.flip(x, 0)
        y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
        zz1 = -y - x + 88 > 0
        zz2 = np.flip(zz1, 1)
        zz = (zz1 + zz2) > 0
        mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
        imgm = img * mask
        return imgm

    def __getitem__(self, index):
        """Return one training sample.

        Returns a dict with:
            'A':       reference face tensor (randomly sampled identity frame),
            'A_label': audio feature tensor (1, 256, 256),
            'B':       target face tensor,
            'B_label': same audio feature tensor,
            'mask_B':  target face with the mouth area zeroed.
        """
        # Avoid OpenCV spawning threads inside DataLoader workers.
        cv2.setNumThreads(0)
        img_path, audio_feature = self.labels[index]
        img = np.array(Image.open(img_path).convert('RGB'))
        # Per-channel brightness jitter, clipped back to uint8 range.
        img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
        # Random 256x256 crop out of the (slightly larger) source frame.
        cut_pad1 = np.random.randint(0, 10)
        cut_pad2 = np.random.randint(0, 10)
        img = img[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]

        # Mask covering the eye region (coordinates follow the crop offset).
        mask = np.ones(img.shape, dtype=np.uint8) * 255
        mask[20 - cut_pad1:70 - cut_pad1, 55 - cut_pad2:-55 - cut_pad2] = 0
        img = cv2.bitwise_and(img, mask)

        mask_B = img.copy()
        # Rectangular mouth mask with randomized lower edge (the alternative
        # triangle-mask variant was removed as dead commented-out code).
        mask_end = np.random.randint(236, 256)
        mask_B[mask_B.shape[1] // 2 - np.random.randint(40, 50):mask_end, 30:-30] = 0
        img = Image.fromarray(img)
        mask_B = Image.fromarray(mask_B)
        img = self.transforms_image(img)
        mask_B = self.transforms_image(mask_B)
        # Pad the audio feature into a fixed 256x256 plane.
        audio = np.zeros((256, 256), dtype=np.float32)
        audio_feature = np.array(audio_feature)
        audio[:audio_feature.shape[0], :audio_feature.shape[1]] = audio_feature
        audio = torch.tensor([audio])

        # Random reference frame of the same identity pool.
        imgA_path, _ = random.sample(self.labels, 1)[0]
        imgA = np.array(Image.open(imgA_path).convert('RGB'))
        cut_pad1 = np.random.randint(0, 10)
        cut_pad2 = np.random.randint(0, 10)
        imgA = imgA[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]
        # NOTE(review): the eye mask `mask` was built for the *target* crop
        # offsets, so its eye window is slightly misaligned with this second
        # crop — preserved as-is; confirm whether this is intentional jitter.
        imgA = cv2.bitwise_and(imgA, mask)
        imgA = Image.fromarray(imgA)
        imgA = self.transforms_image(imgA)
        return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}

    def __len__(self):
        """Return the total number of images in the dataset."""
        return len(self.labels)
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
if __name__ == '__main__':
    from options.train_options import TrainOptions

    opt = TrainOptions().parse()
    # Bug fix: this module defines L2FaceAudioDataset — the original
    # `L2FaceDataset(opt)` raised NameError. Pass mode='train' so that
    # __init__ actually builds self.labels.
    dataset = L2FaceAudioDataset(opt, mode='train')
    dataset_size = len(dataset)
    print(dataset_size)
    for i, data in enumerate(dataset):
        print(data)
|
landmark2face_wy/digitalhuman_interface.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1bc38a9e2a82a9022573da1e8326128a98a661a17d61283f3911c5ee3aa504a
|
| 3 |
+
size 5284104
|
landmark2face_wy/loss/__init__.py
ADDED
|
File without changes
|
landmark2face_wy/loss/perceptual.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:784d25b825d83d4634932f2628747a4f7f9f6c8bfe84610bc757131810c2e412
|
| 3 |
+
size 2056248
|
landmark2face_wy/models/DINet.cpython-38-x86_64-linux-gnu.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71be733004b4ba60a93dee4971f4e69eefe575dfc99d8e3dffc3ed160d9ba4d4
|
| 3 |
+
size 10580992
|