Kelton committed on
Commit
3d3198b
·
verified ·
1 Parent(s): e8238ae

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +82 -0
  2. README.md +73 -0
  3. README_en.md +66 -0
  4. app.py +230 -0
  5. config/config.ini +23 -0
  6. download.sh +32 -0
  7. example/audio.wav +3 -0
  8. example/video.mp4 +3 -0
  9. face_attr_detect/.DS_Store +0 -0
  10. face_attr_detect/__init__.py +1 -0
  11. face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so +3 -0
  12. face_attr_detect/face_attr_epoch_12_220318.onnx +3 -0
  13. face_detect_utils/__init__.py +0 -0
  14. face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so +3 -0
  15. face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so +3 -0
  16. face_detect_utils/resources/model_float32.onnx +3 -0
  17. face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx +3 -0
  18. face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx +3 -0
  19. face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so +3 -0
  20. face_lib/__init__.py +0 -0
  21. face_lib/face_detect_and_align/__init__.py +3 -0
  22. face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so +3 -0
  23. face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so +3 -0
  24. face_lib/face_detect_and_align/scrfd_insightface/__init__.py +5 -0
  25. face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so +3 -0
  26. face_lib/face_parsing/__init__.py +6 -0
  27. face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so +3 -0
  28. face_lib/face_restore/__init__.py +2 -0
  29. face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so +3 -0
  30. h_utils/__init__.py +9 -0
  31. h_utils/custom.cpython-38-x86_64-linux-gnu.so +3 -0
  32. h_utils/obs_client.cpython-38-x86_64-linux-gnu.so +3 -0
  33. h_utils/request_utils.cpython-38-x86_64-linux-gnu.so +3 -0
  34. h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so +3 -0
  35. h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so +3 -0
  36. landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so +3 -0
  37. landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth +3 -0
  38. landmark2face_wy/checkpoints/test/opt.txt +74 -0
  39. landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so +3 -0
  40. landmark2face_wy/data/Facereala3dmmexp512_dataset.py +212 -0
  41. landmark2face_wy/data/Facereala3dmmexpwenet512_dataset.py +222 -0
  42. landmark2face_wy/data/__init__.py +99 -0
  43. landmark2face_wy/data/base_dataset.cpython-38-x86_64-linux-gnu.so +3 -0
  44. landmark2face_wy/data/image_folder.cpython-38-x86_64-linux-gnu.so +3 -0
  45. landmark2face_wy/data/l2faceaudio512_dataset.py +189 -0
  46. landmark2face_wy/data/l2faceaudio_dataset.py +166 -0
  47. landmark2face_wy/digitalhuman_interface.cpython-38-x86_64-linux-gnu.so +3 -0
  48. landmark2face_wy/loss/__init__.py +0 -0
  49. landmark2face_wy/loss/perceptual.cpython-38-x86_64-linux-gnu.so +3 -0
  50. landmark2face_wy/models/DINet.cpython-38-x86_64-linux-gnu.so +3 -0
.gitattributes CHANGED
@@ -33,3 +33,85 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ example/audio.wav filter=lfs diff=lfs merge=lfs -text
37
+ example/video.mp4 filter=lfs diff=lfs merge=lfs -text
38
+ face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
39
+ face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
40
+ face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
41
+ face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
42
+ face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
43
+ face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
44
+ face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
45
+ face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
46
+ face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
47
+ h_utils/custom.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
48
+ h_utils/obs_client.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
49
+ h_utils/request_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
50
+ h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
51
+ h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
52
+ landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
53
+ landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
54
+ landmark2face_wy/data/base_dataset.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
55
+ landmark2face_wy/data/image_folder.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
56
+ landmark2face_wy/digitalhuman_interface.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
57
+ landmark2face_wy/loss/perceptual.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
58
+ landmark2face_wy/models/DINet.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
59
+ landmark2face_wy/models/base_function.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
60
+ landmark2face_wy/models/base_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
61
+ landmark2face_wy/models/face3d2face_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
62
+ landmark2face_wy/models/face_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
63
+ landmark2face_wy/models/l2faceaudio_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
64
+ landmark2face_wy/models/networks.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
65
+ landmark2face_wy/models/networks_HD.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
66
+ landmark2face_wy/models/networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
67
+ landmark2face_wy/models/pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
68
+ landmark2face_wy/models/pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
69
+ landmark2face_wy/options/base_options.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
70
+ landmark2face_wy/options/test_options.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
71
+ landmark2face_wy/options/train_options.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
72
+ landmark2face_wy/sync_batchnorm/batchnorm.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
73
+ landmark2face_wy/sync_batchnorm/batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
74
+ landmark2face_wy/sync_batchnorm/comm.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
75
+ landmark2face_wy/sync_batchnorm/replicate.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
76
+ landmark2face_wy/sync_batchnorm/unittest.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
77
+ landmark2face_wy/test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
78
+ landmark2face_wy/test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
79
+ landmark2face_wy/util/flow_util.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
80
+ landmark2face_wy/util/get_data.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
81
+ landmark2face_wy/util/html.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
82
+ landmark2face_wy/util/image_pool.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
83
+ landmark2face_wy/util/util.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
84
+ landmark2face_wy/util/visualizer.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
85
+ model_lib/base_wrapper/onnx_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
86
+ preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
87
+ service/server.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
88
+ service/trans_dh_service.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
89
+ wenet/compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
90
+ wenet/transformer/asr_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
91
+ wenet/transformer/attention.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
92
+ wenet/transformer/cmvn.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
93
+ wenet/transformer/convolution.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
94
+ wenet/transformer/ctc.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
95
+ wenet/transformer/decoder.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
96
+ wenet/transformer/decoder_layer.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
97
+ wenet/transformer/embedding.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
98
+ wenet/transformer/encoder.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
99
+ wenet/transformer/encoder_layer.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
100
+ wenet/transformer/label_smoothing_loss.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
101
+ wenet/transformer/positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
102
+ wenet/transformer/subsampling.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
103
+ wenet/transformer/swish.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
104
+ wenet/utils/checkpoint.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
105
+ wenet/utils/common.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
106
+ wenet/utils/ctc_util.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
107
+ wenet/utils/executor.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
108
+ wenet/utils/mask.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
109
+ wenet/utils/scheduler.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
110
+ xseg/dfl_xseg_api.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
111
+ y_utils/config.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
112
+ y_utils/lcr.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
113
+ y_utils/liblcr.so filter=lfs diff=lfs merge=lfs -text
114
+ y_utils/logger.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
115
+ y_utils/md5.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
116
+ y_utils/time_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
117
+ y_utils/tools.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ [![License](https://img.shields.io/badge/License-View%20License-blue.svg)](https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE)
3
+ ![Python](https://img.shields.io/badge/Python-3.8-blue.svg)
4
+ ![Linux](https://img.shields.io/badge/OS-Linux-brightgreen.svg)
5
+
6
+ **[中文](#chinese-version)** | **[English](README_en.md)**
7
+
8
+ ---
9
+
10
+ <a name="chinese-version"></a>
11
+
12
+ # HeyGem-Linux-Python-Hack
13
+
14
+ ## 项目简介
15
+
16
+ [HeyGem-Linux-Python-Hack] 是一个基于 Python 的数字人项目,它从 [HeyGem.ai](https://github.com/GuijiAI/HeyGem.ai) 中提取出来,它能够直接在 Linux 系统上运行,摆脱了对 Docker 和 Windows 系统的依赖。我们的目标是提供一个更易于部署和使用的数字人解决方案。
17
+
18
+ **如果你觉得这个项目对你有帮助,欢迎给我们 Star!**
19
+ **如果运行过程中遇到问题,在查阅已有 Issue 后,在查阅 Google/baidu/ai 后,欢迎提交 Issues!**
20
+
21
+ ## 主要特性
22
+
23
+ * 无需 Docker: 直接在 Linux 系统上运行,简化部署流程。
24
+ * 无需 Windows: 完全基于 Linux 开发和测试。
25
+ * Python 驱动: 使用 Python 语言开发,易于理解和扩展。
26
+ * 开发者友好: 易于使用和扩展。
27
+ * 完全离线。
28
+
29
+ ## 开始使用
30
+
31
+ ### 安装
32
+ 本项目**支持且仅支持 Linux & python3.8 环境**
33
+ 请确保你的 Linux 系统上已经安装了 **Python 3.8**。然后,使用 pip 安装项目依赖项
34
+ 同时也提供一个备用的环境 [requirements_0.txt](requirements_0.txt),遇到问题的话,你可以参考它来建立一个新的环境。
35
+ **具体的 onnxruntime-gpu / torch 等需要结合你的机器上的 cuda 版本去尝试一些组合,否则仍旧可能遇到问题。**
36
+ **请尽量不要询问任何关于 pip 的问题,感谢合作**
37
+
38
+ ```bash
39
+ # 直接安装整个 requirements.txt 不一定成功,更建议跑代码观察报错信息,然后根据报错信息结合 requirements 去尝试安装,祝你顺利。
40
+ # pip install -r requirements.txt
41
+ ```
42
+
43
+ ### 使用
44
+ 把项目克隆到本地
45
+ ```bash
46
+ git clone https://github.com/Holasyb918/HeyGem-Linux-Python-Hack
47
+ cd HeyGem-Linux-Python-Hack
48
+ bash download.sh
49
+ ```
50
+ #### 开始使用
51
+ * repo 中已提供可以用于 demo 的音视频样例,代码可以直接运行。
52
+ #### command:
53
+ ```bash
54
+ python run.py
55
+ ```
56
+
57
+ * 如果要使用自己的数据,可以外部传入参数,请注意,**path 是本地文件,且仅支持相对路径**.
58
+
59
+ #### command:
60
+ ```bash
61
+ python run.py --audio_path example/audio.wav --video_path example/video.mp4
62
+ ```
63
+ #### gradio:
64
+ ```bash
65
+ python app.py
66
+ # 请等待模型初始化完成后提交任务
67
+ ```
68
+
69
+ ## Contributing
70
+ 欢迎贡献!
71
+
72
+ ## License
73
+ 参考 heyGem.ai 的协议.
README_en.md ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ [![License](https://img.shields.io/badge/License-View%20License-blue.svg)](https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE)
3
+ ![Python](https://img.shields.io/badge/Python-3.8-blue.svg)
4
+ ![Linux](https://img.shields.io/badge/OS-Linux-brightgreen.svg)
5
+
6
+ **[中文](./readme.md)** | **[English](#english-version)**
7
+
8
+ ---
9
+
10
+ <a name="english-version"></a>
11
+
12
+ # HeyGem-Linux-Python-Hack
13
+
14
+ ## Introduction
15
+
16
+ [HeyGem-Linux-Python-Hack] is a Python-based digital human project extracted from HeyGem.ai. It is designed to run directly on Linux systems, eliminating the need for Docker and Windows. Our goal is to provide a easier-to-deploy, and user-friendly digital human solution.
17
+
18
+ **Feel free to Star us if you find this project useful!**
19
+ **Please submit an Issue if you run into any problems!**
20
+
21
+ ## Key Features
22
+
23
+ * No Docker Required: Runs directly on Linux systems, simplifying the deployment process.
24
+ * No Windows Required: Fully developed and tested on Linux.
25
+ * Python Powered: Developed using the Python language, making it easy to understand and extend.
26
+ * Developer-Friendly: Easy to use, and easy to extend.
27
+
28
+ ## Getting Started
29
+
30
+ ### Installation
31
+
32
+ Please ensure that **Python 3.8** is installed on your Linux system. Then, you can install the project dependencies using pip:
33
+
34
+ ```bash
35
+ pip install -r requirements.txt
36
+ ```
37
+
38
+ ### Usage
39
+ Clone this repository to your local machine:
40
+ ```bash
41
+ git clone https://github.com/Holasyb918/HeyGem-Linux-Python-Hack
42
+ cd HeyGem-Linux-Python-Hack
43
+ bash download.sh
44
+ ```
45
+ #### Getting Started
46
+ * Audio and video examples that can be used for the demo are already provided in the repo, and the code can be run directly.
47
+ #### Command:
48
+ ```bash
49
+ python run.py
50
+ ```
51
+ * If you want to use your own data, you can pass parameters externally. **Please note that the path is a local file and only supports relative paths.**
52
+ #### command:
53
+ ```bash
54
+ python run.py --audio_path example/audio.wav --video_path example/video.mp4
55
+ ```
56
+ #### gradio:
57
+ ```bash
58
+ python app.py
59
+ # Please wait until processor init done.
60
+ ```
61
+
62
+ ## Contributing
63
+ Contributions are welcome!
64
+
65
+ ## License
66
+ This project is licensed under the HeyGem.ai License.
app.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import gc
3
+ import json
4
+ import os
5
+
6
+ os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
7
+ import subprocess
8
+ import threading
9
+ import time
10
+ import traceback
11
+ import uuid
12
+ from enum import Enum
13
+ import queue
14
+ import shutil
15
+ from functools import partial
16
+
17
+ import cv2
18
+ import gradio as gr
19
+ from flask import Flask, request
20
+
21
+ import service.trans_dh_service
22
+ from h_utils.custom import CustomError
23
+ from y_utils.config import GlobalConfig
24
+ from y_utils.logger import logger
25
+
26
+
27
def write_video_gradio(
    output_imgs_queue,
    temp_dir,
    result_dir,
    work_id,
    audio_path,
    result_queue,
    width,
    height,
    fps,
    watermark_switch=0,
    digital_auth=0,
    temp_queue=None,
):
    """Consume rendered frames and assemble the final result video.

    Frames are read from ``output_imgs_queue`` and written to a silent
    intermediate mp4 in ``temp_dir``; ffmpeg then muxes ``audio_path`` (plus
    optional watermark / digital-human badge overlays) into the final file in
    ``result_dir``.

    Queue protocol (established by the producer): each item is a triple
    ``(state, reason, frames)`` where ``state is True`` means normal
    end-of-stream, ``state is False`` means an upstream failure with ``reason``
    as the cause, and any non-bool ``state`` means ``frames`` is a batch of
    images to append.

    Outcome is reported through ``result_queue``: ``[True, result_path]`` on
    success, ``[False, <message>]`` on failure.  ``temp_queue`` is currently
    unused (kept for signature compatibility with the service's writer).
    """
    output_mp4 = os.path.join(temp_dir, "{}-t.mp4".format(work_id))
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    result_path = os.path.join(result_dir, "{}-r.mp4".format(work_id))
    video_write = cv2.VideoWriter(output_mp4, fourcc, fps, (width, height))
    print("Custom VideoWriter init done")
    try:
        while True:
            state, reason, value_ = output_imgs_queue.get()
            if isinstance(state, bool):
                if state:
                    # Normal termination signal from the producer.
                    logger.info(
                        "Custom VideoWriter [{}]视频帧队列处理已结束".format(work_id)
                    )
                    logger.info(
                        "Custom VideoWriter Silence Video saved in {}".format(
                            os.path.realpath(output_mp4)
                        )
                    )
                    break
                # Upstream failure: propagate through the local except handler.
                logger.error(
                    "Custom VideoWriter [{}]任务视频帧队列 -> 异常原因:[{}]".format(
                        work_id, reason
                    )
                )
                raise CustomError(reason)
            for result_img in value_:
                video_write.write(result_img)
        # Single release point (the original released twice on the success
        # path; VideoWriter.release() on a released writer is a no-op).
        video_write.release()
        if watermark_switch == 1 and digital_auth == 1:
            logger.info(
                "Custom VideoWriter [{}]任务需要水印和数字人标识".format(work_id)
            )
            # NOTE: the original issued byte-identical commands for the
            # landscape and portrait cases, so no width/height branch is needed.
            command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
                audio_path,
                output_mp4,
                GlobalConfig.instance().watermark_path,
                GlobalConfig.instance().digital_auth_path,
                result_path,
            )
            logger.info("command:{}".format(command))
        elif watermark_switch == 1 and digital_auth == 0:
            logger.info("Custom VideoWriter [{}]任务需要水印".format(work_id))
            command = 'ffmpeg -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10" -c:a aac -crf 15 -strict -2 {}'.format(
                audio_path,
                output_mp4,
                GlobalConfig.instance().watermark_path,
                result_path,
            )
            logger.info("command:{}".format(command))
        elif watermark_switch == 0 and digital_auth == 1:
            logger.info("Custom VideoWriter [{}]任务需要数字人标识".format(work_id))
            # Same collapse as above: both orientation branches were identical.
            command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
                audio_path,
                output_mp4,
                GlobalConfig.instance().digital_auth_path,
                result_path,
            )
            logger.info("command:{}".format(command))
        else:
            command = "ffmpeg -loglevel warning -y -i {} -i {} -c:a aac -c:v libx264 -crf 15 -strict -2 {}".format(
                audio_path, output_mp4, result_path
            )
        logger.info("Custom command:{}".format(command))
        # NOTE(review): shell=True with interpolated paths is injection-prone
        # if paths are ever user-controlled; a list-argv subprocess.run would
        # be safer but would change quoting behavior — left as-is.
        subprocess.call(command, shell=True)
        print("###### Custom Video Writer write over")
        print(f"###### Video result saved in {os.path.realpath(result_path)}")
        result_queue.put([True, result_path])
        # temp_queue.put([True, result_path])
    except Exception as e:
        logger.error(
            "Custom VideoWriter [{}]视频帧队列处理异常结束,异常原因:[{}]".format(
                work_id, e.__str__()
            )
        )
        result_queue.put(
            [
                False,
                "[{}]视频帧队列处理异常结束,异常原因:[{}]".format(
                    work_id, e.__str__()
                ),
            ]
        )
    logger.info("Custom VideoWriter 后处理进程结束")
146
+
147
+
148
# Monkey-patch the service's frame writer with the gradio-aware variant above
# so results land in local files this app can return to the UI.
service.trans_dh_service.write_video = write_video_gradio
149
+
150
+
151
class VideoProcessor:
    """Gradio-facing wrapper around ``service.trans_dh_service.TransDhTask``.

    Holds one task instance and exposes :meth:`process_video`, which runs a
    single audio + driving-video pair through the digital-human pipeline and
    returns the path of the generated result video.
    """

    def __init__(self):
        self.task = service.trans_dh_service.TransDhTask()
        self.basedir = GlobalConfig.instance().result_dir
        self.is_initialized = False
        self._initialize_service()
        print("VideoProcessor init done")

    def _initialize_service(self):
        """Flag the service as ready for use.

        NOTE(review): the fixed 5s sleep looks like a crude wait for the
        task's background model loading — confirm against TransDhTask.
        """
        logger.info("开始初始化 trans_dh_service...")
        try:
            time.sleep(5)
            logger.info("trans_dh_service 初始化完成。")
            self.is_initialized = True
        except Exception as e:
            logger.error(f"初始化 trans_dh_service 失败: {e}")

    def process_video(
        self, audio_file, video_file, watermark=False, digital_auth=False
    ):
        """Run the pipeline on one audio/video pair; return the result path.

        Blocks until initialization completes.  ``watermark`` and
        ``digital_auth`` are accepted for interface compatibility but are not
        forwarded (the task is always invoked with zeros, as before).
        Raises ``gr.Error`` on any failure so gradio surfaces the message.
        """
        while not self.is_initialized:
            logger.info("服务尚未完成初始化,等待 1 秒...")
            time.sleep(1)
        # Unique work id; also keys this run in the task's bookkeeping dict.
        code = str(uuid.uuid1())

        try:
            # Probe the input video's geometry/fps (informational only; the
            # task re-reads the file itself).
            cap = cv2.VideoCapture(video_file)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            cap.release()

            self.task.task_dic[code] = ""
            self.task.work(audio_file, video_file, code, 0, 0, 0, 0)

            # task_dic[code][2] holds the rendered video's path on success.
            result_path = self.task.task_dic[code][2]
            final_result_dir = os.path.join("result", code)
            os.makedirs(final_result_dir, exist_ok=True)
            # Use shutil.move instead of shelling out to `mv` (portable, and
            # no shell-quoting issues with unusual paths).
            shutil.move(result_path, final_result_dir)
            # Remove intermediate files left beside the original result
            # (shell wildcard expansion is intentional here).
            os.system(
                f"rm -rf {os.path.join(os.path.dirname(result_path), code + '*.*')}"
            )
            result_path = os.path.realpath(
                os.path.join(final_result_dir, os.path.basename(result_path))
            )
            return result_path

        except Exception as e:
            logger.error(f"处理视频时发生错误: {e}")
            raise gr.Error(str(e))
209
+
210
+
211
+ if __name__ == "__main__":
212
+ processor = VideoProcessor()
213
+
214
+ inputs = [
215
+ gr.File(label="上传音频文件/upload audio file"),
216
+ gr.File(label="上传视频文件/upload video file"),
217
+ ]
218
+ outputs = gr.Video(label="生成的视频/Generated video")
219
+
220
+ title = "数字人视频生成/Digital Human Video Generation"
221
+ description = "上传音频和视频文件,即可生成数字人视频。/Upload audio and video files to generate digital human videos."
222
+
223
+ demo = gr.Interface(
224
+ fn=processor.process_video,
225
+ inputs=inputs,
226
+ outputs=outputs,
227
+ title=title,
228
+ description=description,
229
+ )
230
+ demo.queue().launch()
config/config.ini ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [log]
2
+ log_dir = ./log
3
+ log_file = dh.log
4
+
5
+ [http_server]
6
+ server_ip = 0.0.0.0
7
+ server_port = 8383
8
+
9
+ [temp]
10
+ temp_dir = ./
11
+ clean_switch = 1
12
+
13
+ [result]
14
+ result_dir = ./result
15
+ clean_switch = 0
16
+
17
+ [digital]
18
+ batch_size = 4
19
+
20
+ [register]
21
+ url = http://172.16.160.51:12120
22
+ report_interval = 10
23
+ enable=0
download.sh ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ set -e
2
+ set -u
3
+
4
+ # face attr
5
+ mkdir -p face_attr_detect
6
+ wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/face_attr_epoch_12_220318.onnx -O face_attr_detect/face_attr_epoch_12_220318.onnx
7
+
8
+ # face detect
9
+ mkdir -p face_detect_utils/resources
10
+ wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/pfpld_robust_sim_bs1_8003.onnx -O face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx
11
+ wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/scrfd_500m_bnkps_shape640x640.onnx -O face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx
12
+ wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/model_float32.onnx -O face_detect_utils/resources/model_float32.onnx
13
+
14
+ # dh model
15
+ mkdir -p landmark2face_wy/checkpoints/anylang
16
+ wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/dinet_v1_20240131.pth -O landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth
17
+
18
+ # face parsing
19
+ mkdir -p pretrain_models/face_lib/face_parsing
20
+ wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/79999_iter.onnx -O pretrain_models/face_lib/face_parsing/79999_iter.onnx
21
+
22
+ # gfpgan
23
+ mkdir -p pretrain_models/face_lib/face_restore/gfpgan
24
+ wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/GFPGANv1.4.onnx -O pretrain_models/face_lib/face_restore/gfpgan/GFPGANv1.4.onnx
25
+
26
+ # xseg
27
+ mkdir -p xseg
28
+ wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/xseg_211104_4790000.onnx -O xseg/xseg_211104_4790000.onnx
29
+
30
+ # wenet
31
+ mkdir -p wenet/examples/aishell/aidata/exp/conformer
32
+ wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/wenetmodel.pt -O wenet/examples/aishell/aidata/exp/conformer/wenetmodel.pt
example/audio.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:886f4e5cd90b79c8575c8bb18c93d41543b2619272f75841dac095a65c8f85dd
3
+ size 192044
example/video.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a11e32bda4b3d15777ed8d481e66859805e71c5168221d0098eac2b31b3f4e7b
3
+ size 7068410
face_attr_detect/.DS_Store ADDED
Binary file (6.15 kB). View file
 
face_attr_detect/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .face_attr import FaceAttr
face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aa56c5cbaddc1bc7204823fd2252cf54d056365795737c846f876236a3e5056
3
+ size 441864
face_attr_detect/face_attr_epoch_12_220318.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fa6604beacd1e560ffc4cae6fa1537591d5f1a765a9f55473a295a1d22da3af
3
+ size 3723167
face_detect_utils/__init__.py ADDED
File without changes
face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68b5fd825eebc9421090c1daf3e940833b7bf5712ecee16deef937c87bbe666e
3
+ size 1363368
face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1897346bf14dbbac7328a43598ba0c6d3f4db9ab6628dbebb381d4139aca179e
3
+ size 1158712
face_detect_utils/resources/model_float32.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e5dc9dd52836b2029a599e74134f1a0f03e416db3e40e932f69609adb0238ad
3
+ size 17315434
face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bd9913817152831562cccde7e51ed431d1cf4547d8f21e0876b58a0d82baa55
3
+ size 6889235
face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b467f878e26ff1e7ee05cd9936fdff63fa2a5af5d732ed17ee231f2dd5cc07ae
3
+ size 2524648
face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fa07e8146248e1b4deaafdfa0cc6c1e1b7a9d641db536aa3ae9b9ee10ab3b01
3
+ size 3178688
face_lib/__init__.py ADDED
File without changes
face_lib/face_detect_and_align/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .face_align_5_landmarks import FaceDetect5Landmarks
2
+ from .face_align_utils import estimate_norm
3
+
face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f0fd0fff60f8fb1fe6cbb1b549d5c43ae9bfaef1e5f4ee4edb27085d3023d22
3
+ size 1321904
face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da29cd727e8bf8f7107c322d5d40ef8596b29e2a858cad779362dbf2516c38a0
3
+ size 1611632
face_lib/face_detect_and_align/scrfd_insightface/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # -- coding: utf-8 --
2
+ # @Time : 2021/11/10
3
+
4
+
5
+ from .scrfd import SCRFD
face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8b9f64f8ef1c198e7d240ba6f1c9e4ff333c48f0259b71e74ca466d5ea274bb
3
+ size 2529880
face_lib/face_parsing/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # -- coding: utf-8 --
2
+ # @Time : 2022/3/29
3
+
4
+
5
+ from .face_parsing_api import FaceParsing
6
+ # from .dfl_xseg_net import XsegNet
face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04931709d9c22f909e7ead81acb06cae05b70162fbcb6d2055ac7315b61834d3
3
+ size 1053792
face_lib/face_restore/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+
2
+ from .gfpgan_onnx.gfpgan_onnx_api import GFPGAN
face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2088f84d33b0a442e7dcb04135939e56d918b8edefd1de0b06340cb38573d1e
3
+ size 567104
h_utils/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ #!/user/bin/env python
2
+ # coding=utf-8
3
+ """
4
+ @project : dhp-service
5
+ @author : huyi
6
+ @file : __init__.py.py
7
+ @ide : PyCharm
8
+ @time : 2021-08-18 15:45:13
9
+ """
h_utils/custom.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37ecc1ed06eb9b804f9de3470dbe6780976514d120bde8fed20d67c4cd26fe0e
3
+ size 259136
h_utils/obs_client.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d881bc9a1303697bd912c005c71ccc74bab724f1ade6e1156c2d9ca0277e344
3
+ size 982656
h_utils/request_utils.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:215847205ce3a0f416ee3f5d07b4406b88ac0815cdd3e671c0d317c649cf2420
3
+ size 1304616
h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fcfcce02dcbc5f9740329c10ab5fd0bfb157f3e6eb2fa4622adb901ac1feab5
3
+ size 607848
h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4baf454a9940278b6696977b71a489ccf7c920faf37340b9968dcddfa582c508
3
+ size 594864
landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ced0b512524155f205d2b4c6a46a1a63c2d347387b30550a1fd99ccd53df172
3
+ size 1749648
landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c4568b1f1f2890b4a92edc3f9457af63f908065961233af2125d060cb56fd9e
3
+ size 392392973
landmark2face_wy/checkpoints/test/opt.txt ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ----------------- Options ---------------
2
+ aspect_ratio: 1.0
3
+ audio_feature: 3dmm
4
+ batch_size: 16
5
+ checkpoints_dir: ./landmark2face_wy/checkpoints
6
+ crop_size: 256
7
+ dataroot: ./data
8
+ dataset_mode: Facereala3dmm
9
+ direction: AtoB
10
+ display_winsize: 256
11
+ distributed: False
12
+ epoch: latest
13
+ eval: False
14
+ feat_num: 3
15
+ feature_path: ../AnnI_deep3dface_256_contains_id/
16
+ fp16: False
17
+ gpu_ids: 0
18
+ img_size: 256
19
+ init_gain: 0.02
20
+ init_type: normal
21
+ input_nc: 3
22
+ instance_feat: False
23
+ isTrain: False [default: None]
24
+ label_feat: False
25
+ lan_size: 1
26
+ load_features: False
27
+ load_iter: 0 [default: 0]
28
+ load_size: 286
29
+ local_rank: -1
30
+ max_dataset_size: inf
31
+ mfcc0_rate: 0.2
32
+ model: pirender_3dmm_mouth_hd
33
+ model_path: ./landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth
34
+ n_blocks: 9
35
+ n_blocks_global: 9
36
+ n_blocks_local: 3
37
+ n_clusters: 10
38
+ n_downsample_E: 4
39
+ n_downsample_global: 4
40
+ n_layers_D: 3
41
+ n_local_enhancers: 1
42
+ name: test
43
+ ndf: 64
44
+ nef: 16
45
+ netD: basic
46
+ netG: pirender
47
+ ngf: 64
48
+ niter_fix_global: 0
49
+ no_dropout: True
50
+ no_flip: False
51
+ no_ganFeat_loss: False
52
+ no_instance: False
53
+ norm: instance
54
+ ntest: inf
55
+ num_D: 2
56
+ num_test: 50
57
+ num_threads: 4
58
+ output_nc: 3
59
+ perceptual_layers: ['relu_1_1', 'relu_2_1', 'relu_3_1', 'relu_4_1', 'relu_5_1']
60
+ perceptual_network: vgg19
61
+ perceptual_num_scales: 4
62
+ perceptual_use_style_loss: True
63
+ perceptual_weights: [4, 4, 4, 4, 4]
64
+ phase: test
65
+ preprocess: resize_and_crop
66
+ resize_size: 512
67
+ results_dir: ./results/
68
+ serial_batches: False
69
+ suffix:
70
+ test_audio_path: None
71
+ test_muban: None
72
+ verbose: False
73
+ weight_style_to_perceptual: 250
74
+ ----------------- End -------------------
landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfb97b75c48cdbf3cde9f0d6cb9c2e0665b8a0f8f6870a78480263638f8b2bd9
3
+ size 3479728
landmark2face_wy/data/Facereala3dmmexp512_dataset.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path
2
+ import random
3
+ from data.base_dataset import BaseDataset, get_params, get_transform
4
+ import torchvision.transforms as transforms
5
+ from data.image_folder import make_dataset
6
+ from PIL import Image, ImageEnhance
7
+ import numpy as np
8
+ import cv2
9
+ import torch
10
+ import time
11
+
12
+ def get_idts(config_name):
13
+ idts = list()
14
+ with open(os.path.join('../config', config_name + '.txt')) as f:
15
+ for line in f:
16
+ line = line.strip()
17
+ video_name = line.split(':')[0]
18
+ idts.append(video_name)
19
+ return idts
20
+
21
+
22
+ def obtain_seq_index(index, num_frames):
23
+ seq = list(range(index - 13, index + 13 + 1))
24
+ seq = [min(max(item, 0), num_frames - 1) for item in seq]
25
+ return seq
26
+
27
+ def get_3dmm_feature(img_path, idx, new_dict):
28
+ id = img_path.split('/')[-3]
29
+ features = new_dict[id]
30
+ idx_list = obtain_seq_index(idx, features.shape[0])
31
+ feature = features[idx_list, 80:144]
32
+ # feature[:, -1] = 50
33
+ return np.transpose(feature, (1, 0))
34
+
35
+
36
+
37
+ class Facereala3dmmexp512Dataset(BaseDataset):
38
+ def __init__(self, opt, mode=None):
39
+ BaseDataset.__init__(self, opt)
40
+ img_size = opt.img_size
41
+ idts = get_idts(opt.name.split('_')[0])
42
+ print("---------load data list--------: ", idts)
43
+ self.new_dict = {}
44
+ if mode == 'train':
45
+ self.labels = []
46
+ self.label_starts = []
47
+ self.label_ends = []
48
+ count = 0
49
+ for idt_name in idts:
50
+ # root = '../AnnVI/feature/{}'.format(idt_name)
51
+ root = os.path.join(opt.feature_path, idt_name)
52
+ feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
53
+ self.new_dict[idt_name] = feature
54
+ if opt.audio_feature == "3dmm":
55
+ training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
56
+ else:
57
+ training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
58
+ training_data = torch.load(training_data_path)
59
+ img_paths = training_data['img_paths']
60
+ features_3dmm = training_data['features_3dmm']
61
+ index = [i[0].split('/')[-1] for i in img_paths]
62
+
63
+ image_dir = '{}/{}_dlib_crop'.format(root, img_size)
64
+ self.label_starts.append(count)
65
+ for img in range(len(index)):
66
+ img_path = os.path.join(image_dir, index[img])
67
+ # idx_list = obtain_seq_index(img, feature.shape[0])
68
+ # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
69
+ self.labels.append([img_path, features_3dmm[img]])
70
+ count = count + 1
71
+ self.label_ends.append(count)
72
+
73
+ self.label_starts = np.array(self.label_starts)
74
+ self.label_ends = np.array(self.label_ends)
75
+ self.transforms_image = transforms.Compose([transforms.ToTensor(),
76
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
77
+
78
+ self.transforms_label = transforms.Compose([transforms.ToTensor(),
79
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
80
+ self.shuffle()
81
+ elif mode == 'test':
82
+ self.labels = []
83
+ self.label_starts = []
84
+ self.label_ends = []
85
+ count = 0
86
+ for idt_name in idts:
87
+ # root = '../AnnVI/feature/{}'.format(idt_name)
88
+ root = os.path.join(opt.feature_path, idt_name)
89
+ feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
90
+ self.new_dict[idt_name] = feature
91
+ if opt.audio_feature == "3dmm":
92
+ training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
93
+ else:
94
+ training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
95
+ training_data = torch.load(training_data_path)
96
+ img_paths = training_data['img_paths']
97
+ features_3dmm = training_data['features_3dmm']
98
+ index = [i[0].split('/')[-1] for i in img_paths]
99
+
100
+ image_dir = '{}/{}_dlib_crop'.format(root, img_size)
101
+ self.label_starts.append(count)
102
+ for img in range(len(index)):
103
+ img_path = os.path.join(image_dir, index[img])
104
+ # idx_list = obtain_seq_index(img, feature.shape[0])
105
+ # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
106
+ self.labels.append([img_path, features_3dmm[img]])
107
+ count = count + 1
108
+ self.label_ends.append(count)
109
+
110
+ self.label_starts = np.array(self.label_starts)
111
+ self.label_ends = np.array(self.label_ends)
112
+ self.transforms_image = transforms.Compose([transforms.ToTensor(),
113
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
114
+
115
+ self.transforms_label = transforms.Compose([transforms.ToTensor(),
116
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
117
+ self.shuffle()
118
+
119
+ def shuffle(self):
120
+ self.labels_index = list(range(len(self.labels)))
121
+ random.shuffle(self.labels_index)
122
+
123
+ def add_mouth_mask2(self, img):
124
+ mask = np.ones_like(img)
125
+ rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30]
126
+ mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
127
+ x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
128
+ x = np.flip(x, 0)
129
+ y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
130
+ zz1 = -y - x + 88 > 0
131
+ zz2 = np.flip(zz1, 1)
132
+ zz = (zz1 + zz2) > 0
133
+ mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
134
+ imgm = img * mask
135
+ return imgm
136
+
137
+ def __getitem__(self, index):
138
+ # s1= time.time()
139
+ idx = self.labels_index[index]
140
+ img_path, feature_3dmm_idx= self.labels[idx]
141
+ # print(img_path, feature_3dmm_idx)
142
+ feature_3dmm = get_3dmm_feature(img_path, feature_3dmm_idx, self.new_dict)
143
+ #print(img_path, feature_3dmm_idx, feature_3dmm.shape)
144
+
145
+ img = np.array(Image.open(img_path).convert('RGB'))
146
+ img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
147
+ cut_pad1 = np.random.randint(0, 20)
148
+ cut_pad2 = np.random.randint(0, 20)
149
+ img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
150
+ # s2 =time.time()
151
+ # print('get data and read data ', s2-s1)
152
+ mask_B = img.copy()
153
+ # mask_end = np.random.randint(236*2, 250*2)
154
+ # index = np.random.randint(80, 90)
155
+ # mask_B[mask_B.shape[1] // 2 - index:mask_end, 30:-30] = 0
156
+ mask_end = np.random.randint(480, 500)
157
+ index = np.random.randint(15, 30)
158
+ mask_B[index:mask_end, 70:-70] = 0
159
+ img = Image.fromarray(img)
160
+
161
+ mask_B = Image.fromarray(mask_B)
162
+ img = self.transforms_image(img)
163
+ mask_B = self.transforms_image(mask_B)
164
+
165
+ x = np.where((idx >= self.label_starts) * (idx < self.label_ends))[0]
166
+
167
+ audio = torch.tensor(feature_3dmm)
168
+ # s3 = time.time()
169
+ # print('get 3dmm and mask ', s3 - s2)
170
+ # 保证real_A_index不是idx
171
+ max_i = 0
172
+ real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
173
+ while real_A_index == idx:
174
+ max_i += 1
175
+ real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
176
+ if max_i > 5:
177
+ break
178
+
179
+ imgA_path, _ = self.labels[real_A_index]
180
+ imgA = np.array(Image.open(imgA_path).convert('RGB'))
181
+ cut_pad1 = np.random.randint(0, 20)
182
+ cut_pad2 = np.random.randint(0, 20)
183
+ imgA = imgA[cut_pad1:256*2 + cut_pad1, cut_pad2:256*2 + cut_pad2]
184
+
185
+ ########椭圆##########
186
+ # mask = np.zeros(imgA.shape, dtype=np.uint8)
187
+ # cv2.ellipse(mask, (imgA.shape[1] // 2, imgA.shape[0] // 2 - 165 - cut_pad1),
188
+ # (imgA.shape[1] // 2 + 25, imgA.shape[0]), 0, 0, 360, (255, 255, 255), -1)
189
+ # ROI = cv2.bitwise_and(imgA, mask)
190
+ # imgA = Image.fromarray(ROI)
191
+ #############################
192
+ # imgA[:imgA.shape[1] // 2 - 40 - index2, :] = 0
193
+ imgA = Image.fromarray(imgA)
194
+ imgA = self.transforms_image(imgA)
195
+ # s4 = time.time()
196
+ # print('end time reala ', s4 - s3)
197
+ return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
198
+
199
+ def __len__(self):
200
+ """Return the total number of images in the dataset."""
201
+ return len(self.labels)
202
+
203
+
204
+ if __name__ == '__main__':
205
+ from options.train_options import TrainOptions
206
+
207
+ opt = TrainOptions().parse()
208
+ dataset = Facereala3dmmDataset(opt)
209
+ dataset_size = len(dataset)
210
+ print(dataset_size)
211
+ for i, data in enumerate(dataset):
212
+ print(data)
landmark2face_wy/data/Facereala3dmmexpwenet512_dataset.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path
2
+ import random
3
+ from data.base_dataset import BaseDataset, get_params, get_transform
4
+ import torchvision.transforms as transforms
5
+ from data.image_folder import make_dataset
6
+ from PIL import Image, ImageEnhance
7
+ import numpy as np
8
+ import cv2
9
+ import torch
10
+ import time
11
+
12
+ def get_idts(config_name):
13
+ idts = list()
14
+ with open(os.path.join('../config', config_name + '.txt')) as f:
15
+ for line in f:
16
+ line = line.strip()
17
+ video_name = line.split(':')[0]
18
+ idts.append(video_name)
19
+ return idts
20
+
21
+
22
+ def obtain_seq_index(index, num_frames):
23
+ seq = list(range(index - 10, index + 9 + 1))
24
+ seq = [min(max(item, 0), num_frames - 1) for item in seq]
25
+ return seq
26
+
27
+ def get_3dmm_feature(img_path, idx, audio_feature, new_dict):
28
+ id = img_path.split('/')[-3]
29
+ features, features1, features1 = new_dict[id]
30
+ idx_list = obtain_seq_index(idx, features.shape[0])
31
+ feature = features[idx_list, 80:144]
32
+ feature1 = features1[:,audio_feature[0]:audio_feature[1]]
33
+ feature = np.concatenate([feature, features[idx_list, -3:], np.transpose(feature1, (1, 0))], 1)
34
+ # print(feature.shape)
35
+ return np.transpose(feature, (1, 0))
36
+ # return feature
37
+
38
+
39
+
40
+ class Facereala3dmmexpwenet512Dataset(BaseDataset):
41
+ def __init__(self, opt, mode=None):
42
+ BaseDataset.__init__(self, opt)
43
+ img_size = opt.img_size
44
+ idts = get_idts(opt.name.split('_')[0])
45
+ print("---------load data list--------: ", idts)
46
+ self.new_dict = {}
47
+ if mode == 'train':
48
+ self.labels = []
49
+ self.label_starts = []
50
+ self.label_ends = []
51
+ count = 0
52
+ for idt_name in idts:
53
+ # root = '../AnnVI/feature/{}'.format(idt_name)
54
+ root = os.path.join(opt.feature_path, idt_name)
55
+ feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
56
+ feature1 = np.load(os.path.join(root,'audio_wenet_feature.npy'))
57
+ self.new_dict[idt_name] = [feature, feature1, feature1]
58
+ if opt.audio_feature == "3dmm":
59
+ training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
60
+ else:
61
+ training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
62
+ training_data = torch.load(training_data_path)
63
+ img_paths = training_data['img_paths']
64
+ features_3dmm = training_data['features_3dmm']
65
+ audio_features = np.load(os.path.join(root, 'audio_data.npy'), allow_pickle=True)
66
+ audio_features = audio_features.tolist()
67
+ index = [i[0].split('/')[-1] for i in img_paths]
68
+
69
+ image_dir = '{}/{}_dlib_crop'.format(root, img_size)
70
+ self.label_starts.append(count)
71
+ for img in range(len(index)):
72
+ img_path = os.path.join(image_dir, index[img])
73
+ # idx_list = obtain_seq_index(img, feature.shape[0])
74
+ # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
75
+ if type(features_3dmm[img]) != int:
76
+ print(img_path)
77
+ audio_feature = audio_features[img]
78
+ self.labels.append([img_path, features_3dmm[img], audio_feature])
79
+ count = count + 1
80
+ self.label_ends.append(count)
81
+
82
+ self.label_starts = np.array(self.label_starts)
83
+ self.label_ends = np.array(self.label_ends)
84
+ self.transforms_image = transforms.Compose([transforms.ToTensor(),
85
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
86
+
87
+ self.transforms_label = transforms.Compose([transforms.ToTensor(),
88
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
89
+ self.shuffle()
90
+ elif mode == 'test':
91
+ self.labels = []
92
+ self.label_starts = []
93
+ self.label_ends = []
94
+ count = 0
95
+ for idt_name in idts:
96
+ # root = '../AnnVI/feature/{}'.format(idt_name)
97
+ root = os.path.join(opt.feature_path, idt_name)
98
+ feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
99
+ self.new_dict[idt_name] = feature
100
+ if opt.audio_feature == "3dmm":
101
+ training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
102
+ else:
103
+ training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
104
+ training_data = torch.load(training_data_path)
105
+ img_paths = training_data['img_paths']
106
+ features_3dmm = training_data['features_3dmm']
107
+ index = [i[0].split('/')[-1] for i in img_paths]
108
+
109
+ image_dir = '{}/{}_dlib_crop'.format(root, img_size)
110
+ self.label_starts.append(count)
111
+ for img in range(len(index)):
112
+ img_path = os.path.join(image_dir, index[img])
113
+ # idx_list = obtain_seq_index(img, feature.shape[0])
114
+ # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
115
+ self.labels.append([img_path, features_3dmm[img]])
116
+ count = count + 1
117
+ self.label_ends.append(count)
118
+
119
+ self.label_starts = np.array(self.label_starts)
120
+ self.label_ends = np.array(self.label_ends)
121
+ self.transforms_image = transforms.Compose([transforms.ToTensor(),
122
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
123
+
124
+ self.transforms_label = transforms.Compose([transforms.ToTensor(),
125
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
126
+ self.shuffle()
127
+
128
+ def shuffle(self):
129
+ self.labels_index = list(range(len(self.labels)))
130
+ random.shuffle(self.labels_index)
131
+
132
+ def add_mouth_mask2(self, img):
133
+ mask = np.ones_like(img)
134
+ rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30]
135
+ mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
136
+ x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
137
+ x = np.flip(x, 0)
138
+ y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
139
+ zz1 = -y - x + 88 > 0
140
+ zz2 = np.flip(zz1, 1)
141
+ zz = (zz1 + zz2) > 0
142
+ mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
143
+ imgm = img * mask
144
+ return imgm
145
+
146
+ def __getitem__(self, index):
147
+ # s1= time.time()
148
+ idx = self.labels_index[index]
149
+ img_path, feature_3dmm_idx, audio_feature= self.labels[idx]
150
+ # print(img_path, feature_3dmm_idx)
151
+ feature_3dmm = get_3dmm_feature(img_path, feature_3dmm_idx, audio_feature, self.new_dict)
152
+ #print(img_path, feature_3dmm_idx, feature_3dmm.shape)
153
+
154
+ img = np.array(Image.open(img_path).convert('RGB'))
155
+ img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
156
+ cut_pad1 = np.random.randint(0, 20)
157
+ cut_pad2 = np.random.randint(0, 20)
158
+ img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
159
+ # s2 =time.time()
160
+ # print('get data and read data ', s2-s1)
161
+ mask_B = img.copy()
162
+ # mask_end = np.random.randint(236*2, 250*2)
163
+ # index = np.random.randint(80, 90)
164
+ # mask_B[mask_B.shape[1] // 2 - index:mask_end, 30:-30] = 0
165
+ mask_end = np.random.randint(480, 500)
166
+ index = np.random.randint(15, 30)
167
+ # index = np.random.randint(90, 100)
168
+ mask_B[index:mask_end, 70:-70] = 0
169
+ img = Image.fromarray(img)
170
+
171
+ mask_B = Image.fromarray(mask_B)
172
+ img = self.transforms_image(img)
173
+ mask_B = self.transforms_image(mask_B)
174
+
175
+ x = np.where((idx >= self.label_starts) * (idx < self.label_ends))[0]
176
+
177
+ audio = torch.tensor(feature_3dmm)
178
+ # s3 = time.time()
179
+ # print('get 3dmm and mask ', s3 - s2)
180
+ # 保证real_A_index不是idx
181
+ max_i = 0
182
+ real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
183
+ while real_A_index == idx:
184
+ max_i += 1
185
+ real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
186
+ if max_i > 5:
187
+ break
188
+
189
+ imgA_path, _, _ = self.labels[real_A_index]
190
+ imgA = np.array(Image.open(imgA_path).convert('RGB'))
191
+ cut_pad1 = np.random.randint(0, 20)
192
+ cut_pad2 = np.random.randint(0, 20)
193
+ imgA = imgA[cut_pad1:256*2 + cut_pad1, cut_pad2:256*2 + cut_pad2]
194
+
195
+ ########椭圆##########
196
+ # mask = np.zeros(imgA.shape, dtype=np.uint8)
197
+ # cv2.ellipse(mask, (imgA.shape[1] // 2, imgA.shape[0] // 2 - 165 - cut_pad1),
198
+ # (imgA.shape[1] // 2 + 25, imgA.shape[0]), 0, 0, 360, (255, 255, 255), -1)
199
+ # ROI = cv2.bitwise_and(imgA, mask)
200
+ # imgA = Image.fromarray(ROI)
201
+ #############################
202
+ # imgA[:imgA.shape[1] // 2 - 40 - index2, :] = 0
203
+ imgA = Image.fromarray(imgA)
204
+ imgA = self.transforms_image(imgA)
205
+ # s4 = time.time()
206
+ # print('end time reala ', s4 - s3)
207
+ return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
208
+
209
+ def __len__(self):
210
+ """Return the total number of images in the dataset."""
211
+ return len(self.labels)
212
+
213
+
214
+ if __name__ == '__main__':
215
+ from options.train_options import TrainOptions
216
+
217
+ opt = TrainOptions().parse()
218
+ dataset = Facereala3dmmDataset(opt)
219
+ dataset_size = len(dataset)
220
+ print(dataset_size)
221
+ for i, data in enumerate(dataset):
222
+ print(data)
landmark2face_wy/data/__init__.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """This package includes all the modules related to data loading and preprocessing
2
+
3
+ To add a custom dataset class called 'dummy', you need to add a file called 'dummy_dataset.py' and define a subclass 'DummyDataset' inherited from BaseDataset.
4
+ You need to implement four functions:
5
+ -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt).
6
+ -- <__len__>: return the size of dataset.
7
+ -- <__getitem__>: get a data point from data loader.
8
+ -- <modify_commandline_options>: (optionally) add dataset-specific options and set default options.
9
+
10
+ Now you can use the dataset class by specifying flag '--dataset_mode dummy'.
11
+ See our template dataset class 'template_dataset.py' for more details.
12
+ """
13
+ import importlib
14
+ import torch.utils.data
15
+ from landmark2face_wy.data.base_dataset import BaseDataset
16
+
17
+
18
+ def find_dataset_using_name(dataset_name):
19
+ """Import the module "data/[dataset_name]_dataset.py".
20
+
21
+ In the file, the class called DatasetNameDataset() will
22
+ be instantiated. It has to be a subclass of BaseDataset,
23
+ and it is case-insensitive.
24
+ """
25
+ dataset_filename = "landmark2face_wy.data." + dataset_name + "_dataset"
26
+ datasetlib = importlib.import_module(dataset_filename)
27
+
28
+ dataset = None
29
+ target_dataset_name = dataset_name.replace('_', '') + 'dataset'
30
+ for name, cls in datasetlib.__dict__.items():
31
+ if name.lower() == target_dataset_name.lower() \
32
+ and issubclass(cls, BaseDataset):
33
+ dataset = cls
34
+
35
+ if dataset is None:
36
+ raise NotImplementedError("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." % (dataset_filename, target_dataset_name))
37
+
38
+ return dataset
39
+
40
+
41
+ def get_option_setter(dataset_name):
42
+ """Return the static method <modify_commandline_options> of the dataset class."""
43
+ dataset_class = find_dataset_using_name(dataset_name)
44
+ return dataset_class.modify_commandline_options
45
+
46
+
47
+ def create_dataset(opt, mode='train'):
48
+ """Create a dataset given the option.
49
+
50
+ This function wraps the class CustomDatasetDataLoader.
51
+ This is the main interface between this package and 'train.py'/'test.py'
52
+
53
+ Example:
54
+ >>> from data import create_dataset
55
+ >>> dataset = create_dataset(opt)
56
+ """
57
+ data_loader = CustomDatasetDataLoader(opt, mode)
58
+ dataset = data_loader.load_data()
59
+ return dataset
60
+
61
+
62
+ class CustomDatasetDataLoader():
63
+ """Wrapper class of Dataset class that performs multi-threaded data loading"""
64
+
65
+ def __init__(self, opt, mode):
66
+ """Initialize this class
67
+
68
+ Step 1: create a dataset instance given the name [dataset_mode]
69
+ Step 2: create a multi-threaded data loader.
70
+ """
71
+ self.opt = opt
72
+ dataset_class = find_dataset_using_name(opt.dataset_mode)
73
+ self.dataset = dataset_class(opt, mode)
74
+ print("dataset [%s] was created" % type(self.dataset).__name__)
75
+ if mode == 'test':
76
+ batchsize = opt.batch_size // 2
77
+ else:
78
+ batchsize = opt.batch_size
79
+ print(opt.batch_size)
80
+ if not opt.distributed:
81
+ self.dataloader = torch.utils.data.DataLoader(self.dataset,batch_size=batchsize,
82
+ shuffle=not opt.serial_batches,num_workers=int(opt.num_threads))
83
+ else:
84
+ self.train_sampler = torch.utils.data.distributed.DistributedSampler(self.dataset) ### 数据切分
85
+ self.dataloader = torch.utils.data.DataLoader(self.dataset, batch_size=batchsize, sampler=self.train_sampler, num_workers=int(opt.num_threads), pin_memory=True)
86
+
87
+ def load_data(self):
88
+ return self
89
+
90
+ def __len__(self):
91
+ """Return the number of data in the dataset"""
92
+ return min(len(self.dataset), self.opt.max_dataset_size)
93
+
94
+ def __iter__(self):
95
+ """Return a batch of data"""
96
+ for i, data in enumerate(self.dataloader):
97
+ if i * self.opt.batch_size >= self.opt.max_dataset_size:
98
+ break
99
+ yield data
landmark2face_wy/data/base_dataset.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9f9eca68615a251926ce113af4594a8dd1f50644c66be50ff5ab27020569c89
3
+ size 1093920
landmark2face_wy/data/image_folder.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:180bb0b0dc195aa073049a4c7630e071577f5607bbb3bd2c8247468ec84c7f6c
3
+ size 860856
landmark2face_wy/data/l2faceaudio512_dataset.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path
2
+ import random
3
+ from data.base_dataset import BaseDataset, get_params, get_transform
4
+ import torchvision.transforms as transforms
5
+ from data.image_folder import make_dataset
6
+ from PIL import Image, ImageEnhance
7
+ import numpy as np
8
+ import cv2
9
+ import torch
10
+
11
+
12
+ def get_idts(config_name):
13
+ idts = list()
14
+ with open(os.path.join('../config', config_name + '.txt')) as f:
15
+ for line in f:
16
+ line = line.strip()
17
+ idts.append(line)
18
+ return idts
19
+
20
+
21
+ class L2FaceAudio512Dataset(BaseDataset):
22
+ def __init__(self, opt, mode=None):
23
+ BaseDataset.__init__(self, opt)
24
+ img_size = opt.img_size
25
+ idts = get_idts(opt.name.split('_')[0])
26
+ print("---------load data list--------: ", idts)
27
+ if mode == 'train':
28
+ self.labels = []
29
+ for idt_name in idts:
30
+ # root = '../AnnVI/feature/{}'.format(idt_name)
31
+ root = os.path.join(opt.feature_path, idt_name)
32
+ if opt.audio_feature == "mfcc":
33
+ training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
34
+ else:
35
+ training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
36
+ training_data = torch.load(training_data_path)
37
+ img_paths = training_data['img_paths']
38
+ audio_features = training_data['audio_features']
39
+ index = [i[0].split('/')[-1] for i in img_paths]
40
+
41
+ image_dir = '{}/{}_dlib_crop'.format(root, img_size)
42
+ # label_dir = '{}/512_landmark_crop'.format(root)
43
+
44
+ # if 'man' in opt.name:
45
+ # imgs.sort(key=lambda x:int(x.split('.')[0]))
46
+ # else:
47
+ # imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), int(x.split('.')[0].split('-')[1])))
48
+ for img in range(len(index)):
49
+ img_path = os.path.join(image_dir, index[img])
50
+ audio_feature = audio_features[img]
51
+ self.labels.append([img_path, audio_feature])
52
+ # transforms.Resize([img_size, img_size], Image.BICUBIC),
53
+ self.transforms_image = transforms.Compose([transforms.ToTensor(),
54
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
55
+ # transforms.Resize([img_size, img_size], Image.BICUBIC),
56
+ self.transforms_label = transforms.Compose([transforms.ToTensor(),
57
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
58
+ self.shuffle()
59
+ elif mode == 'test':
60
+ self.labels = []
61
+ for idt_name in idts:
62
+ # root = '../AnnVI/feature/{}'.format(idt_name)
63
+ root = os.path.join(opt.feature_path, idt_name)
64
+ if opt.audio_feature == "mfcc":
65
+ training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
66
+ else:
67
+ training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
68
+ training_data = torch.load(training_data_path)
69
+ img_paths = training_data['img_paths']
70
+ audio_features = training_data['audio_features']
71
+ index = [i[0].split('/')[-1] for i in img_paths]
72
+
73
+ image_dir = '{}/{}_dlib_crop'.format(root, img_size)
74
+ # label_dir = '{}/512_landmark_crop'.format(root)
75
+
76
+ # if 'man' in opt.name:
77
+ # imgs.sort(key=lambda x:int(x.split('.')[0]))
78
+ # else:
79
+ # imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), int(x.split('.')[0].split('-')[1])))
80
+ for img in range(len(index)):
81
+ img_path = os.path.join(image_dir, index[img])
82
+ audio_feature = audio_features[img]
83
+ self.labels.append([img_path, audio_feature])
84
+ # transforms.Resize([img_size, img_size], Image.BICUBIC),
85
+ self.transforms_image = transforms.Compose([transforms.ToTensor(),
86
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
87
+ # transforms.Resize([img_size, img_size], Image.BICUBIC),
88
+ self.transforms_label = transforms.Compose([transforms.ToTensor(),
89
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
90
+ self.shuffle()
91
+
92
+ def shuffle(self):
93
+ random.shuffle(self.labels)
94
+
95
+ def add_mouth_mask2(self, img):
96
+ mask = np.ones_like(img)
97
+ rect_area = [img.shape[1] // 2 - np.random.randint(50, 60), np.random.randint(226, 246), 30, 256 - 30]
98
+ mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
99
+ x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
100
+ x = np.flip(x, 0)
101
+ y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
102
+ zz1 = -y - x + 88 > 0
103
+ zz2 = np.flip(zz1, 1)
104
+ zz = (zz1 + zz2) > 0
105
+ mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
106
+ imgm = img * mask
107
+ return imgm
108
+
109
    def __getitem__(self, index):
        """Return one training sample as a dict.

        Keys: 'A' (random reference face tensor), 'A_label'/'B_label'
        (the same padded audio-feature tensor), 'B' (target face tensor,
        eyes masked), 'mask_B' (target with the mouth region also zeroed).
        """
        # Disable OpenCV's internal threading (relevant inside DataLoader workers).
        cv2.setNumThreads(0)
        img_path, audio_feature = self.labels[index]
        img = np.array(Image.open(img_path).convert('RGB'))
        # Per-channel brightness jitter in [-20, 20).
        # NOTE(review): the int8 addition can wrap before np.clip — confirm
        # the source pixel range makes this acceptable.
        img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
        # Random 512x512 crop offset (assumes stored crops are >= 532x532 — TODO confirm).
        cut_pad1 = np.random.randint(0, 20)
        cut_pad2 = np.random.randint(0, 20)
        img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]

        #### elliptical mask hiding the collar (disabled) ####
        '''
        mask = np.zeros(img.shape, dtype=np.uint8)
        cv2.ellipse(mask, (img.shape[1] // 2, img.shape[0] // 2 - 160 - cut_pad1), (img.shape[1] // 2 + 10, img.shape[0]), 0, 0, 360, (255, 255, 255), -1)
        '''
        #### mask covering the eye region ####
        mask = np.ones(img.shape, dtype=np.uint8) * 255
        mask[40 - cut_pad1:140 - cut_pad1, 110 - cut_pad2:-110 - cut_pad2] = 0
        img = cv2.bitwise_and(img, mask)

        mask_B = img.copy()
        mask_B = cv2.resize(mask_B, (256, 256))
        ########## neck-segmentation mask (disabled) #############
        # img_edge = cv2.imread(img_path.replace("dlib_crop", "dlib_crop_neck"))
        # img_edge = img_edge[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]
        # mask_B = cv2.bitwise_and(img, 255 - img_edge)
        # img_edge[:128, :, :] = img[:128, :, :]

        ########## extra elliptical neck mask (disabled) #############
        '''
        maske = np.zeros(img.shape, dtype=np.uint8)
        cv2.ellipse(maske, (img.shape[1] // 2, img.shape[0] // 2 + 50),
                    (img.shape[1] // 4 + np.random.randint(-5, 5), img.shape[0] // 3 + np.random.randint(-10, 10)),
                    0, 0, 360, (255, 255, 255), -1)
        maske[:img.shape[0] // 2, :, :] = 0
        mask_B = cv2.bitwise_and(mask_B, 255-maske)
        '''
        ########## previous rectangular mouth mask #############
        mask_end = np.random.randint(236, 256)
        mask_B[mask_B.shape[1] // 2 - np.random.randint(40, 50):mask_end, 30:-30] = 0
        ########## previous rectangular mouth mask #############
        ########## triangular mouth mask (disabled) #############
        # mask_B = self.add_mouth_mask2(mask_B)
        ########## triangular mouth mask (disabled) #############
        # mask_B[mask_B.shape[1] // 2 - 50:, 30:-30] = 0
        img = Image.fromarray(img)
        mask_B = Image.fromarray(mask_B)
        img = self.transforms_image(img)
        mask_B = self.transforms_image(mask_B)
        # lab = Image.open(lab_path).convert('RGB')
        # lab = self.transforms_label(lab)
        # Copy the audio feature into a fixed 256x256 float32 plane
        # (zero-padded on the right/bottom).
        audio = np.zeros((256, 256), dtype=np.float32)
        audio_feature = np.array(audio_feature)
        audio[:audio_feature.shape[0], :audio_feature.shape[1]] = audio_feature
        audio = torch.tensor([audio])

        # Reference image 'A': a random frame from the sample pool, masked
        # with the eye mask built above for 'B' (its own recompute is
        # commented out below) — presumably intentional; verify.
        imgA_path, _ = random.sample(self.labels, 1)[0]
        imgA = np.array(Image.open(imgA_path).convert('RGB'))
        cut_pad1 = np.random.randint(0, 20)
        cut_pad2 = np.random.randint(0, 20)
        imgA = imgA[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
        # mask = np.ones(imgA.shape, dtype=np.uint8) * 255
        # mask[40 - cut_pad1:140 - cut_pad1, 110 - cut_pad2:-110 - cut_pad2] = 0
        imgA = cv2.bitwise_and(imgA, mask)
        imgA = Image.fromarray(imgA)
        imgA = self.transforms_image(imgA)
        return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
175
+
176
+ def __len__(self):
177
+ """Return the total number of images in the dataset."""
178
+ return len(self.labels)
179
+
180
+
181
if __name__ == '__main__':
    # Smoke test: build the dataset from training options and dump samples.
    from options.train_options import TrainOptions

    options = TrainOptions().parse()
    dataset = L2FaceDataset(options)
    print(len(dataset))
    for sample in dataset:
        print(sample)
landmark2face_wy/data/l2faceaudio_dataset.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path
2
+ import random
3
+ from data.base_dataset import BaseDataset, get_params, get_transform
4
+ import torchvision.transforms as transforms
5
+ from data.image_folder import make_dataset
6
+ from PIL import Image, ImageEnhance
7
+ import numpy as np
8
+ import cv2
9
+ import torch
10
+
11
+
12
def get_idts(config_name):
    """Read the identity list from '../config/<config_name>.txt'.

    Args:
        config_name: base name of the config file (no extension).

    Returns:
        One stripped identity name per non-empty line. Blank lines are
        skipped — the original appended empty strings for them, which
        later produced bogus feature paths.
    """
    with open(os.path.join('../config', config_name + '.txt')) as f:
        return [line.strip() for line in f if line.strip()]
19
+
20
+
21
class L2FaceAudioDataset(BaseDataset):
    """Paired face-image / audio-feature dataset for audio-driven face synthesis.

    Each sample pairs a masked 256x256 face crop with its padded audio
    feature, plus a randomly drawn reference face from the same pool.
    """

    def __init__(self, opt, mode=None):
        """Build the (img_path, audio_feature) sample list for *mode*.

        Args:
            opt: options object; reads opt.img_size, opt.name,
                opt.feature_path and opt.audio_feature.
            mode: 'train' or 'test'. Any other value (including the
                default None) leaves the dataset uninitialized — this
                mirrors the original behavior; verify callers always
                pass a mode.
        """
        BaseDataset.__init__(self, opt)
        img_size = opt.img_size
        idts = get_idts(opt.name.split('_')[0])
        print("---------load data list--------: ", idts)
        # The original duplicated this entire body in separate, byte-identical
        # 'train' and 'test' branches; they are merged here.
        if mode in ('train', 'test'):
            self.labels = []
            for idt_name in idts:
                root = os.path.join(opt.feature_path, idt_name)
                # mfcc features use the legacy file name without a suffix.
                if opt.audio_feature == "mfcc":
                    training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
                else:
                    training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
                training_data = torch.load(training_data_path)
                img_paths = training_data['img_paths']
                audio_features = training_data['audio_features']
                # Keep only the frame file name of each stored path.
                index = [i[0].split('/')[-1] for i in img_paths]

                image_dir = '{}/{}_dlib_crop'.format(root, img_size)
                for pos in range(len(index)):
                    self.labels.append([os.path.join(image_dir, index[pos]),
                                        audio_features[pos]])
            # Images and labels share the same [-1, 1] normalization.
            self.transforms_image = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
            self.transforms_label = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
            self.shuffle()

    def shuffle(self):
        """Shuffle the (img_path, audio_feature) sample list in place."""
        random.shuffle(self.labels)

    def add_mouth_mask2(self, img):
        """Zero out a roughly triangular mouth/chin region of *img*.

        Args:
            img: HxWx3 uint8 face crop; geometry assumes ~256x256.

        Returns:
            A new array with the wedge region zeroed; input unmodified.
        """
        mask = np.ones_like(img)
        # [top, bottom, left, right]; only the bottom edge is jittered here.
        rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30]
        height = rect_area[1] - rect_area[0]
        width = rect_area[3] - rect_area[2]
        # Row-index grid flipped so values grow toward the rect's top.
        x = np.flip(np.tile(np.arange(height)[:, np.newaxis], (1, width)), 0)
        # Column-index grid.
        y = np.tile(np.arange(width)[:, np.newaxis], (1, height)).transpose()
        zz1 = -y - x + 88 > 0          # wedge anchored at the left edge
        zz2 = np.flip(zz1, 1)          # mirrored wedge at the right edge
        zz = (zz1 + zz2) > 0
        # Boolean tile casts to mask dtype on assignment (the unused
        # mask_rect_area local and the redundant "* 1" are removed).
        mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3))
        return img * mask

    def __getitem__(self, index):
        """Return one training sample as a dict.

        Keys: 'A' (random reference face tensor), 'A_label'/'B_label'
        (the same padded audio tensor), 'B' (target face, eyes masked),
        'mask_B' (target with the mouth region also zeroed).
        """
        # Disable OpenCV threading (relevant inside DataLoader workers).
        cv2.setNumThreads(0)
        img_path, audio_feature = self.labels[index]
        img = np.array(Image.open(img_path).convert('RGB'))
        # Per-channel brightness jitter in [-20, 20); NOTE(review): the
        # int8 addition can wrap before np.clip — confirm acceptable.
        img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
        # Random 256x256 crop (assumes stored crops >= 266x266 — TODO confirm).
        cut_pad1 = np.random.randint(0, 10)
        cut_pad2 = np.random.randint(0, 10)
        img = img[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]

        #### mask covering the eye region ####
        mask = np.ones(img.shape, dtype=np.uint8) * 255
        mask[20 - cut_pad1:70 - cut_pad1, 55 - cut_pad2:-55 - cut_pad2] = 0
        img = cv2.bitwise_and(img, mask)

        mask_B = img.copy()
        mask_end = np.random.randint(236, 256)
        ########## previous rectangular mouth mask #############
        mask_B[mask_B.shape[1] // 2 - np.random.randint(40, 50):mask_end, 30:-30] = 0
        ########## triangular mouth mask (disabled) #############
        # mask_B = self.add_mouth_mask2(mask_B)
        img = Image.fromarray(img)
        mask_B = Image.fromarray(mask_B)
        img = self.transforms_image(img)
        mask_B = self.transforms_image(mask_B)
        # Copy the audio feature into a fixed 256x256 float32 plane
        # (zero-padded on the right/bottom).
        audio = np.zeros((256, 256), dtype=np.float32)
        audio_feature = np.array(audio_feature)
        audio[:audio_feature.shape[0], :audio_feature.shape[1]] = audio_feature
        audio = torch.tensor([audio])

        # Reference image 'A': random frame, masked with the eye mask built
        # for 'B' (offsets from the earlier crop) — preserved as-is from the
        # original; presumably intentional, verify.
        imgA_path, _ = random.sample(self.labels, 1)[0]
        imgA = np.array(Image.open(imgA_path).convert('RGB'))
        cut_pad1 = np.random.randint(0, 10)
        cut_pad2 = np.random.randint(0, 10)
        imgA = imgA[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]
        imgA = cv2.bitwise_and(imgA, mask)
        imgA = Image.fromarray(imgA)
        imgA = self.transforms_image(imgA)
        return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}

    def __len__(self):
        """Return the total number of images in the dataset."""
        return len(self.labels)
156
+
157
+
158
if __name__ == '__main__':
    # Smoke test: build the dataset from training options and dump samples.
    from options.train_options import TrainOptions

    opt = TrainOptions().parse()
    # BUG FIX: this module defines L2FaceAudioDataset; the original
    # instantiated L2FaceDataset, which is not defined here (NameError).
    dataset = L2FaceAudioDataset(opt)
    dataset_size = len(dataset)
    print(dataset_size)
    for i, data in enumerate(dataset):
        print(data)
+ print(data)
landmark2face_wy/digitalhuman_interface.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1bc38a9e2a82a9022573da1e8326128a98a661a17d61283f3911c5ee3aa504a
3
+ size 5284104
landmark2face_wy/loss/__init__.py ADDED
File without changes
landmark2face_wy/loss/perceptual.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:784d25b825d83d4634932f2628747a4f7f9f6c8bfe84610bc757131810c2e412
3
+ size 2056248
landmark2face_wy/models/DINet.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71be733004b4ba60a93dee4971f4e69eefe575dfc99d8e3dffc3ed160d9ba4d4
3
+ size 10580992