Kelton committed on
Commit
3d3198b
·
verified ·
1 Parent(s): e8238ae

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +82 -0
  2. README.md +73 -0
  3. README_en.md +66 -0
  4. app.py +230 -0
  5. config/config.ini +23 -0
  6. download.sh +32 -0
  7. example/audio.wav +3 -0
  8. example/video.mp4 +3 -0
  9. face_attr_detect/.DS_Store +0 -0
  10. face_attr_detect/__init__.py +1 -0
  11. face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so +3 -0
  12. face_attr_detect/face_attr_epoch_12_220318.onnx +3 -0
  13. face_detect_utils/__init__.py +0 -0
  14. face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so +3 -0
  15. face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so +3 -0
  16. face_detect_utils/resources/model_float32.onnx +3 -0
  17. face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx +3 -0
  18. face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx +3 -0
  19. face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so +3 -0
  20. face_lib/__init__.py +0 -0
  21. face_lib/face_detect_and_align/__init__.py +3 -0
  22. face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so +3 -0
  23. face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so +3 -0
  24. face_lib/face_detect_and_align/scrfd_insightface/__init__.py +5 -0
  25. face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so +3 -0
  26. face_lib/face_parsing/__init__.py +6 -0
  27. face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so +3 -0
  28. face_lib/face_restore/__init__.py +2 -0
  29. face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so +3 -0
  30. h_utils/__init__.py +9 -0
  31. h_utils/custom.cpython-38-x86_64-linux-gnu.so +3 -0
  32. h_utils/obs_client.cpython-38-x86_64-linux-gnu.so +3 -0
  33. h_utils/request_utils.cpython-38-x86_64-linux-gnu.so +3 -0
  34. h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so +3 -0
  35. h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so +3 -0
  36. landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so +3 -0
  37. landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth +3 -0
  38. landmark2face_wy/checkpoints/test/opt.txt +74 -0
  39. landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so +3 -0
  40. landmark2face_wy/data/Facereala3dmmexp512_dataset.py +212 -0
  41. landmark2face_wy/data/Facereala3dmmexpwenet512_dataset.py +222 -0
  42. landmark2face_wy/data/__init__.py +99 -0
  43. landmark2face_wy/data/base_dataset.cpython-38-x86_64-linux-gnu.so +3 -0
  44. landmark2face_wy/data/image_folder.cpython-38-x86_64-linux-gnu.so +3 -0
  45. landmark2face_wy/data/l2faceaudio512_dataset.py +189 -0
  46. landmark2face_wy/data/l2faceaudio_dataset.py +166 -0
  47. landmark2face_wy/digitalhuman_interface.cpython-38-x86_64-linux-gnu.so +3 -0
  48. landmark2face_wy/loss/__init__.py +0 -0
  49. landmark2face_wy/loss/perceptual.cpython-38-x86_64-linux-gnu.so +3 -0
  50. landmark2face_wy/models/DINet.cpython-38-x86_64-linux-gnu.so +3 -0
.gitattributes CHANGED
@@ -33,3 +33,85 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ example/audio.wav filter=lfs diff=lfs merge=lfs -text
37
+ example/video.mp4 filter=lfs diff=lfs merge=lfs -text
38
+ face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
39
+ face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
40
+ face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
41
+ face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
42
+ face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
43
+ face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
44
+ face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
45
+ face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
46
+ face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
47
+ h_utils/custom.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
48
+ h_utils/obs_client.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
49
+ h_utils/request_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
50
+ h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
51
+ h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
52
+ landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
53
+ landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
54
+ landmark2face_wy/data/base_dataset.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
55
+ landmark2face_wy/data/image_folder.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
56
+ landmark2face_wy/digitalhuman_interface.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
57
+ landmark2face_wy/loss/perceptual.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
58
+ landmark2face_wy/models/DINet.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
59
+ landmark2face_wy/models/base_function.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
60
+ landmark2face_wy/models/base_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
61
+ landmark2face_wy/models/face3d2face_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
62
+ landmark2face_wy/models/face_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
63
+ landmark2face_wy/models/l2faceaudio_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
64
+ landmark2face_wy/models/networks.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
65
+ landmark2face_wy/models/networks_HD.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
66
+ landmark2face_wy/models/networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
67
+ landmark2face_wy/models/pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
68
+ landmark2face_wy/models/pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
69
+ landmark2face_wy/options/base_options.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
70
+ landmark2face_wy/options/test_options.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
71
+ landmark2face_wy/options/train_options.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
72
+ landmark2face_wy/sync_batchnorm/batchnorm.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
73
+ landmark2face_wy/sync_batchnorm/batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
74
+ landmark2face_wy/sync_batchnorm/comm.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
75
+ landmark2face_wy/sync_batchnorm/replicate.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
76
+ landmark2face_wy/sync_batchnorm/unittest.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
77
+ landmark2face_wy/test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
78
+ landmark2face_wy/test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
79
+ landmark2face_wy/util/flow_util.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
80
+ landmark2face_wy/util/get_data.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
81
+ landmark2face_wy/util/html.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
82
+ landmark2face_wy/util/image_pool.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
83
+ landmark2face_wy/util/util.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
84
+ landmark2face_wy/util/visualizer.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
85
+ model_lib/base_wrapper/onnx_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
86
+ preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
87
+ service/server.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
88
+ service/trans_dh_service.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
89
+ wenet/compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
90
+ wenet/transformer/asr_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
91
+ wenet/transformer/attention.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
92
+ wenet/transformer/cmvn.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
93
+ wenet/transformer/convolution.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
94
+ wenet/transformer/ctc.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
95
+ wenet/transformer/decoder.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
96
+ wenet/transformer/decoder_layer.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
97
+ wenet/transformer/embedding.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
98
+ wenet/transformer/encoder.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
99
+ wenet/transformer/encoder_layer.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
100
+ wenet/transformer/label_smoothing_loss.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
101
+ wenet/transformer/positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
102
+ wenet/transformer/subsampling.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
103
+ wenet/transformer/swish.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
104
+ wenet/utils/checkpoint.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
105
+ wenet/utils/common.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
106
+ wenet/utils/ctc_util.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
107
+ wenet/utils/executor.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
108
+ wenet/utils/mask.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
109
+ wenet/utils/scheduler.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
110
+ xseg/dfl_xseg_api.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
111
+ y_utils/config.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
112
+ y_utils/lcr.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
113
+ y_utils/liblcr.so filter=lfs diff=lfs merge=lfs -text
114
+ y_utils/logger.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
115
+ y_utils/md5.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
116
+ y_utils/time_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
117
+ y_utils/tools.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ [![License](https://img.shields.io/badge/License-View%20License-blue.svg)](https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE)
3
+ ![Python](https://img.shields.io/badge/Python-3.8-blue.svg)
4
+ ![Linux](https://img.shields.io/badge/OS-Linux-brightgreen.svg)
5
+
6
+ **[中文](#chinese-version)** | **[English](README_en.md)**
7
+
8
+ ---
9
+
10
+ <a name="chinese-version"></a>
11
+
12
+ # HeyGem-Linux-Python-Hack
13
+
14
+ ## 项目简介
15
+
16
+ [HeyGem-Linux-Python-Hack] 是一个基于 Python 的数字人项目,它从 [HeyGem.ai](https://github.com/GuijiAI/HeyGem.ai) 中提取出来,它能够直接在 Linux 系统上运行,摆脱了对 Docker 和 Windows 系统的依赖。我们的目标是提供一个更易于部署和使用的数字人解决方案。
17
+
18
+ **如果你觉得这个项目对你有帮助,欢迎给我们 Star!**
19
+ **如果运行过程中遇到问题,在查阅已有 Issue 后,在查阅 Google/baidu/ai 后,欢迎提交 Issues!**
20
+
21
+ ## 主要特性
22
+
23
+ * 无需 Docker: 直接在 Linux 系统上运行,简化部署流程。
24
+ * 无需 Windows: 完全基于 Linux 开发和测试。
25
+ * Python 驱动: 使用 Python 语言开发,易于理解和扩展。
26
+ * 开发者友好: 易于使用和扩展。
27
+ * 完全离线。
28
+
29
+ ## 开始使用
30
+
31
+ ### 安装
32
+ 本项目**支持且仅支持 Linux & python3.8 环境**
33
+ 请确保你的 Linux 系统上已经安装了 **Python 3.8**。然后,使用 pip 安装项目依赖项
34
+ 同时也提供一个备用的环境 [requirements_0.txt](requirements_0.txt),遇到问题的话,你可以参考它来建立一个新的环境。
35
+ **具体的 onnxruntime-gpu / torch 等需要结合你的机器上的 cuda 版本去尝试一些组合,否则仍旧可能遇到问题。**
36
+ **请尽量不要询问任何关于 pip 的问题,感谢合作**
37
+
38
+ ```bash
39
+ # 直接安装整个 requirements.txt 不一定成功,更建议跑代码观察报错信息,然后根据报错信息结合 requirements 去尝试安装,祝你顺利。
40
+ # pip install -r requirements.txt
41
+ ```
42
+
43
+ ### 使用
44
+ 把项目克隆到本地
45
+ ```bash
46
+ git clone https://github.com/Holasyb918/HeyGem-Linux-Python-Hack
47
+ cd HeyGem-Linux-Python-Hack
48
+ bash download.sh
49
+ ```
50
+ #### 开始使用
51
+ * repo 中已提供可以用于 demo 的音视频样例,代码可以直接运行。
52
+ #### command:
53
+ ```bash
54
+ python run.py
55
+ ```
56
+
57
+ * 如果要使用自己的数据,可以外部传入参数,请注意,**path 是本地文件,且仅支持相对路径**.
58
+
59
+ #### command:
60
+ ```bash
61
+ python run.py --audio_path example/audio.wav --video_path example/video.mp4
62
+ ```
63
+ #### gradio:
64
+ ```bash
65
+ python app.py
66
+ # 请等待模型初始化完成后提交任务
67
+ ```
68
+
69
+ ## Contributing
70
+ 欢迎贡献!
71
+
72
+ ## License
73
+ 参考 heyGem.ai 的协议.
README_en.md ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ [![License](https://img.shields.io/badge/License-View%20License-blue.svg)](https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE)
3
+ ![Python](https://img.shields.io/badge/Python-3.8-blue.svg)
4
+ ![Linux](https://img.shields.io/badge/OS-Linux-brightgreen.svg)
5
+
6
+ **[中文](./readme.md)** | **[English](#english-version)**
7
+
8
+ ---
9
+
10
+ <a name="english-version"></a>
11
+
12
+ # HeyGem-Linux-Python-Hack
13
+
14
+ ## Introduction
15
+
16
+ [HeyGem-Linux-Python-Hack] is a Python-based digital human project extracted from HeyGem.ai. It is designed to run directly on Linux systems, eliminating the need for Docker and Windows. Our goal is to provide a easier-to-deploy, and user-friendly digital human solution.
17
+
18
+ **Feel free to Star us if you find this project useful!**
19
+ **Please submit an Issue if you run into any problems!**
20
+
21
+ ## Key Features
22
+
23
+ * No Docker Required: Runs directly on Linux systems, simplifying the deployment process.
24
+ * No Windows Required: Fully developed and tested on Linux.
25
+ * Python Powered: Developed using the Python language, making it easy to understand and extend.
26
+ * Developer-Friendly: Easy to use, and easy to extend.
27
+
28
+ ## Getting Started
29
+
30
+ ### Installation
31
+
32
+ Please ensure that **Python 3.8** is installed on your Linux system. Then, you can install the project dependencies using pip:
33
+
34
+ ```bash
35
+ pip install -r requirements.txt
36
+ ```
37
+
38
+ ### Usage
39
+ Clone this repository to your local machine:
40
+ ```bash
41
+ git clone https://github.com/Holasyb918/HeyGem-Linux-Python-Hack
42
+ cd HeyGem-Linux-Python-Hack
43
+ bash download.sh
44
+ ```
45
+ #### Getting Started
46
+ * Audio and video examples that can be used for the demo are already provided in the repo, and the code can be run directly.
47
+ #### Command:
48
+ ```bash
49
+ python run.py
50
+ ```
51
+ * If you want to use your own data, you can pass parameters externally. **Please note that the path is a local file and only supports relative paths.**
52
+ #### command:
53
+ ```bash
54
+ python run.py --audio_path example/audio.wav --video_path example/video.mp4
55
+ ```
56
+ #### gradio:
57
+ ```bash
58
+ python app.py
59
+ # Please wait until processor init done.
60
+ ```
61
+
62
+ ## Contributing
63
+ Contributions are welcome!
64
+
65
+ ## License
66
+ This project is licensed under the HeyGem.ai License.
app.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import gc
3
+ import json
4
+ import os
5
+
6
+ os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
7
+ import subprocess
8
+ import threading
9
+ import time
10
+ import traceback
11
+ import uuid
12
+ from enum import Enum
13
+ import queue
14
+ import shutil
15
+ from functools import partial
16
+
17
+ import cv2
18
+ import gradio as gr
19
+ from flask import Flask, request
20
+
21
+ import service.trans_dh_service
22
+ from h_utils.custom import CustomError
23
+ from y_utils.config import GlobalConfig
24
+ from y_utils.logger import logger
25
+
26
+
27
def write_video_gradio(
    output_imgs_queue,
    temp_dir,
    result_dir,
    work_id,
    audio_path,
    result_queue,
    width,
    height,
    fps,
    watermark_switch=0,
    digital_auth=0,
    temp_queue=None,
):
    """Consume rendered frames and assemble the final result video.

    Frames are read from ``output_imgs_queue`` and written to a silent
    intermediate mp4 in ``temp_dir``; ffmpeg then muxes ``audio_path`` (plus
    optional watermark / digital-human badge overlays) into the final file in
    ``result_dir``.

    Queue protocol (established by the producer): each item is a triple
    ``(state, reason, frames)`` where ``state is True`` means normal
    end-of-stream, ``state is False`` means an upstream failure with ``reason``
    as the cause, and any non-bool ``state`` means ``frames`` is a batch of
    images to append.

    Outcome is reported through ``result_queue``: ``[True, result_path]`` on
    success, ``[False, <message>]`` on failure.  ``temp_queue`` is currently
    unused (kept for signature compatibility with the service's writer).
    """
    output_mp4 = os.path.join(temp_dir, "{}-t.mp4".format(work_id))
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
    result_path = os.path.join(result_dir, "{}-r.mp4".format(work_id))
    video_write = cv2.VideoWriter(output_mp4, fourcc, fps, (width, height))
    print("Custom VideoWriter init done")
    try:
        while True:
            state, reason, value_ = output_imgs_queue.get()
            if isinstance(state, bool):
                if state:
                    # Normal termination signal from the producer.
                    logger.info(
                        "Custom VideoWriter [{}]视频帧队列处理已结束".format(work_id)
                    )
                    logger.info(
                        "Custom VideoWriter Silence Video saved in {}".format(
                            os.path.realpath(output_mp4)
                        )
                    )
                    break
                # Upstream failure: propagate through the local except handler.
                logger.error(
                    "Custom VideoWriter [{}]任务视频帧队列 -> 异常原因:[{}]".format(
                        work_id, reason
                    )
                )
                raise CustomError(reason)
            for result_img in value_:
                video_write.write(result_img)
        # Single release point (the original released twice on the success
        # path; VideoWriter.release() on a released writer is a no-op).
        video_write.release()
        if watermark_switch == 1 and digital_auth == 1:
            logger.info(
                "Custom VideoWriter [{}]任务需要水印和数字人标识".format(work_id)
            )
            # NOTE: the original issued byte-identical commands for the
            # landscape and portrait cases, so no width/height branch is needed.
            command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
                audio_path,
                output_mp4,
                GlobalConfig.instance().watermark_path,
                GlobalConfig.instance().digital_auth_path,
                result_path,
            )
            logger.info("command:{}".format(command))
        elif watermark_switch == 1 and digital_auth == 0:
            logger.info("Custom VideoWriter [{}]任务需要水印".format(work_id))
            command = 'ffmpeg -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10" -c:a aac -crf 15 -strict -2 {}'.format(
                audio_path,
                output_mp4,
                GlobalConfig.instance().watermark_path,
                result_path,
            )
            logger.info("command:{}".format(command))
        elif watermark_switch == 0 and digital_auth == 1:
            logger.info("Custom VideoWriter [{}]任务需要数字人标识".format(work_id))
            # Same collapse as above: both orientation branches were identical.
            command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
                audio_path,
                output_mp4,
                GlobalConfig.instance().digital_auth_path,
                result_path,
            )
            logger.info("command:{}".format(command))
        else:
            command = "ffmpeg -loglevel warning -y -i {} -i {} -c:a aac -c:v libx264 -crf 15 -strict -2 {}".format(
                audio_path, output_mp4, result_path
            )
        logger.info("Custom command:{}".format(command))
        # NOTE(review): shell=True with interpolated paths is injection-prone
        # if paths are ever user-controlled; a list-argv subprocess.run would
        # be safer but would change quoting behavior — left as-is.
        subprocess.call(command, shell=True)
        print("###### Custom Video Writer write over")
        print(f"###### Video result saved in {os.path.realpath(result_path)}")
        result_queue.put([True, result_path])
        # temp_queue.put([True, result_path])
    except Exception as e:
        logger.error(
            "Custom VideoWriter [{}]视频帧队列处理异常结束,异常原因:[{}]".format(
                work_id, e.__str__()
            )
        )
        result_queue.put(
            [
                False,
                "[{}]视频帧队列处理异常结束,异常原因:[{}]".format(
                    work_id, e.__str__()
                ),
            ]
        )
    logger.info("Custom VideoWriter 后处理进程结束")
146
+
147
+
148
# Monkey-patch the service's frame writer with the gradio-aware variant above
# so results land in local files this app can return to the UI.
service.trans_dh_service.write_video = write_video_gradio
149
+
150
+
151
class VideoProcessor:
    """Gradio-facing wrapper around ``service.trans_dh_service.TransDhTask``.

    Holds one task instance and exposes :meth:`process_video`, which runs a
    single audio + driving-video pair through the digital-human pipeline and
    returns the path of the generated result video.
    """

    def __init__(self):
        self.task = service.trans_dh_service.TransDhTask()
        self.basedir = GlobalConfig.instance().result_dir
        self.is_initialized = False
        self._initialize_service()
        print("VideoProcessor init done")

    def _initialize_service(self):
        """Flag the service as ready for use.

        NOTE(review): the fixed 5s sleep looks like a crude wait for the
        task's background model loading — confirm against TransDhTask.
        """
        logger.info("开始初始化 trans_dh_service...")
        try:
            time.sleep(5)
            logger.info("trans_dh_service 初始化完成。")
            self.is_initialized = True
        except Exception as e:
            logger.error(f"初始化 trans_dh_service 失败: {e}")

    def process_video(
        self, audio_file, video_file, watermark=False, digital_auth=False
    ):
        """Run the pipeline on one audio/video pair; return the result path.

        Blocks until initialization completes.  ``watermark`` and
        ``digital_auth`` are accepted for interface compatibility but are not
        forwarded (the task is always invoked with zeros, as before).
        Raises ``gr.Error`` on any failure so gradio surfaces the message.
        """
        while not self.is_initialized:
            logger.info("服务尚未完成初始化,等待 1 秒...")
            time.sleep(1)
        # Unique work id; also keys this run in the task's bookkeeping dict.
        code = str(uuid.uuid1())

        try:
            # Probe the input video's geometry/fps (informational only; the
            # task re-reads the file itself).
            cap = cv2.VideoCapture(video_file)
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            cap.release()

            self.task.task_dic[code] = ""
            self.task.work(audio_file, video_file, code, 0, 0, 0, 0)

            # task_dic[code][2] holds the rendered video's path on success.
            result_path = self.task.task_dic[code][2]
            final_result_dir = os.path.join("result", code)
            os.makedirs(final_result_dir, exist_ok=True)
            # Use shutil.move instead of shelling out to `mv` (portable, and
            # no shell-quoting issues with unusual paths).
            shutil.move(result_path, final_result_dir)
            # Remove intermediate files left beside the original result
            # (shell wildcard expansion is intentional here).
            os.system(
                f"rm -rf {os.path.join(os.path.dirname(result_path), code + '*.*')}"
            )
            result_path = os.path.realpath(
                os.path.join(final_result_dir, os.path.basename(result_path))
            )
            return result_path

        except Exception as e:
            logger.error(f"处理视频时发生错误: {e}")
            raise gr.Error(str(e))
209
+
210
+
211
+ if __name__ == "__main__":
212
+ processor = VideoProcessor()
213
+
214
+ inputs = [
215
+ gr.File(label="上传音频文件/upload audio file"),
216
+ gr.File(label="上传视频文件/upload video file"),
217
+ ]
218
+ outputs = gr.Video(label="生成的视频/Generated video")
219
+
220
+ title = "数字人视频生成/Digital Human Video Generation"
221
+ description = "上传音频和视频文件,即可生成数字人视频。/Upload audio and video files to generate digital human videos."
222
+
223
+ demo = gr.Interface(
224
+ fn=processor.process_video,
225
+ inputs=inputs,
226
+ outputs=outputs,
227
+ title=title,
228
+ description=description,
229
+ )
230
+ demo.queue().launch()
config/config.ini ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [log]
2
+ log_dir = ./log
3
+ log_file = dh.log
4
+
5
+ [http_server]
6
+ server_ip = 0.0.0.0
7
+ server_port = 8383
8
+
9
+ [temp]
10
+ temp_dir = ./
11
+ clean_switch = 1
12
+
13
+ [result]
14
+ result_dir = ./result
15
+ clean_switch = 0
16
+
17
+ [digital]
18
+ batch_size = 4
19
+
20
+ [register]
21
+ url = http://172.16.160.51:12120
22
+ report_interval = 10
23
+ enable=0
download.sh ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ set -e
2
+ set -u
3
+
4
+ # face attr
5
+ mkdir -p face_attr_detect
6
+ wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/face_attr_epoch_12_220318.onnx -O face_attr_detect/face_attr_epoch_12_220318.onnx
7
+
8
+ # face detect
9
+ mkdir -p face_detect_utils/resources
10
+ wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/pfpld_robust_sim_bs1_8003.onnx -O face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx
11
+ wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/scrfd_500m_bnkps_shape640x640.onnx -O face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx
12
+ wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/model_float32.onnx -O face_detect_utils/resources/model_float32.onnx
13
+
14
+ # dh model
15
+ mkdir -p landmark2face_wy/checkpoints/anylang
16
+ wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/dinet_v1_20240131.pth -O landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth
17
+
18
+ # face parsing
19
+ mkdir -p pretrain_models/face_lib/face_parsing
20
+ wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/79999_iter.onnx -O pretrain_models/face_lib/face_parsing/79999_iter.onnx
21
+
22
+ # gfpgan
23
+ mkdir -p pretrain_models/face_lib/face_restore/gfpgan
24
+ wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/GFPGANv1.4.onnx -O pretrain_models/face_lib/face_restore/gfpgan/GFPGANv1.4.onnx
25
+
26
+ # xseg
27
+ mkdir -p xseg
28
+ wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/xseg_211104_4790000.onnx -O xseg/xseg_211104_4790000.onnx
29
+
30
+ # wenet
31
+ mkdir -p wenet/examples/aishell/aidata/exp/conformer
32
+ wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/wenetmodel.pt -O wenet/examples/aishell/aidata/exp/conformer/wenetmodel.pt
example/audio.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:886f4e5cd90b79c8575c8bb18c93d41543b2619272f75841dac095a65c8f85dd
3
+ size 192044
example/video.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a11e32bda4b3d15777ed8d481e66859805e71c5168221d0098eac2b31b3f4e7b
3
+ size 7068410
face_attr_detect/.DS_Store ADDED
Binary file (6.15 kB). View file
 
face_attr_detect/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .face_attr import FaceAttr
face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aa56c5cbaddc1bc7204823fd2252cf54d056365795737c846f876236a3e5056
3
+ size 441864
face_attr_detect/face_attr_epoch_12_220318.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fa6604beacd1e560ffc4cae6fa1537591d5f1a765a9f55473a295a1d22da3af
3
+ size 3723167
face_detect_utils/__init__.py ADDED
File without changes
face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68b5fd825eebc9421090c1daf3e940833b7bf5712ecee16deef937c87bbe666e
3
+ size 1363368
face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1897346bf14dbbac7328a43598ba0c6d3f4db9ab6628dbebb381d4139aca179e
3
+ size 1158712
face_detect_utils/resources/model_float32.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e5dc9dd52836b2029a599e74134f1a0f03e416db3e40e932f69609adb0238ad
3
+ size 17315434
face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bd9913817152831562cccde7e51ed431d1cf4547d8f21e0876b58a0d82baa55
3
+ size 6889235
face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b467f878e26ff1e7ee05cd9936fdff63fa2a5af5d732ed17ee231f2dd5cc07ae
3
+ size 2524648
face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fa07e8146248e1b4deaafdfa0cc6c1e1b7a9d641db536aa3ae9b9ee10ab3b01
3
+ size 3178688
face_lib/__init__.py ADDED
File without changes
face_lib/face_detect_and_align/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from .face_align_5_landmarks import FaceDetect5Landmarks
2
+ from .face_align_utils import estimate_norm
3
+
face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f0fd0fff60f8fb1fe6cbb1b549d5c43ae9bfaef1e5f4ee4edb27085d3023d22
3
+ size 1321904
face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da29cd727e8bf8f7107c322d5d40ef8596b29e2a858cad779362dbf2516c38a0
3
+ size 1611632
face_lib/face_detect_and_align/scrfd_insightface/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # -- coding: utf-8 --
2
+ # @Time : 2021/11/10
3
+
4
+
5
+ from .scrfd import SCRFD
face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8b9f64f8ef1c198e7d240ba6f1c9e4ff333c48f0259b71e74ca466d5ea274bb
3
+ size 2529880
face_lib/face_parsing/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # -- coding: utf-8 --
2
+ # @Time : 2022/3/29
3
+
4
+
5
+ from .face_parsing_api import FaceParsing
6
+ # from .dfl_xseg_net import XsegNet
face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04931709d9c22f909e7ead81acb06cae05b70162fbcb6d2055ac7315b61834d3
3
+ size 1053792
face_lib/face_restore/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+
2
+ from .gfpgan_onnx.gfpgan_onnx_api import GFPGAN
face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2088f84d33b0a442e7dcb04135939e56d918b8edefd1de0b06340cb38573d1e
3
+ size 567104
h_utils/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ #!/user/bin/env python
2
+ # coding=utf-8
3
+ """
4
+ @project : dhp-service
5
+ @author : huyi
6
+ @file : __init__.py.py
7
+ @ide : PyCharm
8
+ @time : 2021-08-18 15:45:13
9
+ """
h_utils/custom.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37ecc1ed06eb9b804f9de3470dbe6780976514d120bde8fed20d67c4cd26fe0e
3
+ size 259136
h_utils/obs_client.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d881bc9a1303697bd912c005c71ccc74bab724f1ade6e1156c2d9ca0277e344
3
+ size 982656
h_utils/request_utils.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:215847205ce3a0f416ee3f5d07b4406b88ac0815cdd3e671c0d317c649cf2420
3
+ size 1304616
h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fcfcce02dcbc5f9740329c10ab5fd0bfb157f3e6eb2fa4622adb901ac1feab5
3
+ size 607848
h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4baf454a9940278b6696977b71a489ccf7c920faf37340b9968dcddfa582c508
3
+ size 594864
landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ced0b512524155f205d2b4c6a46a1a63c2d347387b30550a1fd99ccd53df172
3
+ size 1749648
landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c4568b1f1f2890b4a92edc3f9457af63f908065961233af2125d060cb56fd9e
3
+ size 392392973
landmark2face_wy/checkpoints/test/opt.txt ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ----------------- Options ---------------
2
+ aspect_ratio: 1.0
3
+ audio_feature: 3dmm
4
+ batch_size: 16
5
+ checkpoints_dir: ./landmark2face_wy/checkpoints
6
+ crop_size: 256
7
+ dataroot: ./data
8
+ dataset_mode: Facereala3dmm
9
+ direction: AtoB
10
+ display_winsize: 256
11
+ distributed: False
12
+ epoch: latest
13
+ eval: False
14
+ feat_num: 3
15
+ feature_path: ../AnnI_deep3dface_256_contains_id/
16
+ fp16: False
17
+ gpu_ids: 0
18
+ img_size: 256
19
+ init_gain: 0.02
20
+ init_type: normal
21
+ input_nc: 3
22
+ instance_feat: False
23
+ isTrain: False [default: None]
24
+ label_feat: False
25
+ lan_size: 1
26
+ load_features: False
27
+ load_iter: 0 [default: 0]
28
+ load_size: 286
29
+ local_rank: -1
30
+ max_dataset_size: inf
31
+ mfcc0_rate: 0.2
32
+ model: pirender_3dmm_mouth_hd
33
+ model_path: ./landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth
34
+ n_blocks: 9
35
+ n_blocks_global: 9
36
+ n_blocks_local: 3
37
+ n_clusters: 10
38
+ n_downsample_E: 4
39
+ n_downsample_global: 4
40
+ n_layers_D: 3
41
+ n_local_enhancers: 1
42
+ name: test
43
+ ndf: 64
44
+ nef: 16
45
+ netD: basic
46
+ netG: pirender
47
+ ngf: 64
48
+ niter_fix_global: 0
49
+ no_dropout: True
50
+ no_flip: False
51
+ no_ganFeat_loss: False
52
+ no_instance: False
53
+ norm: instance
54
+ ntest: inf
55
+ num_D: 2
56
+ num_test: 50
57
+ num_threads: 4
58
+ output_nc: 3
59
+ perceptual_layers: ['relu_1_1', 'relu_2_1', 'relu_3_1', 'relu_4_1', 'relu_5_1']
60
+ perceptual_network: vgg19
61
+ perceptual_num_scales: 4
62
+ perceptual_use_style_loss: True
63
+ perceptual_weights: [4, 4, 4, 4, 4]
64
+ phase: test
65
+ preprocess: resize_and_crop
66
+ resize_size: 512
67
+ results_dir: ./results/
68
+ serial_batches: False
69
+ suffix:
70
+ test_audio_path: None
71
+ test_muban: None
72
+ verbose: False
73
+ weight_style_to_perceptual: 250
74
+ ----------------- End -------------------
landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfb97b75c48cdbf3cde9f0d6cb9c2e0665b8a0f8f6870a78480263638f8b2bd9
3
+ size 3479728
landmark2face_wy/data/Facereala3dmmexp512_dataset.py ADDED
@@ -0,0 +1,212 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path
2
+ import random
3
+ from data.base_dataset import BaseDataset, get_params, get_transform
4
+ import torchvision.transforms as transforms
5
+ from data.image_folder import make_dataset
6
+ from PIL import Image, ImageEnhance
7
+ import numpy as np
8
+ import cv2
9
+ import torch
10
+ import time
11
+
12
+ def get_idts(config_name):
13
+ idts = list()
14
+ with open(os.path.join('../config', config_name + '.txt')) as f:
15
+ for line in f:
16
+ line = line.strip()
17
+ video_name = line.split(':')[0]
18
+ idts.append(video_name)
19
+ return idts
20
+
21
+
22
+ def obtain_seq_index(index, num_frames):
23
+ seq = list(range(index - 13, index + 13 + 1))
24
+ seq = [min(max(item, 0), num_frames - 1) for item in seq]
25
+ return seq
26
+
27
+ def get_3dmm_feature(img_path, idx, new_dict):
28
+ id = img_path.split('/')[-3]
29
+ features = new_dict[id]
30
+ idx_list = obtain_seq_index(idx, features.shape[0])
31
+ feature = features[idx_list, 80:144]
32
+ # feature[:, -1] = 50
33
+ return np.transpose(feature, (1, 0))
34
+
35
+
36
+
37
+ class Facereala3dmmexp512Dataset(BaseDataset):
38
+ def __init__(self, opt, mode=None):
39
+ BaseDataset.__init__(self, opt)
40
+ img_size = opt.img_size
41
+ idts = get_idts(opt.name.split('_')[0])
42
+ print("---------load data list--------: ", idts)
43
+ self.new_dict = {}
44
+ if mode == 'train':
45
+ self.labels = []
46
+ self.label_starts = []
47
+ self.label_ends = []
48
+ count = 0
49
+ for idt_name in idts:
50
+ # root = '../AnnVI/feature/{}'.format(idt_name)
51
+ root = os.path.join(opt.feature_path, idt_name)
52
+ feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
53
+ self.new_dict[idt_name] = feature
54
+ if opt.audio_feature == "3dmm":
55
+ training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
56
+ else:
57
+ training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
58
+ training_data = torch.load(training_data_path)
59
+ img_paths = training_data['img_paths']
60
+ features_3dmm = training_data['features_3dmm']
61
+ index = [i[0].split('/')[-1] for i in img_paths]
62
+
63
+ image_dir = '{}/{}_dlib_crop'.format(root, img_size)
64
+ self.label_starts.append(count)
65
+ for img in range(len(index)):
66
+ img_path = os.path.join(image_dir, index[img])
67
+ # idx_list = obtain_seq_index(img, feature.shape[0])
68
+ # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
69
+ self.labels.append([img_path, features_3dmm[img]])
70
+ count = count + 1
71
+ self.label_ends.append(count)
72
+
73
+ self.label_starts = np.array(self.label_starts)
74
+ self.label_ends = np.array(self.label_ends)
75
+ self.transforms_image = transforms.Compose([transforms.ToTensor(),
76
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
77
+
78
+ self.transforms_label = transforms.Compose([transforms.ToTensor(),
79
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
80
+ self.shuffle()
81
+ elif mode == 'test':
82
+ self.labels = []
83
+ self.label_starts = []
84
+ self.label_ends = []
85
+ count = 0
86
+ for idt_name in idts:
87
+ # root = '../AnnVI/feature/{}'.format(idt_name)
88
+ root = os.path.join(opt.feature_path, idt_name)
89
+ feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
90
+ self.new_dict[idt_name] = feature
91
+ if opt.audio_feature == "3dmm":
92
+ training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
93
+ else:
94
+ training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
95
+ training_data = torch.load(training_data_path)
96
+ img_paths = training_data['img_paths']
97
+ features_3dmm = training_data['features_3dmm']
98
+ index = [i[0].split('/')[-1] for i in img_paths]
99
+
100
+ image_dir = '{}/{}_dlib_crop'.format(root, img_size)
101
+ self.label_starts.append(count)
102
+ for img in range(len(index)):
103
+ img_path = os.path.join(image_dir, index[img])
104
+ # idx_list = obtain_seq_index(img, feature.shape[0])
105
+ # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
106
+ self.labels.append([img_path, features_3dmm[img]])
107
+ count = count + 1
108
+ self.label_ends.append(count)
109
+
110
+ self.label_starts = np.array(self.label_starts)
111
+ self.label_ends = np.array(self.label_ends)
112
+ self.transforms_image = transforms.Compose([transforms.ToTensor(),
113
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
114
+
115
+ self.transforms_label = transforms.Compose([transforms.ToTensor(),
116
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
117
+ self.shuffle()
118
+
119
+ def shuffle(self):
120
+ self.labels_index = list(range(len(self.labels)))
121
+ random.shuffle(self.labels_index)
122
+
123
+ def add_mouth_mask2(self, img):
124
+ mask = np.ones_like(img)
125
+ rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30]
126
+ mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
127
+ x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
128
+ x = np.flip(x, 0)
129
+ y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
130
+ zz1 = -y - x + 88 > 0
131
+ zz2 = np.flip(zz1, 1)
132
+ zz = (zz1 + zz2) > 0
133
+ mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
134
+ imgm = img * mask
135
+ return imgm
136
+
137
+ def __getitem__(self, index):
138
+ # s1= time.time()
139
+ idx = self.labels_index[index]
140
+ img_path, feature_3dmm_idx= self.labels[idx]
141
+ # print(img_path, feature_3dmm_idx)
142
+ feature_3dmm = get_3dmm_feature(img_path, feature_3dmm_idx, self.new_dict)
143
+ #print(img_path, feature_3dmm_idx, feature_3dmm.shape)
144
+
145
+ img = np.array(Image.open(img_path).convert('RGB'))
146
+ img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
147
+ cut_pad1 = np.random.randint(0, 20)
148
+ cut_pad2 = np.random.randint(0, 20)
149
+ img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
150
+ # s2 =time.time()
151
+ # print('get data and read data ', s2-s1)
152
+ mask_B = img.copy()
153
+ # mask_end = np.random.randint(236*2, 250*2)
154
+ # index = np.random.randint(80, 90)
155
+ # mask_B[mask_B.shape[1] // 2 - index:mask_end, 30:-30] = 0
156
+ mask_end = np.random.randint(480, 500)
157
+ index = np.random.randint(15, 30)
158
+ mask_B[index:mask_end, 70:-70] = 0
159
+ img = Image.fromarray(img)
160
+
161
+ mask_B = Image.fromarray(mask_B)
162
+ img = self.transforms_image(img)
163
+ mask_B = self.transforms_image(mask_B)
164
+
165
+ x = np.where((idx >= self.label_starts) * (idx < self.label_ends))[0]
166
+
167
+ audio = torch.tensor(feature_3dmm)
168
+ # s3 = time.time()
169
+ # print('get 3dmm and mask ', s3 - s2)
170
+ # 保证real_A_index不是idx
171
+ max_i = 0
172
+ real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
173
+ while real_A_index == idx:
174
+ max_i += 1
175
+ real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
176
+ if max_i > 5:
177
+ break
178
+
179
+ imgA_path, _ = self.labels[real_A_index]
180
+ imgA = np.array(Image.open(imgA_path).convert('RGB'))
181
+ cut_pad1 = np.random.randint(0, 20)
182
+ cut_pad2 = np.random.randint(0, 20)
183
+ imgA = imgA[cut_pad1:256*2 + cut_pad1, cut_pad2:256*2 + cut_pad2]
184
+
185
+ ########椭圆##########
186
+ # mask = np.zeros(imgA.shape, dtype=np.uint8)
187
+ # cv2.ellipse(mask, (imgA.shape[1] // 2, imgA.shape[0] // 2 - 165 - cut_pad1),
188
+ # (imgA.shape[1] // 2 + 25, imgA.shape[0]), 0, 0, 360, (255, 255, 255), -1)
189
+ # ROI = cv2.bitwise_and(imgA, mask)
190
+ # imgA = Image.fromarray(ROI)
191
+ #############################
192
+ # imgA[:imgA.shape[1] // 2 - 40 - index2, :] = 0
193
+ imgA = Image.fromarray(imgA)
194
+ imgA = self.transforms_image(imgA)
195
+ # s4 = time.time()
196
+ # print('end time reala ', s4 - s3)
197
+ return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
198
+
199
+ def __len__(self):
200
+ """Return the total number of images in the dataset."""
201
+ return len(self.labels)
202
+
203
+
204
+ if __name__ == '__main__':
205
+ from options.train_options import TrainOptions
206
+
207
+ opt = TrainOptions().parse()
208
+ dataset = Facereala3dmmDataset(opt)
209
+ dataset_size = len(dataset)
210
+ print(dataset_size)
211
+ for i, data in enumerate(dataset):
212
+ print(data)
landmark2face_wy/data/Facereala3dmmexpwenet512_dataset.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path
2
+ import random
3
+ from data.base_dataset import BaseDataset, get_params, get_transform
4
+ import torchvision.transforms as transforms
5
+ from data.image_folder import make_dataset
6
+ from PIL import Image, ImageEnhance
7
+ import numpy as np
8
+ import cv2
9
+ import torch
10
+ import time
11
+
12
+ def get_idts(config_name):
13
+ idts = list()
14
+ with open(os.path.join('../config', config_name + '.txt')) as f:
15
+ for line in f:
16
+ line = line.strip()
17
+ video_name = line.split(':')[0]
18
+ idts.append(video_name)
19
+ return idts
20
+
21
+
22
+ def obtain_seq_index(index, num_frames):
23
+ seq = list(range(index - 10, index + 9 + 1))
24
+ seq = [min(max(item, 0), num_frames - 1) for item in seq]
25
+ return seq
26
+
27
+ def get_3dmm_feature(img_path, idx, audio_feature, new_dict):
28
+ id = img_path.split('/')[-3]
29
+ features, features1, features1 = new_dict[id]
30
+ idx_list = obtain_seq_index(idx, features.shape[0])
31
+ feature = features[idx_list, 80:144]
32
+ feature1 = features1[:,audio_feature[0]:audio_feature[1]]
33
+ feature = np.concatenate([feature, features[idx_list, -3:], np.transpose(feature1, (1, 0))], 1)
34
+ # print(feature.shape)
35
+ return np.transpose(feature, (1, 0))
36
+ # return feature
37
+
38
+
39
+
40
+ class Facereala3dmmexpwenet512Dataset(BaseDataset):
41
+ def __init__(self, opt, mode=None):
42
+ BaseDataset.__init__(self, opt)
43
+ img_size = opt.img_size
44
+ idts = get_idts(opt.name.split('_')[0])
45
+ print("---------load data list--------: ", idts)
46
+ self.new_dict = {}
47
+ if mode == 'train':
48
+ self.labels = []
49
+ self.label_starts = []
50
+ self.label_ends = []
51
+ count = 0
52
+ for idt_name in idts:
53
+ # root = '../AnnVI/feature/{}'.format(idt_name)
54
+ root = os.path.join(opt.feature_path, idt_name)
55
+ feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
56
+ feature1 = np.load(os.path.join(root,'audio_wenet_feature.npy'))
57
+ self.new_dict[idt_name] = [feature, feature1, feature1]
58
+ if opt.audio_feature == "3dmm":
59
+ training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
60
+ else:
61
+ training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
62
+ training_data = torch.load(training_data_path)
63
+ img_paths = training_data['img_paths']
64
+ features_3dmm = training_data['features_3dmm']
65
+ audio_features = np.load(os.path.join(root, 'audio_data.npy'), allow_pickle=True)
66
+ audio_features = audio_features.tolist()
67
+ index = [i[0].split('/')[-1] for i in img_paths]
68
+
69
+ image_dir = '{}/{}_dlib_crop'.format(root, img_size)
70
+ self.label_starts.append(count)
71
+ for img in range(len(index)):
72
+ img_path = os.path.join(image_dir, index[img])
73
+ # idx_list = obtain_seq_index(img, feature.shape[0])
74
+ # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
75
+ if type(features_3dmm[img]) != int:
76
+ print(img_path)
77
+ audio_feature = audio_features[img]
78
+ self.labels.append([img_path, features_3dmm[img], audio_feature])
79
+ count = count + 1
80
+ self.label_ends.append(count)
81
+
82
+ self.label_starts = np.array(self.label_starts)
83
+ self.label_ends = np.array(self.label_ends)
84
+ self.transforms_image = transforms.Compose([transforms.ToTensor(),
85
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
86
+
87
+ self.transforms_label = transforms.Compose([transforms.ToTensor(),
88
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
89
+ self.shuffle()
90
+ elif mode == 'test':
91
+ self.labels = []
92
+ self.label_starts = []
93
+ self.label_ends = []
94
+ count = 0
95
+ for idt_name in idts:
96
+ # root = '../AnnVI/feature/{}'.format(idt_name)
97
+ root = os.path.join(opt.feature_path, idt_name)
98
+ feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
99
+ self.new_dict[idt_name] = feature
100
+ if opt.audio_feature == "3dmm":
101
+ training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
102
+ else:
103
+ training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
104
+ training_data = torch.load(training_data_path)
105
+ img_paths = training_data['img_paths']
106
+ features_3dmm = training_data['features_3dmm']
107
+ index = [i[0].split('/')[-1] for i in img_paths]
108
+
109
+ image_dir = '{}/{}_dlib_crop'.format(root, img_size)
110
+ self.label_starts.append(count)
111
+ for img in range(len(index)):
112
+ img_path = os.path.join(image_dir, index[img])
113
+ # idx_list = obtain_seq_index(img, feature.shape[0])
114
+ # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
115
+ self.labels.append([img_path, features_3dmm[img]])
116
+ count = count + 1
117
+ self.label_ends.append(count)
118
+
119
+ self.label_starts = np.array(self.label_starts)
120
+ self.label_ends = np.array(self.label_ends)
121
+ self.transforms_image = transforms.Compose([transforms.ToTensor(),
122
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
123
+
124
+ self.transforms_label = transforms.Compose([transforms.ToTensor(),
125
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
126
+ self.shuffle()
127
+
128
+ def shuffle(self):
129
+ self.labels_index = list(range(len(self.labels)))
130
+ random.shuffle(self.labels_index)
131
+
132
+ def add_mouth_mask2(self, img):
133
+ mask = np.ones_like(img)
134
+ rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30]
135
+ mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
136
+ x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
137
+ x = np.flip(x, 0)
138
+ y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
139
+ zz1 = -y - x + 88 > 0
140
+ zz2 = np.flip(zz1, 1)
141
+ zz = (zz1 + zz2) > 0
142
+ mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
143
+ imgm = img * mask
144
+ return imgm
145
+
146
+ def __getitem__(self, index):
147
+ # s1= time.time()
148
+ idx = self.labels_index[index]
149
+ img_path, feature_3dmm_idx, audio_feature= self.labels[idx]
150
+ # print(img_path, feature_3dmm_idx)
151
+ feature_3dmm = get_3dmm_feature(img_path, feature_3dmm_idx, audio_feature, self.new_dict)
152
+ #print(img_path, feature_3dmm_idx, feature_3dmm.shape)
153
+
154
+ img = np.array(Image.open(img_path).convert('RGB'))
155
+ img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
156
+ cut_pad1 = np.random.randint(0, 20)
157
+ cut_pad2 = np.random.randint(0, 20)
158
+ img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
159
+ # s2 =time.time()
160
+ # print('get data and read data ', s2-s1)
161
+ mask_B = img.copy()
162
+ # mask_end = np.random.randint(236*2, 250*2)
163
+ # index = np.random.randint(80, 90)
164
+ # mask_B[mask_B.shape[1] // 2 - index:mask_end, 30:-30] = 0
165
+ mask_end = np.random.randint(480, 500)
166
+ index = np.random.randint(15, 30)
167
+ # index = np.random.randint(90, 100)
168
+ mask_B[index:mask_end, 70:-70] = 0
169
+ img = Image.fromarray(img)
170
+
171
+ mask_B = Image.fromarray(mask_B)
172
+ img = self.transforms_image(img)
173
+ mask_B = self.transforms_image(mask_B)
174
+
175
+ x = np.where((idx >= self.label_starts) * (idx < self.label_ends))[0]
176
+
177
+ audio = torch.tensor(feature_3dmm)
178
+ # s3 = time.time()
179
+ # print('get 3dmm and mask ', s3 - s2)
180
+ # 保证real_A_index不是idx
181
+ max_i = 0
182
+ real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
183
+ while real_A_index == idx:
184
+ max_i += 1
185
+ real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
186
+ if max_i > 5:
187
+ break
188
+
189
+ imgA_path, _, _ = self.labels[real_A_index]
190
+ imgA = np.array(Image.open(imgA_path).convert('RGB'))
191
+ cut_pad1 = np.random.randint(0, 20)
192
+ cut_pad2 = np.random.randint(0, 20)
193
+ imgA = imgA[cut_pad1:256*2 + cut_pad1, cut_pad2:256*2 + cut_pad2]
194
+
195
+ ########椭圆##########
196
+ # mask = np.zeros(imgA.shape, dtype=np.uint8)
197
+ # cv2.ellipse(mask, (imgA.shape[1] // 2, imgA.shape[0] // 2 - 165 - cut_pad1),
198
+ # (imgA.shape[1] // 2 + 25, imgA.shape[0]), 0, 0, 360, (255, 255, 255), -1)
199
+ # ROI = cv2.bitwise_and(imgA, mask)
200
+ # imgA = Image.fromarray(ROI)
201
+ #############################
202
+ # imgA[:imgA.shape[1] // 2 - 40 - index2, :] = 0
203
+ imgA = Image.fromarray(imgA)
204
+ imgA = self.transforms_image(imgA)
205
+ # s4 = time.time()
206
+ # print('end time reala ', s4 - s3)
207
+ return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
208
+
209
+ def __len__(self):
210
+ """Return the total number of images in the dataset."""
211
+ return len(self.labels)
212
+
213
+
214
+ if __name__ == '__main__':
215
+ from options.train_options import TrainOptions
216
+
217
+ opt = TrainOptions().parse()
218
+ dataset = Facereala3dmmDataset(opt)
219
+ dataset_size = len(dataset)
220
+ print(dataset_size)
221
+ for i, data in enumerate(dataset):
222
+ print(data)
landmark2face_wy/data/__init__.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """This package includes all the modules related to data loading and preprocessing
2
+
3
+ To add a custom dataset class called 'dummy', you need to add a file called 'dummy_dataset.py' and define a subclass 'DummyDataset' inherited from BaseDataset.
4
+ You need to implement four functions:
5
+ -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt).
6
+ -- <__len__>: return the size of dataset.
7
+ -- <__getitem__>: get a data point from data loader.
8
+ -- <modify_commandline_options>: (optionally) add dataset-specific options and set default options.
9
+
10
+ Now you can use the dataset class by specifying flag '--dataset_mode dummy'.
11
+ See our template dataset class 'template_dataset.py' for more details.
12
+ """
13
+ import importlib
14
+ import torch.utils.data
15
+ from landmark2face_wy.data.base_dataset import BaseDataset
16
+
17
+
18
+ def find_dataset_using_name(dataset_name):
19
+ """Import the module "data/[dataset_name]_dataset.py".
20
+
21
+ In the file, the class called DatasetNameDataset() will
22
+ be instantiated. It has to be a subclass of BaseDataset,
23
+ and it is case-insensitive.
24
+ """
25
+ dataset_filename = "landmark2face_wy.data." + dataset_name + "_dataset"
26
+ datasetlib = importlib.import_module(dataset_filename)
27
+
28
+ dataset = None
29
+ target_dataset_name = dataset_name.replace('_', '') + 'dataset'
30
+ for name, cls in datasetlib.__dict__.items():
31
+ if name.lower() == target_dataset_name.lower() \
32
+ and issubclass(cls, BaseDataset):
33
+ dataset = cls
34
+
35
+ if dataset is None:
36
+ raise NotImplementedError("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." % (dataset_filename, target_dataset_name))
37
+
38
+ return dataset
39
+
40
+
41
+ def get_option_setter(dataset_name):
42
+ """Return the static method <modify_commandline_options> of the dataset class."""
43
+ dataset_class = find_dataset_using_name(dataset_name)
44
+ return dataset_class.modify_commandline_options
45
+
46
+
47
+ def create_dataset(opt, mode='train'):
48
+ """Create a dataset given the option.
49
+
50
+ This function wraps the class CustomDatasetDataLoader.
51
+ This is the main interface between this package and 'train.py'/'test.py'
52
+
53
+ Example:
54
+ >>> from data import create_dataset
55
+ >>> dataset = create_dataset(opt)
56
+ """
57
+ data_loader = CustomDatasetDataLoader(opt, mode)
58
+ dataset = data_loader.load_data()
59
+ return dataset
60
+
61
+
62
+ class CustomDatasetDataLoader():
63
+ """Wrapper class of Dataset class that performs multi-threaded data loading"""
64
+
65
+ def __init__(self, opt, mode):
66
+ """Initialize this class
67
+
68
+ Step 1: create a dataset instance given the name [dataset_mode]
69
+ Step 2: create a multi-threaded data loader.
70
+ """
71
+ self.opt = opt
72
+ dataset_class = find_dataset_using_name(opt.dataset_mode)
73
+ self.dataset = dataset_class(opt, mode)
74
+ print("dataset [%s] was created" % type(self.dataset).__name__)
75
+ if mode == 'test':
76
+ batchsize = opt.batch_size // 2
77
+ else:
78
+ batchsize = opt.batch_size
79
+ print(opt.batch_size)
80
+ if not opt.distributed:
81
+ self.dataloader = torch.utils.data.DataLoader(self.dataset,batch_size=batchsize,
82
+ shuffle=not opt.serial_batches,num_workers=int(opt.num_threads))
83
+ else:
84
+ self.train_sampler = torch.utils.data.distributed.DistributedSampler(self.dataset) ### 数据切分
85
+ self.dataloader = torch.utils.data.DataLoader(self.dataset, batch_size=batchsize, sampler=self.train_sampler, num_workers=int(opt.num_threads), pin_memory=True)
86
+
87
+ def load_data(self):
88
+ return self
89
+
90
+ def __len__(self):
91
+ """Return the number of data in the dataset"""
92
+ return min(len(self.dataset), self.opt.max_dataset_size)
93
+
94
+ def __iter__(self):
95
+ """Return a batch of data"""
96
+ for i, data in enumerate(self.dataloader):
97
+ if i * self.opt.batch_size >= self.opt.max_dataset_size:
98
+ break
99
+ yield data
landmark2face_wy/data/base_dataset.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9f9eca68615a251926ce113af4594a8dd1f50644c66be50ff5ab27020569c89
3
+ size 1093920
landmark2face_wy/data/image_folder.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:180bb0b0dc195aa073049a4c7630e071577f5607bbb3bd2c8247468ec84c7f6c
3
+ size 860856
landmark2face_wy/data/l2faceaudio512_dataset.py ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path
2
+ import random
3
+ from data.base_dataset import BaseDataset, get_params, get_transform
4
+ import torchvision.transforms as transforms
5
+ from data.image_folder import make_dataset
6
+ from PIL import Image, ImageEnhance
7
+ import numpy as np
8
+ import cv2
9
+ import torch
10
+
11
+
12
+ def get_idts(config_name):
13
+ idts = list()
14
+ with open(os.path.join('../config', config_name + '.txt')) as f:
15
+ for line in f:
16
+ line = line.strip()
17
+ idts.append(line)
18
+ return idts
19
+
20
+
21
+ class L2FaceAudio512Dataset(BaseDataset):
22
+ def __init__(self, opt, mode=None):
23
+ BaseDataset.__init__(self, opt)
24
+ img_size = opt.img_size
25
+ idts = get_idts(opt.name.split('_')[0])
26
+ print("---------load data list--------: ", idts)
27
+ if mode == 'train':
28
+ self.labels = []
29
+ for idt_name in idts:
30
+ # root = '../AnnVI/feature/{}'.format(idt_name)
31
+ root = os.path.join(opt.feature_path, idt_name)
32
+ if opt.audio_feature == "mfcc":
33
+ training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
34
+ else:
35
+ training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
36
+ training_data = torch.load(training_data_path)
37
+ img_paths = training_data['img_paths']
38
+ audio_features = training_data['audio_features']
39
+ index = [i[0].split('/')[-1] for i in img_paths]
40
+
41
+ image_dir = '{}/{}_dlib_crop'.format(root, img_size)
42
+ # label_dir = '{}/512_landmark_crop'.format(root)
43
+
44
+ # if 'man' in opt.name:
45
+ # imgs.sort(key=lambda x:int(x.split('.')[0]))
46
+ # else:
47
+ # imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), int(x.split('.')[0].split('-')[1])))
48
+ for img in range(len(index)):
49
+ img_path = os.path.join(image_dir, index[img])
50
+ audio_feature = audio_features[img]
51
+ self.labels.append([img_path, audio_feature])
52
+ # transforms.Resize([img_size, img_size], Image.BICUBIC),
53
+ self.transforms_image = transforms.Compose([transforms.ToTensor(),
54
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
55
+ # transforms.Resize([img_size, img_size], Image.BICUBIC),
56
+ self.transforms_label = transforms.Compose([transforms.ToTensor(),
57
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
58
+ self.shuffle()
59
+ elif mode == 'test':
60
+ self.labels = []
61
+ for idt_name in idts:
62
+ # root = '../AnnVI/feature/{}'.format(idt_name)
63
+ root = os.path.join(opt.feature_path, idt_name)
64
+ if opt.audio_feature == "mfcc":
65
+ training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
66
+ else:
67
+ training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
68
+ training_data = torch.load(training_data_path)
69
+ img_paths = training_data['img_paths']
70
+ audio_features = training_data['audio_features']
71
+ index = [i[0].split('/')[-1] for i in img_paths]
72
+
73
+ image_dir = '{}/{}_dlib_crop'.format(root, img_size)
74
+ # label_dir = '{}/512_landmark_crop'.format(root)
75
+
76
+ # if 'man' in opt.name:
77
+ # imgs.sort(key=lambda x:int(x.split('.')[0]))
78
+ # else:
79
+ # imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), int(x.split('.')[0].split('-')[1])))
80
+ for img in range(len(index)):
81
+ img_path = os.path.join(image_dir, index[img])
82
+ audio_feature = audio_features[img]
83
+ self.labels.append([img_path, audio_feature])
84
+ # transforms.Resize([img_size, img_size], Image.BICUBIC),
85
+ self.transforms_image = transforms.Compose([transforms.ToTensor(),
86
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
87
+ # transforms.Resize([img_size, img_size], Image.BICUBIC),
88
+ self.transforms_label = transforms.Compose([transforms.ToTensor(),
89
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
90
+ self.shuffle()
91
+
92
+ def shuffle(self):
93
+ random.shuffle(self.labels)
94
+
95
+ def add_mouth_mask2(self, img):
96
+ mask = np.ones_like(img)
97
+ rect_area = [img.shape[1] // 2 - np.random.randint(50, 60), np.random.randint(226, 246), 30, 256 - 30]
98
+ mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
99
+ x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
100
+ x = np.flip(x, 0)
101
+ y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
102
+ zz1 = -y - x + 88 > 0
103
+ zz2 = np.flip(zz1, 1)
104
+ zz = (zz1 + zz2) > 0
105
+ mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
106
+ imgm = img * mask
107
+ return imgm
108
+
109
    def __getitem__(self, index):
        """Return one training sample as a dict.

        Keys: 'A' (random reference face tensor), 'A_label'/'B_label'
        (the same padded audio-feature tensor), 'B' (target face tensor,
        eyes masked), 'mask_B' (target with the mouth region also zeroed).
        """
        # Disable OpenCV's internal threading (relevant inside DataLoader workers).
        cv2.setNumThreads(0)
        img_path, audio_feature = self.labels[index]
        img = np.array(Image.open(img_path).convert('RGB'))
        # Per-channel brightness jitter in [-20, 20).
        # NOTE(review): the int8 addition can wrap before np.clip — confirm
        # the source pixel range makes this acceptable.
        img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
        # Random 512x512 crop offset (assumes stored crops are >= 532x532 — TODO confirm).
        cut_pad1 = np.random.randint(0, 20)
        cut_pad2 = np.random.randint(0, 20)
        img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]

        #### elliptical mask hiding the collar (disabled) ####
        '''
        mask = np.zeros(img.shape, dtype=np.uint8)
        cv2.ellipse(mask, (img.shape[1] // 2, img.shape[0] // 2 - 160 - cut_pad1), (img.shape[1] // 2 + 10, img.shape[0]), 0, 0, 360, (255, 255, 255), -1)
        '''
        #### mask covering the eye region ####
        mask = np.ones(img.shape, dtype=np.uint8) * 255
        mask[40 - cut_pad1:140 - cut_pad1, 110 - cut_pad2:-110 - cut_pad2] = 0
        img = cv2.bitwise_and(img, mask)

        mask_B = img.copy()
        mask_B = cv2.resize(mask_B, (256, 256))
        ########## neck-segmentation mask (disabled) #############
        # img_edge = cv2.imread(img_path.replace("dlib_crop", "dlib_crop_neck"))
        # img_edge = img_edge[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]
        # mask_B = cv2.bitwise_and(img, 255 - img_edge)
        # img_edge[:128, :, :] = img[:128, :, :]

        ########## extra elliptical neck mask (disabled) #############
        '''
        maske = np.zeros(img.shape, dtype=np.uint8)
        cv2.ellipse(maske, (img.shape[1] // 2, img.shape[0] // 2 + 50),
                    (img.shape[1] // 4 + np.random.randint(-5, 5), img.shape[0] // 3 + np.random.randint(-10, 10)),
                    0, 0, 360, (255, 255, 255), -1)
        maske[:img.shape[0] // 2, :, :] = 0
        mask_B = cv2.bitwise_and(mask_B, 255-maske)
        '''
        ########## previous rectangular mouth mask #############
        mask_end = np.random.randint(236, 256)
        mask_B[mask_B.shape[1] // 2 - np.random.randint(40, 50):mask_end, 30:-30] = 0
        ########## previous rectangular mouth mask #############
        ########## triangular mouth mask (disabled) #############
        # mask_B = self.add_mouth_mask2(mask_B)
        ########## triangular mouth mask (disabled) #############
        # mask_B[mask_B.shape[1] // 2 - 50:, 30:-30] = 0
        img = Image.fromarray(img)
        mask_B = Image.fromarray(mask_B)
        img = self.transforms_image(img)
        mask_B = self.transforms_image(mask_B)
        # lab = Image.open(lab_path).convert('RGB')
        # lab = self.transforms_label(lab)
        # Copy the audio feature into a fixed 256x256 float32 plane
        # (zero-padded on the right/bottom).
        audio = np.zeros((256, 256), dtype=np.float32)
        audio_feature = np.array(audio_feature)
        audio[:audio_feature.shape[0], :audio_feature.shape[1]] = audio_feature
        audio = torch.tensor([audio])

        # Reference image 'A': a random frame from the sample pool, masked
        # with the eye mask built above for 'B' (its own recompute is
        # commented out below) — presumably intentional; verify.
        imgA_path, _ = random.sample(self.labels, 1)[0]
        imgA = np.array(Image.open(imgA_path).convert('RGB'))
        cut_pad1 = np.random.randint(0, 20)
        cut_pad2 = np.random.randint(0, 20)
        imgA = imgA[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
        # mask = np.ones(imgA.shape, dtype=np.uint8) * 255
        # mask[40 - cut_pad1:140 - cut_pad1, 110 - cut_pad2:-110 - cut_pad2] = 0
        imgA = cv2.bitwise_and(imgA, mask)
        imgA = Image.fromarray(imgA)
        imgA = self.transforms_image(imgA)
        return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
175
+
176
+ def __len__(self):
177
+ """Return the total number of images in the dataset."""
178
+ return len(self.labels)
179
+
180
+
181
if __name__ == '__main__':
    # Smoke test: build the dataset from training options and dump samples.
    from options.train_options import TrainOptions

    options = TrainOptions().parse()
    dataset = L2FaceDataset(options)
    print(len(dataset))
    for sample in dataset:
        print(sample)
landmark2face_wy/data/l2faceaudio_dataset.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path
2
+ import random
3
+ from data.base_dataset import BaseDataset, get_params, get_transform
4
+ import torchvision.transforms as transforms
5
+ from data.image_folder import make_dataset
6
+ from PIL import Image, ImageEnhance
7
+ import numpy as np
8
+ import cv2
9
+ import torch
10
+
11
+
12
def get_idts(config_name):
    """Read the identity list from '../config/<config_name>.txt'.

    Args:
        config_name: base name of the config file (no extension).

    Returns:
        One stripped identity name per non-empty line. Blank lines are
        skipped — the original appended empty strings for them, which
        later produced bogus feature paths.
    """
    with open(os.path.join('../config', config_name + '.txt')) as f:
        return [line.strip() for line in f if line.strip()]
19
+
20
+
21
class L2FaceAudioDataset(BaseDataset):
    """Paired face-image / audio-feature dataset for audio-driven face synthesis.

    Each sample pairs a masked 256x256 face crop with its padded audio
    feature, plus a randomly drawn reference face from the same pool.
    """

    def __init__(self, opt, mode=None):
        """Build the (img_path, audio_feature) sample list for *mode*.

        Args:
            opt: options object; reads opt.img_size, opt.name,
                opt.feature_path and opt.audio_feature.
            mode: 'train' or 'test'. Any other value (including the
                default None) leaves the dataset uninitialized — this
                mirrors the original behavior; verify callers always
                pass a mode.
        """
        BaseDataset.__init__(self, opt)
        img_size = opt.img_size
        idts = get_idts(opt.name.split('_')[0])
        print("---------load data list--------: ", idts)
        # The original duplicated this entire body in separate, byte-identical
        # 'train' and 'test' branches; they are merged here.
        if mode in ('train', 'test'):
            self.labels = []
            for idt_name in idts:
                root = os.path.join(opt.feature_path, idt_name)
                # mfcc features use the legacy file name without a suffix.
                if opt.audio_feature == "mfcc":
                    training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
                else:
                    training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
                training_data = torch.load(training_data_path)
                img_paths = training_data['img_paths']
                audio_features = training_data['audio_features']
                # Keep only the frame file name of each stored path.
                index = [i[0].split('/')[-1] for i in img_paths]

                image_dir = '{}/{}_dlib_crop'.format(root, img_size)
                for pos in range(len(index)):
                    self.labels.append([os.path.join(image_dir, index[pos]),
                                        audio_features[pos]])
            # Images and labels share the same [-1, 1] normalization.
            self.transforms_image = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
            self.transforms_label = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
            self.shuffle()

    def shuffle(self):
        """Shuffle the (img_path, audio_feature) sample list in place."""
        random.shuffle(self.labels)

    def add_mouth_mask2(self, img):
        """Zero out a roughly triangular mouth/chin region of *img*.

        Args:
            img: HxWx3 uint8 face crop; geometry assumes ~256x256.

        Returns:
            A new array with the wedge region zeroed; input unmodified.
        """
        mask = np.ones_like(img)
        # [top, bottom, left, right]; only the bottom edge is jittered here.
        rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30]
        height = rect_area[1] - rect_area[0]
        width = rect_area[3] - rect_area[2]
        # Row-index grid flipped so values grow toward the rect's top.
        x = np.flip(np.tile(np.arange(height)[:, np.newaxis], (1, width)), 0)
        # Column-index grid.
        y = np.tile(np.arange(width)[:, np.newaxis], (1, height)).transpose()
        zz1 = -y - x + 88 > 0          # wedge anchored at the left edge
        zz2 = np.flip(zz1, 1)          # mirrored wedge at the right edge
        zz = (zz1 + zz2) > 0
        # Boolean tile casts to mask dtype on assignment (the unused
        # mask_rect_area local and the redundant "* 1" are removed).
        mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3))
        return img * mask

    def __getitem__(self, index):
        """Return one training sample as a dict.

        Keys: 'A' (random reference face tensor), 'A_label'/'B_label'
        (the same padded audio tensor), 'B' (target face, eyes masked),
        'mask_B' (target with the mouth region also zeroed).
        """
        # Disable OpenCV threading (relevant inside DataLoader workers).
        cv2.setNumThreads(0)
        img_path, audio_feature = self.labels[index]
        img = np.array(Image.open(img_path).convert('RGB'))
        # Per-channel brightness jitter in [-20, 20); NOTE(review): the
        # int8 addition can wrap before np.clip — confirm acceptable.
        img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
        # Random 256x256 crop (assumes stored crops >= 266x266 — TODO confirm).
        cut_pad1 = np.random.randint(0, 10)
        cut_pad2 = np.random.randint(0, 10)
        img = img[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]

        #### mask covering the eye region ####
        mask = np.ones(img.shape, dtype=np.uint8) * 255
        mask[20 - cut_pad1:70 - cut_pad1, 55 - cut_pad2:-55 - cut_pad2] = 0
        img = cv2.bitwise_and(img, mask)

        mask_B = img.copy()
        mask_end = np.random.randint(236, 256)
        ########## previous rectangular mouth mask #############
        mask_B[mask_B.shape[1] // 2 - np.random.randint(40, 50):mask_end, 30:-30] = 0
        ########## triangular mouth mask (disabled) #############
        # mask_B = self.add_mouth_mask2(mask_B)
        img = Image.fromarray(img)
        mask_B = Image.fromarray(mask_B)
        img = self.transforms_image(img)
        mask_B = self.transforms_image(mask_B)
        # Copy the audio feature into a fixed 256x256 float32 plane
        # (zero-padded on the right/bottom).
        audio = np.zeros((256, 256), dtype=np.float32)
        audio_feature = np.array(audio_feature)
        audio[:audio_feature.shape[0], :audio_feature.shape[1]] = audio_feature
        audio = torch.tensor([audio])

        # Reference image 'A': random frame, masked with the eye mask built
        # for 'B' (offsets from the earlier crop) — preserved as-is from the
        # original; presumably intentional, verify.
        imgA_path, _ = random.sample(self.labels, 1)[0]
        imgA = np.array(Image.open(imgA_path).convert('RGB'))
        cut_pad1 = np.random.randint(0, 10)
        cut_pad2 = np.random.randint(0, 10)
        imgA = imgA[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]
        imgA = cv2.bitwise_and(imgA, mask)
        imgA = Image.fromarray(imgA)
        imgA = self.transforms_image(imgA)
        return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}

    def __len__(self):
        """Return the total number of images in the dataset."""
        return len(self.labels)
156
+
157
+
158
if __name__ == '__main__':
    # Smoke test: build the dataset from training options and dump samples.
    from options.train_options import TrainOptions

    opt = TrainOptions().parse()
    # BUG FIX: this module defines L2FaceAudioDataset; the original
    # instantiated L2FaceDataset, which is not defined here (NameError).
    dataset = L2FaceAudioDataset(opt)
    dataset_size = len(dataset)
    print(dataset_size)
    for i, data in enumerate(dataset):
        print(data)
+ print(data)
landmark2face_wy/digitalhuman_interface.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1bc38a9e2a82a9022573da1e8326128a98a661a17d61283f3911c5ee3aa504a
3
+ size 5284104
landmark2face_wy/loss/__init__.py ADDED
File without changes
landmark2face_wy/loss/perceptual.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:784d25b825d83d4634932f2628747a4f7f9f6c8bfe84610bc757131810c2e412
3
+ size 2056248
landmark2face_wy/models/DINet.cpython-38-x86_64-linux-gnu.so ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71be733004b4ba60a93dee4971f4e69eefe575dfc99d8e3dffc3ed160d9ba4d4
3
+ size 10580992