diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..826c3f13661f416cf095dcb91b1f0e92c42b7d02 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,85 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+example/audio.wav filter=lfs diff=lfs merge=lfs -text
+example/video.mp4 filter=lfs diff=lfs merge=lfs -text
+face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+h_utils/custom.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+h_utils/obs_client.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+h_utils/request_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/data/base_dataset.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/data/image_folder.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/digitalhuman_interface.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/loss/perceptual.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/models/DINet.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/models/base_function.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/models/base_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/models/face3d2face_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/models/face_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/models/l2faceaudio_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/models/networks.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/models/networks_HD.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/models/networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/models/pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/models/pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/options/base_options.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/options/test_options.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/options/train_options.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/sync_batchnorm/batchnorm.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/sync_batchnorm/batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/sync_batchnorm/comm.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/sync_batchnorm/replicate.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/sync_batchnorm/unittest.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/util/flow_util.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/util/get_data.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/util/html.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/util/image_pool.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/util/util.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+landmark2face_wy/util/visualizer.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+model_lib/base_wrapper/onnx_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+service/server.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+service/trans_dh_service.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/transformer/asr_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/transformer/attention.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/transformer/cmvn.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/transformer/convolution.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/transformer/ctc.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/transformer/decoder.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/transformer/decoder_layer.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/transformer/embedding.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/transformer/encoder.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/transformer/encoder_layer.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/transformer/label_smoothing_loss.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/transformer/positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/transformer/subsampling.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/transformer/swish.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/utils/checkpoint.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/utils/common.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/utils/ctc_util.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/utils/executor.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/utils/mask.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+wenet/utils/scheduler.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+xseg/dfl_xseg_api.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+y_utils/config.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+y_utils/lcr.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+y_utils/liblcr.so filter=lfs diff=lfs merge=lfs -text
+y_utils/logger.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+y_utils/md5.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+y_utils/time_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+y_utils/tools.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..f4235a4031d64a8a09c6ad961dbddab385412534
--- /dev/null
+++ b/README.md
@@ -0,0 +1,73 @@
+
+[](https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE)
+
+
+
+**[中文](#chinese-version)** | **[English](README_en.md)**
+
+---
+
+
+
+# HeyGem-Linux-Python-Hack
+
+## 项目简介
+
+[HeyGem-Linux-Python-Hack] 是一个基于 Python 的数字人项目,它从 [HeyGem.ai](https://github.com/GuijiAI/HeyGem.ai) 中提取出来,它能够直接在 Linux 系统上运行,摆脱了对 Docker 和 Windows 系统的依赖。我们的目标是提供一个更易于部署和使用的数字人解决方案。
+
+**如果你觉得这个项目对你有帮助,欢迎给我们 Star!**
+**如果运行过程中遇到问题,请先查阅已有 Issue 以及 Google/Baidu/AI;若仍未解决,欢迎提交 Issue!**
+
+## 主要特性
+
+* 无需 Docker: 直接在 Linux 系统上运行,简化部署流程。
+* 无需 Windows: 完全基于 Linux 开发和测试。
+* Python 驱动: 使用 Python 语言开发,易于理解和扩展。
+* 开发者友好: 易于使用和扩展。
+* 完全离线。
+
+## 开始使用
+
+### 安装
+本项目**支持且仅支持 Linux & python3.8 环境**
+请确保你的 Linux 系统上已经安装了 **Python 3.8**。然后,使用 pip 安装项目依赖项
+同时也提供一个备用的环境 [requirements_0.txt](requirements_0.txt),遇到问题的话,你可以参考它来建立一个新的环境。
+**具体的 onnxruntime-gpu / torch 等需要结合你的机器上的 cuda 版本去尝试一些组合,否则仍旧可能遇到问题。**
+**请尽量不要询问任何关于 pip 的问题,感谢合作**
+
+```bash
+# 直接安装整个 requirements.txt 不一定成功,更建议跑代码观察报错信息,然后根据报错信息结合 requirements 去尝试安装,祝你顺利。
+# pip install -r requirements.txt
+```
+
+### 使用
+把项目克隆到本地
+```bash
+git clone https://github.com/Holasyb918/HeyGem-Linux-Python-Hack
+cd HeyGem-Linux-Python-Hack
+bash download.sh
+```
+#### 开始使用
+* repo 中已提供可以用于 demo 的音视频样例,代码可以直接运行。
+#### command:
+```bash
+python run.py
+```
+
+* 如果要使用自己的数据,可以外部传入参数,请注意,**path 是本地文件,且仅支持相对路径**.
+
+#### command:
+```bash
+python run.py --audio_path example/audio.wav --video_path example/video.mp4
+```
+#### gradio:
+```bash
+python app.py
+# 请等待模型初始化完成后提交任务
+```
+
+## Contributing
+欢迎贡献!
+
+## License
+参考 heyGem.ai 的协议.
diff --git a/README_en.md b/README_en.md
new file mode 100644
index 0000000000000000000000000000000000000000..243c4567271ec87e8f82ad844db64c3ece8fb5ca
--- /dev/null
+++ b/README_en.md
@@ -0,0 +1,66 @@
+
+[](https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE)
+
+
+
+**[中文](./README.md)** | **[English](#english-version)**
+
+---
+
+
+
+# HeyGem-Linux-Python-Hack
+
+## Introduction
+
+[HeyGem-Linux-Python-Hack] is a Python-based digital human project extracted from [HeyGem.ai](https://github.com/GuijiAI/HeyGem.ai). It is designed to run directly on Linux systems, eliminating the need for Docker and Windows. Our goal is to provide an easier-to-deploy and user-friendly digital human solution.
+
+**Feel free to Star us if you find this project useful!**
+**Please submit an Issue if you run into any problems!**
+
+## Key Features
+
+* No Docker Required: Runs directly on Linux systems, simplifying the deployment process.
+* No Windows Required: Fully developed and tested on Linux.
+* Python Powered: Developed using the Python language, making it easy to understand and extend.
+* Developer-Friendly: Easy to use, and easy to extend.
+
+## Getting Started
+
+### Installation
+
+Please ensure that **Python 3.8** is installed on your Linux system. Then, you can install the project dependencies using pip:
+
+```bash
+pip install -r requirements.txt
+```
+
+### Usage
+Clone this repository to your local machine:
+```bash
+git clone https://github.com/Holasyb918/HeyGem-Linux-Python-Hack
+cd HeyGem-Linux-Python-Hack
+bash download.sh
+```
+#### Getting Started
+* Audio and video examples that can be used for the demo are already provided in the repo, and the code can be run directly.
+#### Command:
+```bash
+python run.py
+```
+* If you want to use your own data, you can pass the parameters externally. **Please note that each path must point to a local file, and only relative paths are supported.**
+#### command:
+```bash
+python run.py --audio_path example/audio.wav --video_path example/video.mp4
+```
+#### gradio:
+```bash
+python app.py
+# Please wait until the processor has finished initializing before submitting a task.
+```
+
+## Contributing
+Contributions are welcome!
+
+## License
+This project is licensed under the HeyGem.ai License.
diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb75637941f58c701a4f9e859ed5c84c6e15336a
--- /dev/null
+++ b/app.py
@@ -0,0 +1,230 @@
+import argparse
+import gc
+import json
+import os
+
+os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0"
+import subprocess
+import threading
+import time
+import traceback
+import uuid
+from enum import Enum
+import queue
+import shutil
+from functools import partial
+
+import cv2
+import gradio as gr
+from flask import Flask, request
+
+import service.trans_dh_service
+from h_utils.custom import CustomError
+from y_utils.config import GlobalConfig
+from y_utils.logger import logger
+
+
+def write_video_gradio(
+ output_imgs_queue,
+ temp_dir,
+ result_dir,
+ work_id,
+ audio_path,
+ result_queue,
+ width,
+ height,
+ fps,
+ watermark_switch=0,
+ digital_auth=0,
+ temp_queue=None,
+):
+ output_mp4 = os.path.join(temp_dir, "{}-t.mp4".format(work_id))
+ fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+ result_path = os.path.join(result_dir, "{}-r.mp4".format(work_id))
+ video_write = cv2.VideoWriter(output_mp4, fourcc, fps, (width, height))
+ print("Custom VideoWriter init done")
+ try:
+ while True:
+ state, reason, value_ = output_imgs_queue.get()
+ if type(state) == bool and state == True:
+ logger.info(
+ "Custom VideoWriter [{}]视频帧队列处理已结束".format(work_id)
+ )
+ logger.info(
+ "Custom VideoWriter Silence Video saved in {}".format(
+ os.path.realpath(output_mp4)
+ )
+ )
+ video_write.release()
+ break
+ else:
+ if type(state) == bool and state == False:
+ logger.error(
+ "Custom VideoWriter [{}]任务视频帧队列 -> 异常原因:[{}]".format(
+ work_id, reason
+ )
+ )
+ raise CustomError(reason)
+ for result_img in value_:
+ video_write.write(result_img)
+ if video_write is not None:
+ video_write.release()
+ if watermark_switch == 1 and digital_auth == 1:
+ logger.info(
+ "Custom VideoWriter [{}]任务需要水印和数字人标识".format(work_id)
+ )
+ if width > height:
+ command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
+ audio_path,
+ output_mp4,
+ GlobalConfig.instance().watermark_path,
+ GlobalConfig.instance().digital_auth_path,
+ result_path,
+ )
+ logger.info("command:{}".format(command))
+ else:
+ command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
+ audio_path,
+ output_mp4,
+ GlobalConfig.instance().watermark_path,
+ GlobalConfig.instance().digital_auth_path,
+ result_path,
+ )
+ logger.info("command:{}".format(command))
+ elif watermark_switch == 1 and digital_auth == 0:
+ logger.info("Custom VideoWriter [{}]任务需要水印".format(work_id))
+ command = 'ffmpeg -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10" -c:a aac -crf 15 -strict -2 {}'.format(
+ audio_path,
+ output_mp4,
+ GlobalConfig.instance().watermark_path,
+ result_path,
+ )
+ logger.info("command:{}".format(command))
+ elif watermark_switch == 0 and digital_auth == 1:
+ logger.info("Custom VideoWriter [{}]任务需要数字人标识".format(work_id))
+ if width > height:
+ command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
+ audio_path,
+ output_mp4,
+ GlobalConfig.instance().digital_auth_path,
+ result_path,
+ )
+ logger.info("command:{}".format(command))
+ else:
+ command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
+ audio_path,
+ output_mp4,
+ GlobalConfig.instance().digital_auth_path,
+ result_path,
+ )
+ logger.info("command:{}".format(command))
+ else:
+ command = "ffmpeg -loglevel warning -y -i {} -i {} -c:a aac -c:v libx264 -crf 15 -strict -2 {}".format(
+ audio_path, output_mp4, result_path
+ )
+ logger.info("Custom command:{}".format(command))
+ subprocess.call(command, shell=True)
+ print("###### Custom Video Writer write over")
+ print(f"###### Video result saved in {os.path.realpath(result_path)}")
+ result_queue.put([True, result_path])
+ # temp_queue.put([True, result_path])
+ except Exception as e:
+ logger.error(
+ "Custom VideoWriter [{}]视频帧队列处理异常结束,异常原因:[{}]".format(
+ work_id, e.__str__()
+ )
+ )
+ result_queue.put(
+ [
+ False,
+ "[{}]视频帧队列处理异常结束,异常原因:[{}]".format(
+ work_id, e.__str__()
+ ),
+ ]
+ )
+ logger.info("Custom VideoWriter 后处理进程结束")
+
+
+service.trans_dh_service.write_video = write_video_gradio
+
+
+class VideoProcessor:
+ def __init__(self):
+ self.task = service.trans_dh_service.TransDhTask()
+ self.basedir = GlobalConfig.instance().result_dir
+ self.is_initialized = False
+ self._initialize_service()
+ print("VideoProcessor init done")
+
+ def _initialize_service(self):
+ logger.info("开始初始化 trans_dh_service...")
+ try:
+ time.sleep(5)
+ logger.info("trans_dh_service 初始化完成。")
+ self.is_initialized = True
+ except Exception as e:
+ logger.error(f"初始化 trans_dh_service 失败: {e}")
+
+ def process_video(
+ self, audio_file, video_file, watermark=False, digital_auth=False
+ ):
+ while not self.is_initialized:
+ logger.info("服务尚未完成初始化,等待 1 秒...")
+ time.sleep(1)
+ work_id = str(uuid.uuid1())
+ code = work_id
+ temp_dir = os.path.join(GlobalConfig.instance().temp_dir, work_id)
+ result_dir = GlobalConfig.instance().result_dir
+ video_writer_thread = None
+ final_result = None
+
+ try:
+ cap = cv2.VideoCapture(video_file)
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+ fps = cap.get(cv2.CAP_PROP_FPS)
+ cap.release()
+
+ audio_path = audio_file
+ video_path = video_file
+
+ self.task.task_dic[code] = ""
+ self.task.work(audio_path, video_path, code, 0, 0, 0, 0)
+
+ result_path = self.task.task_dic[code][2]
+ final_result_dir = os.path.join("result", code)
+ os.makedirs(final_result_dir, exist_ok=True)
+ os.system(f"mv {result_path} {final_result_dir}")
+ os.system(
+ f"rm -rf {os.path.join(os.path.dirname(result_path), code + '*.*')}"
+ )
+ result_path = os.path.realpath(
+ os.path.join(final_result_dir, os.path.basename(result_path))
+ )
+ return result_path
+
+ except Exception as e:
+ logger.error(f"处理视频时发生错误: {e}")
+ raise gr.Error(str(e))
+
+
+if __name__ == "__main__":
+ processor = VideoProcessor()
+
+ inputs = [
+ gr.File(label="上传音频文件/upload audio file"),
+ gr.File(label="上传视频文件/upload video file"),
+ ]
+ outputs = gr.Video(label="生成的视频/Generated video")
+
+ title = "数字人视频生成/Digital Human Video Generation"
+ description = "上传音频和视频文件,即可生成数字人视频。/Upload audio and video files to generate digital human videos."
+
+ demo = gr.Interface(
+ fn=processor.process_video,
+ inputs=inputs,
+ outputs=outputs,
+ title=title,
+ description=description,
+ )
+ demo.queue().launch()
diff --git a/config/config.ini b/config/config.ini
new file mode 100644
index 0000000000000000000000000000000000000000..5ad541391190e67578940d63eb907f7b54488434
--- /dev/null
+++ b/config/config.ini
@@ -0,0 +1,23 @@
+[log]
+log_dir = ./log
+log_file = dh.log
+
+[http_server]
+server_ip = 0.0.0.0
+server_port = 8383
+
+[temp]
+temp_dir = ./
+clean_switch = 1
+
+[result]
+result_dir = ./result
+clean_switch = 0
+
+[digital]
+batch_size = 4
+
+[register]
+url = http://172.16.160.51:12120
+report_interval = 10
+enable=0
diff --git a/download.sh b/download.sh
new file mode 100644
index 0000000000000000000000000000000000000000..2bc2fa5bc45c01a6b3f45bfe4ec058ebdead8429
--- /dev/null
+++ b/download.sh
@@ -0,0 +1,32 @@
+set -e
+set -u
+
+# face attr
+mkdir -p face_attr_detect
+wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/face_attr_epoch_12_220318.onnx -O face_attr_detect/face_attr_epoch_12_220318.onnx
+
+# face detect
+mkdir -p face_detect_utils/resources
+wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/pfpld_robust_sim_bs1_8003.onnx -O face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx
+wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/scrfd_500m_bnkps_shape640x640.onnx -O face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx
+wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/model_float32.onnx -O face_detect_utils/resources/model_float32.onnx
+
+# dh model
+mkdir -p landmark2face_wy/checkpoints/anylang
+wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/dinet_v1_20240131.pth -O landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth
+
+# face parsing
+mkdir -p pretrain_models/face_lib/face_parsing
+wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/79999_iter.onnx -O pretrain_models/face_lib/face_parsing/79999_iter.onnx
+
+# gfpgan
+mkdir -p pretrain_models/face_lib/face_restore/gfpgan
+wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/GFPGANv1.4.onnx -O pretrain_models/face_lib/face_restore/gfpgan/GFPGANv1.4.onnx
+
+# xseg
+mkdir -p xseg
+wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/xseg_211104_4790000.onnx -O xseg/xseg_211104_4790000.onnx
+
+# wenet
+mkdir -p wenet/examples/aishell/aidata/exp/conformer
+wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/wenetmodel.pt -O wenet/examples/aishell/aidata/exp/conformer/wenetmodel.pt
\ No newline at end of file
diff --git a/example/audio.wav b/example/audio.wav
new file mode 100644
index 0000000000000000000000000000000000000000..96521569dd0506b15ce95d3b9791ce10bf88b052
--- /dev/null
+++ b/example/audio.wav
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:886f4e5cd90b79c8575c8bb18c93d41543b2619272f75841dac095a65c8f85dd
+size 192044
diff --git a/example/video.mp4 b/example/video.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..f02062222f7ac4286eedcde24372117655701db9
--- /dev/null
+++ b/example/video.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a11e32bda4b3d15777ed8d481e66859805e71c5168221d0098eac2b31b3f4e7b
+size 7068410
diff --git a/face_attr_detect/.DS_Store b/face_attr_detect/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..38734ca2de71d90578b12a191d5ff30a57f26d5c
Binary files /dev/null and b/face_attr_detect/.DS_Store differ
diff --git a/face_attr_detect/__init__.py b/face_attr_detect/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a7bb5272a2b009694971a124620c7f8cccb4563
--- /dev/null
+++ b/face_attr_detect/__init__.py
@@ -0,0 +1 @@
+from .face_attr import FaceAttr
diff --git a/face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so b/face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..ef3c63b0d4794d279a14a5dcd68e69ea4f7549ce
--- /dev/null
+++ b/face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0aa56c5cbaddc1bc7204823fd2252cf54d056365795737c846f876236a3e5056
+size 441864
diff --git a/face_attr_detect/face_attr_epoch_12_220318.onnx b/face_attr_detect/face_attr_epoch_12_220318.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..cddcb2d37f06e7c8b307864c4b8e4e8a167c5158
--- /dev/null
+++ b/face_attr_detect/face_attr_epoch_12_220318.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0fa6604beacd1e560ffc4cae6fa1537591d5f1a765a9f55473a295a1d22da3af
+size 3723167
diff --git a/face_detect_utils/__init__.py b/face_detect_utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so b/face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..2ff5e75da52735d4728d2148a2e7fe7fc578312c
--- /dev/null
+++ b/face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68b5fd825eebc9421090c1daf3e940833b7bf5712ecee16deef937c87bbe666e
+size 1363368
diff --git a/face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so b/face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..c230fe24694ff027d9e98eca344bd2d4bebc6d13
--- /dev/null
+++ b/face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1897346bf14dbbac7328a43598ba0c6d3f4db9ab6628dbebb381d4139aca179e
+size 1158712
diff --git a/face_detect_utils/resources/model_float32.onnx b/face_detect_utils/resources/model_float32.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..5a3232dae666298ddda0bebeb63db2dbf1ea7c1f
--- /dev/null
+++ b/face_detect_utils/resources/model_float32.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e5dc9dd52836b2029a599e74134f1a0f03e416db3e40e932f69609adb0238ad
+size 17315434
diff --git a/face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx b/face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..b1bb1378eea30fcd79a3ae28f11c55aefab2f17c
--- /dev/null
+++ b/face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bd9913817152831562cccde7e51ed431d1cf4547d8f21e0876b58a0d82baa55
+size 6889235
diff --git a/face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx b/face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..be85dd4610c207a52ba07b98d4ea4f0fdfc0aee0
--- /dev/null
+++ b/face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b467f878e26ff1e7ee05cd9936fdff63fa2a5af5d732ed17ee231f2dd5cc07ae
+size 2524648
diff --git a/face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so b/face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..7b9ca27f4d993b16034f9fafebcd4f18ca183d80
--- /dev/null
+++ b/face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7fa07e8146248e1b4deaafdfa0cc6c1e1b7a9d641db536aa3ae9b9ee10ab3b01
+size 3178688
diff --git a/face_lib/__init__.py b/face_lib/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/face_lib/face_detect_and_align/__init__.py b/face_lib/face_detect_and_align/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2e8e58ddd298d9a9776e3a74822b57c500609155
--- /dev/null
+++ b/face_lib/face_detect_and_align/__init__.py
@@ -0,0 +1,3 @@
+from .face_align_5_landmarks import FaceDetect5Landmarks
+from .face_align_utils import estimate_norm
+
diff --git a/face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so b/face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..e8dfa4558957ca42fd45cfe4a75f8e9ed54ce5e4
--- /dev/null
+++ b/face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f0fd0fff60f8fb1fe6cbb1b549d5c43ae9bfaef1e5f4ee4edb27085d3023d22
+size 1321904
diff --git a/face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so b/face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..86c01e70c7021de5baaeb2743edfde9a720dec9e
--- /dev/null
+++ b/face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da29cd727e8bf8f7107c322d5d40ef8596b29e2a858cad779362dbf2516c38a0
+size 1611632
diff --git a/face_lib/face_detect_and_align/scrfd_insightface/__init__.py b/face_lib/face_detect_and_align/scrfd_insightface/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..c1b34f01e048af40146ef07235a612629890b594
--- /dev/null
+++ b/face_lib/face_detect_and_align/scrfd_insightface/__init__.py
@@ -0,0 +1,5 @@
+# -- coding: utf-8 --
+# @Time : 2021/11/10
+
+
+from .scrfd import SCRFD
\ No newline at end of file
diff --git a/face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so b/face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..2829e1566dd0737b7884b5f2a8b3d96ab776caf0
--- /dev/null
+++ b/face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8b9f64f8ef1c198e7d240ba6f1c9e4ff333c48f0259b71e74ca466d5ea274bb
+size 2529880
diff --git a/face_lib/face_parsing/__init__.py b/face_lib/face_parsing/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2830c3f2937803943038b5b64337387dc0faafd5
--- /dev/null
+++ b/face_lib/face_parsing/__init__.py
@@ -0,0 +1,6 @@
+# -- coding: utf-8 --
+# @Time : 2022/3/29
+
+
+from .face_parsing_api import FaceParsing
+# from .dfl_xseg_net import XsegNet
diff --git a/face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so b/face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..6cfd45471143586b109da23be31c45a86eb07fa5
--- /dev/null
+++ b/face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04931709d9c22f909e7ead81acb06cae05b70162fbcb6d2055ac7315b61834d3
+size 1053792
diff --git a/face_lib/face_restore/__init__.py b/face_lib/face_restore/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..76500b3b83375ff989281e919fb8e6046c94e3f6
--- /dev/null
+++ b/face_lib/face_restore/__init__.py
@@ -0,0 +1,2 @@
+
+from .gfpgan_onnx.gfpgan_onnx_api import GFPGAN
diff --git a/face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so b/face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..3101bdfc574235256bf5f7a5972bd4ce9f612f51
--- /dev/null
+++ b/face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2088f84d33b0a442e7dcb04135939e56d918b8edefd1de0b06340cb38573d1e
+size 567104
diff --git a/h_utils/__init__.py b/h_utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..2c7305e619fd17580d4403c0764f4c8e7627427c
--- /dev/null
+++ b/h_utils/__init__.py
@@ -0,0 +1,9 @@
+#!/user/bin/env python
+# coding=utf-8
+"""
+@project : dhp-service
+@author : huyi
+@file : __init__.py.py
+@ide : PyCharm
+@time : 2021-08-18 15:45:13
+"""
\ No newline at end of file
diff --git a/h_utils/custom.cpython-38-x86_64-linux-gnu.so b/h_utils/custom.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..5a600ab8e7aa02c037f74df97072d983833a4e9e
--- /dev/null
+++ b/h_utils/custom.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37ecc1ed06eb9b804f9de3470dbe6780976514d120bde8fed20d67c4cd26fe0e
+size 259136
diff --git a/h_utils/obs_client.cpython-38-x86_64-linux-gnu.so b/h_utils/obs_client.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..fe6959bc611d429f7c1b318de7fc35be94984122
--- /dev/null
+++ b/h_utils/obs_client.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d881bc9a1303697bd912c005c71ccc74bab724f1ade6e1156c2d9ca0277e344
+size 982656
diff --git a/h_utils/request_utils.cpython-38-x86_64-linux-gnu.so b/h_utils/request_utils.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..1be50bc25ede8efe01873d1a6cb66cf3535eabf2
--- /dev/null
+++ b/h_utils/request_utils.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:215847205ce3a0f416ee3f5d07b4406b88ac0815cdd3e671c0d317c649cf2420
+size 1304616
diff --git a/h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so b/h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..c417c2c1adeb6877f8937837d5e909815035970f
--- /dev/null
+++ b/h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fcfcce02dcbc5f9740329c10ab5fd0bfb157f3e6eb2fa4622adb901ac1feab5
+size 607848
diff --git a/h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so b/h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..0f9ee726b3745ac8b63694e053d70cdf12543243
--- /dev/null
+++ b/h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4baf454a9940278b6696977b71a489ccf7c920faf37340b9968dcddfa582c508
+size 594864
diff --git a/landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..4e80ff7a18dbbcd19124ee6fd9c85c6305607db6
--- /dev/null
+++ b/landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ced0b512524155f205d2b4c6a46a1a63c2d347387b30550a1fd99ccd53df172
+size 1749648
diff --git a/landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth b/landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth
new file mode 100644
index 0000000000000000000000000000000000000000..14e330fceadf0b678821a4e6be4d94c68cef65c2
--- /dev/null
+++ b/landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c4568b1f1f2890b4a92edc3f9457af63f908065961233af2125d060cb56fd9e
+size 392392973
diff --git a/landmark2face_wy/checkpoints/test/opt.txt b/landmark2face_wy/checkpoints/test/opt.txt
new file mode 100644
index 0000000000000000000000000000000000000000..785e8614bc4466272be2b1176f4e559b76ac34e4
--- /dev/null
+++ b/landmark2face_wy/checkpoints/test/opt.txt
@@ -0,0 +1,74 @@
+----------------- Options ---------------
+ aspect_ratio: 1.0
+ audio_feature: 3dmm
+ batch_size: 16
+ checkpoints_dir: ./landmark2face_wy/checkpoints
+ crop_size: 256
+ dataroot: ./data
+ dataset_mode: Facereala3dmm
+ direction: AtoB
+ display_winsize: 256
+ distributed: False
+ epoch: latest
+ eval: False
+ feat_num: 3
+ feature_path: ../AnnI_deep3dface_256_contains_id/
+ fp16: False
+ gpu_ids: 0
+ img_size: 256
+ init_gain: 0.02
+ init_type: normal
+ input_nc: 3
+ instance_feat: False
+ isTrain: False [default: None]
+ label_feat: False
+ lan_size: 1
+ load_features: False
+ load_iter: 0 [default: 0]
+ load_size: 286
+ local_rank: -1
+ max_dataset_size: inf
+ mfcc0_rate: 0.2
+ model: pirender_3dmm_mouth_hd
+ model_path: ./landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth
+ n_blocks: 9
+ n_blocks_global: 9
+ n_blocks_local: 3
+ n_clusters: 10
+ n_downsample_E: 4
+ n_downsample_global: 4
+ n_layers_D: 3
+ n_local_enhancers: 1
+ name: test
+ ndf: 64
+ nef: 16
+ netD: basic
+ netG: pirender
+ ngf: 64
+ niter_fix_global: 0
+ no_dropout: True
+ no_flip: False
+ no_ganFeat_loss: False
+ no_instance: False
+ norm: instance
+ ntest: inf
+ num_D: 2
+ num_test: 50
+ num_threads: 4
+ output_nc: 3
+ perceptual_layers: ['relu_1_1', 'relu_2_1', 'relu_3_1', 'relu_4_1', 'relu_5_1']
+ perceptual_network: vgg19
+ perceptual_num_scales: 4
+perceptual_use_style_loss: True
+ perceptual_weights: [4, 4, 4, 4, 4]
+ phase: test
+ preprocess: resize_and_crop
+ resize_size: 512
+ results_dir: ./results/
+ serial_batches: False
+ suffix:
+ test_audio_path: None
+ test_muban: None
+ verbose: False
+weight_style_to_perceptual: 250
+----------------- End -------------------
diff --git a/landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..fd3e48d9c1fb9bd699443bbef1a04ad9db4b44cd
--- /dev/null
+++ b/landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfb97b75c48cdbf3cde9f0d6cb9c2e0665b8a0f8f6870a78480263638f8b2bd9
+size 3479728
diff --git a/landmark2face_wy/data/Facereala3dmmexp512_dataset.py b/landmark2face_wy/data/Facereala3dmmexp512_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..34ce72992f34f98f6ca1cb1254b48bc252b294fc
--- /dev/null
+++ b/landmark2face_wy/data/Facereala3dmmexp512_dataset.py
@@ -0,0 +1,212 @@
+import os.path
+import random
+from data.base_dataset import BaseDataset, get_params, get_transform
+import torchvision.transforms as transforms
+from data.image_folder import make_dataset
+from PIL import Image, ImageEnhance
+import numpy as np
+import cv2
+import torch
+import time
+
+def get_idts(config_name):
+ idts = list()
+ with open(os.path.join('../config', config_name + '.txt')) as f:
+ for line in f:
+ line = line.strip()
+ video_name = line.split(':')[0]
+ idts.append(video_name)
+ return idts
+
+
+def obtain_seq_index(index, num_frames):
+ seq = list(range(index - 13, index + 13 + 1))
+ seq = [min(max(item, 0), num_frames - 1) for item in seq]
+ return seq
+
+def get_3dmm_feature(img_path, idx, new_dict):
+ id = img_path.split('/')[-3]
+ features = new_dict[id]
+ idx_list = obtain_seq_index(idx, features.shape[0])
+ feature = features[idx_list, 80:144]
+# feature[:, -1] = 50
+ return np.transpose(feature, (1, 0))
+
+
+
+class Facereala3dmmexp512Dataset(BaseDataset):
+ def __init__(self, opt, mode=None):
+ BaseDataset.__init__(self, opt)
+ img_size = opt.img_size
+ idts = get_idts(opt.name.split('_')[0])
+ print("---------load data list--------: ", idts)
+ self.new_dict = {}
+ if mode == 'train':
+ self.labels = []
+ self.label_starts = []
+ self.label_ends = []
+ count = 0
+ for idt_name in idts:
+ # root = '../AnnVI/feature/{}'.format(idt_name)
+ root = os.path.join(opt.feature_path, idt_name)
+ feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
+ self.new_dict[idt_name] = feature
+ if opt.audio_feature == "3dmm":
+ training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
+ else:
+ training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
+ training_data = torch.load(training_data_path)
+ img_paths = training_data['img_paths']
+ features_3dmm = training_data['features_3dmm']
+ index = [i[0].split('/')[-1] for i in img_paths]
+
+ image_dir = '{}/{}_dlib_crop'.format(root, img_size)
+ self.label_starts.append(count)
+ for img in range(len(index)):
+ img_path = os.path.join(image_dir, index[img])
+ # idx_list = obtain_seq_index(img, feature.shape[0])
+ # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
+ self.labels.append([img_path, features_3dmm[img]])
+ count = count + 1
+ self.label_ends.append(count)
+
+ self.label_starts = np.array(self.label_starts)
+ self.label_ends = np.array(self.label_ends)
+ self.transforms_image = transforms.Compose([transforms.ToTensor(),
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+
+ self.transforms_label = transforms.Compose([transforms.ToTensor(),
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+ self.shuffle()
+ elif mode == 'test':
+ self.labels = []
+ self.label_starts = []
+ self.label_ends = []
+ count = 0
+ for idt_name in idts:
+ # root = '../AnnVI/feature/{}'.format(idt_name)
+ root = os.path.join(opt.feature_path, idt_name)
+ feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
+ self.new_dict[idt_name] = feature
+ if opt.audio_feature == "3dmm":
+ training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
+ else:
+ training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
+ training_data = torch.load(training_data_path)
+ img_paths = training_data['img_paths']
+ features_3dmm = training_data['features_3dmm']
+ index = [i[0].split('/')[-1] for i in img_paths]
+
+ image_dir = '{}/{}_dlib_crop'.format(root, img_size)
+ self.label_starts.append(count)
+ for img in range(len(index)):
+ img_path = os.path.join(image_dir, index[img])
+ # idx_list = obtain_seq_index(img, feature.shape[0])
+ # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
+ self.labels.append([img_path, features_3dmm[img]])
+ count = count + 1
+ self.label_ends.append(count)
+
+ self.label_starts = np.array(self.label_starts)
+ self.label_ends = np.array(self.label_ends)
+ self.transforms_image = transforms.Compose([transforms.ToTensor(),
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+
+ self.transforms_label = transforms.Compose([transforms.ToTensor(),
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+ self.shuffle()
+
+ def shuffle(self):
+ self.labels_index = list(range(len(self.labels)))
+ random.shuffle(self.labels_index)
+
+ def add_mouth_mask2(self, img):
+ mask = np.ones_like(img)
+ rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30]
+ mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
+ x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
+ x = np.flip(x, 0)
+ y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
+ zz1 = -y - x + 88 > 0
+ zz2 = np.flip(zz1, 1)
+ zz = (zz1 + zz2) > 0
+ mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
+ imgm = img * mask
+ return imgm
+
+ def __getitem__(self, index):
+ # s1= time.time()
+ idx = self.labels_index[index]
+ img_path, feature_3dmm_idx= self.labels[idx]
+ # print(img_path, feature_3dmm_idx)
+ feature_3dmm = get_3dmm_feature(img_path, feature_3dmm_idx, self.new_dict)
+ #print(img_path, feature_3dmm_idx, feature_3dmm.shape)
+
+ img = np.array(Image.open(img_path).convert('RGB'))
+ img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
+ cut_pad1 = np.random.randint(0, 20)
+ cut_pad2 = np.random.randint(0, 20)
+ img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
+ # s2 =time.time()
+ # print('get data and read data ', s2-s1)
+ mask_B = img.copy()
+ # mask_end = np.random.randint(236*2, 250*2)
+ # index = np.random.randint(80, 90)
+ # mask_B[mask_B.shape[1] // 2 - index:mask_end, 30:-30] = 0
+ mask_end = np.random.randint(480, 500)
+ index = np.random.randint(15, 30)
+ mask_B[index:mask_end, 70:-70] = 0
+ img = Image.fromarray(img)
+
+ mask_B = Image.fromarray(mask_B)
+ img = self.transforms_image(img)
+ mask_B = self.transforms_image(mask_B)
+
+ x = np.where((idx >= self.label_starts) * (idx < self.label_ends))[0]
+
+ audio = torch.tensor(feature_3dmm)
+ # s3 = time.time()
+ # print('get 3dmm and mask ', s3 - s2)
+ # 保证real_A_index不是idx
+ max_i = 0
+ real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
+ while real_A_index == idx:
+ max_i += 1
+ real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
+ if max_i > 5:
+ break
+
+ imgA_path, _ = self.labels[real_A_index]
+ imgA = np.array(Image.open(imgA_path).convert('RGB'))
+ cut_pad1 = np.random.randint(0, 20)
+ cut_pad2 = np.random.randint(0, 20)
+ imgA = imgA[cut_pad1:256*2 + cut_pad1, cut_pad2:256*2 + cut_pad2]
+
+ ########椭圆##########
+ # mask = np.zeros(imgA.shape, dtype=np.uint8)
+ # cv2.ellipse(mask, (imgA.shape[1] // 2, imgA.shape[0] // 2 - 165 - cut_pad1),
+ # (imgA.shape[1] // 2 + 25, imgA.shape[0]), 0, 0, 360, (255, 255, 255), -1)
+ # ROI = cv2.bitwise_and(imgA, mask)
+ # imgA = Image.fromarray(ROI)
+ #############################
+ # imgA[:imgA.shape[1] // 2 - 40 - index2, :] = 0
+ imgA = Image.fromarray(imgA)
+ imgA = self.transforms_image(imgA)
+ # s4 = time.time()
+ # print('end time reala ', s4 - s3)
+ return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
+
+ def __len__(self):
+ """Return the total number of images in the dataset."""
+ return len(self.labels)
+
+
+if __name__ == '__main__':
+ from options.train_options import TrainOptions
+
+ opt = TrainOptions().parse()
+ dataset = Facereala3dmmDataset(opt)
+ dataset_size = len(dataset)
+ print(dataset_size)
+ for i, data in enumerate(dataset):
+ print(data)
diff --git a/landmark2face_wy/data/Facereala3dmmexpwenet512_dataset.py b/landmark2face_wy/data/Facereala3dmmexpwenet512_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c63a0e7f5b14b80769c7ef4987814ce12e8ff38
--- /dev/null
+++ b/landmark2face_wy/data/Facereala3dmmexpwenet512_dataset.py
@@ -0,0 +1,222 @@
+import os.path
+import random
+from data.base_dataset import BaseDataset, get_params, get_transform
+import torchvision.transforms as transforms
+from data.image_folder import make_dataset
+from PIL import Image, ImageEnhance
+import numpy as np
+import cv2
+import torch
+import time
+
+def get_idts(config_name):
+ idts = list()
+ with open(os.path.join('../config', config_name + '.txt')) as f:
+ for line in f:
+ line = line.strip()
+ video_name = line.split(':')[0]
+ idts.append(video_name)
+ return idts
+
+
+def obtain_seq_index(index, num_frames):
+ seq = list(range(index - 10, index + 9 + 1))
+ seq = [min(max(item, 0), num_frames - 1) for item in seq]
+ return seq
+
+def get_3dmm_feature(img_path, idx, audio_feature, new_dict):
+ id = img_path.split('/')[-3]
+ features, features1, features1 = new_dict[id]
+ idx_list = obtain_seq_index(idx, features.shape[0])
+ feature = features[idx_list, 80:144]
+ feature1 = features1[:,audio_feature[0]:audio_feature[1]]
+ feature = np.concatenate([feature, features[idx_list, -3:], np.transpose(feature1, (1, 0))], 1)
+ # print(feature.shape)
+ return np.transpose(feature, (1, 0))
+ # return feature
+
+
+
+class Facereala3dmmexpwenet512Dataset(BaseDataset):
+ def __init__(self, opt, mode=None):
+ BaseDataset.__init__(self, opt)
+ img_size = opt.img_size
+ idts = get_idts(opt.name.split('_')[0])
+ print("---------load data list--------: ", idts)
+ self.new_dict = {}
+ if mode == 'train':
+ self.labels = []
+ self.label_starts = []
+ self.label_ends = []
+ count = 0
+ for idt_name in idts:
+ # root = '../AnnVI/feature/{}'.format(idt_name)
+ root = os.path.join(opt.feature_path, idt_name)
+ feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
+ feature1 = np.load(os.path.join(root,'audio_wenet_feature.npy'))
+ self.new_dict[idt_name] = [feature, feature1, feature1]
+ if opt.audio_feature == "3dmm":
+ training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
+ else:
+ training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
+ training_data = torch.load(training_data_path)
+ img_paths = training_data['img_paths']
+ features_3dmm = training_data['features_3dmm']
+ audio_features = np.load(os.path.join(root, 'audio_data.npy'), allow_pickle=True)
+ audio_features = audio_features.tolist()
+ index = [i[0].split('/')[-1] for i in img_paths]
+
+ image_dir = '{}/{}_dlib_crop'.format(root, img_size)
+ self.label_starts.append(count)
+ for img in range(len(index)):
+ img_path = os.path.join(image_dir, index[img])
+ # idx_list = obtain_seq_index(img, feature.shape[0])
+ # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
+ if type(features_3dmm[img]) != int:
+ print(img_path)
+ audio_feature = audio_features[img]
+ self.labels.append([img_path, features_3dmm[img], audio_feature])
+ count = count + 1
+ self.label_ends.append(count)
+
+ self.label_starts = np.array(self.label_starts)
+ self.label_ends = np.array(self.label_ends)
+ self.transforms_image = transforms.Compose([transforms.ToTensor(),
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+
+ self.transforms_label = transforms.Compose([transforms.ToTensor(),
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+ self.shuffle()
+ elif mode == 'test':
+ self.labels = []
+ self.label_starts = []
+ self.label_ends = []
+ count = 0
+ for idt_name in idts:
+ # root = '../AnnVI/feature/{}'.format(idt_name)
+ root = os.path.join(opt.feature_path, idt_name)
+ feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature))
+ self.new_dict[idt_name] = feature
+ if opt.audio_feature == "3dmm":
+ training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
+ else:
+ training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
+ training_data = torch.load(training_data_path)
+ img_paths = training_data['img_paths']
+ features_3dmm = training_data['features_3dmm']
+ index = [i[0].split('/')[-1] for i in img_paths]
+
+ image_dir = '{}/{}_dlib_crop'.format(root, img_size)
+ self.label_starts.append(count)
+ for img in range(len(index)):
+ img_path = os.path.join(image_dir, index[img])
+ # idx_list = obtain_seq_index(img, feature.shape[0])
+ # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))])
+ self.labels.append([img_path, features_3dmm[img]])
+ count = count + 1
+ self.label_ends.append(count)
+
+ self.label_starts = np.array(self.label_starts)
+ self.label_ends = np.array(self.label_ends)
+ self.transforms_image = transforms.Compose([transforms.ToTensor(),
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+
+ self.transforms_label = transforms.Compose([transforms.ToTensor(),
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+ self.shuffle()
+
+ def shuffle(self):
+ self.labels_index = list(range(len(self.labels)))
+ random.shuffle(self.labels_index)
+
+ def add_mouth_mask2(self, img):
+ mask = np.ones_like(img)
+ rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30]
+ mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
+ x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
+ x = np.flip(x, 0)
+ y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
+ zz1 = -y - x + 88 > 0
+ zz2 = np.flip(zz1, 1)
+ zz = (zz1 + zz2) > 0
+ mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
+ imgm = img * mask
+ return imgm
+
+ def __getitem__(self, index):
+ # s1= time.time()
+ idx = self.labels_index[index]
+ img_path, feature_3dmm_idx, audio_feature= self.labels[idx]
+ # print(img_path, feature_3dmm_idx)
+ feature_3dmm = get_3dmm_feature(img_path, feature_3dmm_idx, audio_feature, self.new_dict)
+ #print(img_path, feature_3dmm_idx, feature_3dmm.shape)
+
+ img = np.array(Image.open(img_path).convert('RGB'))
+ img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
+ cut_pad1 = np.random.randint(0, 20)
+ cut_pad2 = np.random.randint(0, 20)
+ img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
+ # s2 =time.time()
+ # print('get data and read data ', s2-s1)
+ mask_B = img.copy()
+ # mask_end = np.random.randint(236*2, 250*2)
+ # index = np.random.randint(80, 90)
+ # mask_B[mask_B.shape[1] // 2 - index:mask_end, 30:-30] = 0
+ mask_end = np.random.randint(480, 500)
+ index = np.random.randint(15, 30)
+ # index = np.random.randint(90, 100)
+ mask_B[index:mask_end, 70:-70] = 0
+ img = Image.fromarray(img)
+
+ mask_B = Image.fromarray(mask_B)
+ img = self.transforms_image(img)
+ mask_B = self.transforms_image(mask_B)
+
+ x = np.where((idx >= self.label_starts) * (idx < self.label_ends))[0]
+
+ audio = torch.tensor(feature_3dmm)
+ # s3 = time.time()
+ # print('get 3dmm and mask ', s3 - s2)
+ # 保证real_A_index不是idx
+ max_i = 0
+ real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
+ while real_A_index == idx:
+ max_i += 1
+ real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1)
+ if max_i > 5:
+ break
+
+ imgA_path, _, _ = self.labels[real_A_index]
+ imgA = np.array(Image.open(imgA_path).convert('RGB'))
+ cut_pad1 = np.random.randint(0, 20)
+ cut_pad2 = np.random.randint(0, 20)
+ imgA = imgA[cut_pad1:256*2 + cut_pad1, cut_pad2:256*2 + cut_pad2]
+
+ ########椭圆##########
+ # mask = np.zeros(imgA.shape, dtype=np.uint8)
+ # cv2.ellipse(mask, (imgA.shape[1] // 2, imgA.shape[0] // 2 - 165 - cut_pad1),
+ # (imgA.shape[1] // 2 + 25, imgA.shape[0]), 0, 0, 360, (255, 255, 255), -1)
+ # ROI = cv2.bitwise_and(imgA, mask)
+ # imgA = Image.fromarray(ROI)
+ #############################
+ # imgA[:imgA.shape[1] // 2 - 40 - index2, :] = 0
+ imgA = Image.fromarray(imgA)
+ imgA = self.transforms_image(imgA)
+ # s4 = time.time()
+ # print('end time reala ', s4 - s3)
+ return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
+
+ def __len__(self):
+ """Return the total number of images in the dataset."""
+ return len(self.labels)
+
+
+if __name__ == '__main__':
+ from options.train_options import TrainOptions
+
+ opt = TrainOptions().parse()
+ dataset = Facereala3dmmDataset(opt)
+ dataset_size = len(dataset)
+ print(dataset_size)
+ for i, data in enumerate(dataset):
+ print(data)
diff --git a/landmark2face_wy/data/__init__.py b/landmark2face_wy/data/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef09ef89810a9de201c234e79cef5a7ff2b37d8e
--- /dev/null
+++ b/landmark2face_wy/data/__init__.py
@@ -0,0 +1,99 @@
+"""This package includes all the modules related to data loading and preprocessing
+
+ To add a custom dataset class called 'dummy', you need to add a file called 'dummy_dataset.py' and define a subclass 'DummyDataset' inherited from BaseDataset.
+ You need to implement four functions:
+ -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt).
+ -- <__len__>: return the size of dataset.
+ -- <__getitem__>: get a data point from data loader.
+ -- : (optionally) add dataset-specific options and set default options.
+
+Now you can use the dataset class by specifying flag '--dataset_mode dummy'.
+See our template dataset class 'template_dataset.py' for more details.
+"""
+import importlib
+import torch.utils.data
+from landmark2face_wy.data.base_dataset import BaseDataset
+
+
+def find_dataset_using_name(dataset_name):
+ """Import the module "data/[dataset_name]_dataset.py".
+
+ In the file, the class called DatasetNameDataset() will
+ be instantiated. It has to be a subclass of BaseDataset,
+ and it is case-insensitive.
+ """
+ dataset_filename = "landmark2face_wy.data." + dataset_name + "_dataset"
+ datasetlib = importlib.import_module(dataset_filename)
+
+ dataset = None
+ target_dataset_name = dataset_name.replace('_', '') + 'dataset'
+ for name, cls in datasetlib.__dict__.items():
+ if name.lower() == target_dataset_name.lower() \
+ and issubclass(cls, BaseDataset):
+ dataset = cls
+
+ if dataset is None:
+ raise NotImplementedError("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." % (dataset_filename, target_dataset_name))
+
+ return dataset
+
+
+def get_option_setter(dataset_name):
+ """Return the static method of the dataset class."""
+ dataset_class = find_dataset_using_name(dataset_name)
+ return dataset_class.modify_commandline_options
+
+
+def create_dataset(opt, mode='train'):
+ """Create a dataset given the option.
+
+ This function wraps the class CustomDatasetDataLoader.
+ This is the main interface between this package and 'train.py'/'test.py'
+
+ Example:
+ >>> from data import create_dataset
+ >>> dataset = create_dataset(opt)
+ """
+ data_loader = CustomDatasetDataLoader(opt, mode)
+ dataset = data_loader.load_data()
+ return dataset
+
+
+class CustomDatasetDataLoader():
+ """Wrapper class of Dataset class that performs multi-threaded data loading"""
+
+ def __init__(self, opt, mode):
+ """Initialize this class
+
+ Step 1: create a dataset instance given the name [dataset_mode]
+ Step 2: create a multi-threaded data loader.
+ """
+ self.opt = opt
+ dataset_class = find_dataset_using_name(opt.dataset_mode)
+ self.dataset = dataset_class(opt, mode)
+ print("dataset [%s] was created" % type(self.dataset).__name__)
+ if mode == 'test':
+ batchsize = opt.batch_size // 2
+ else:
+ batchsize = opt.batch_size
+ print(opt.batch_size)
+ if not opt.distributed:
+ self.dataloader = torch.utils.data.DataLoader(self.dataset,batch_size=batchsize,
+ shuffle=not opt.serial_batches,num_workers=int(opt.num_threads))
+ else:
+ self.train_sampler = torch.utils.data.distributed.DistributedSampler(self.dataset) ### 数据切分
+ self.dataloader = torch.utils.data.DataLoader(self.dataset, batch_size=batchsize, sampler=self.train_sampler, num_workers=int(opt.num_threads), pin_memory=True)
+
+ def load_data(self):
+ return self
+
+ def __len__(self):
+ """Return the number of data in the dataset"""
+ return min(len(self.dataset), self.opt.max_dataset_size)
+
+ def __iter__(self):
+ """Return a batch of data"""
+ for i, data in enumerate(self.dataloader):
+ if i * self.opt.batch_size >= self.opt.max_dataset_size:
+ break
+ yield data
diff --git a/landmark2face_wy/data/base_dataset.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/data/base_dataset.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..fc6c4ca8f30d9105f63cd455086fc2807b39fed9
--- /dev/null
+++ b/landmark2face_wy/data/base_dataset.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9f9eca68615a251926ce113af4594a8dd1f50644c66be50ff5ab27020569c89
+size 1093920
diff --git a/landmark2face_wy/data/image_folder.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/data/image_folder.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..cf8ba59170e04c0c1e9acc574d8f5104b2ecaf59
--- /dev/null
+++ b/landmark2face_wy/data/image_folder.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:180bb0b0dc195aa073049a4c7630e071577f5607bbb3bd2c8247468ec84c7f6c
+size 860856
diff --git a/landmark2face_wy/data/l2faceaudio512_dataset.py b/landmark2face_wy/data/l2faceaudio512_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..060bf5e086923b20b1e4fb631ae6ef09dbc3f03b
--- /dev/null
+++ b/landmark2face_wy/data/l2faceaudio512_dataset.py
@@ -0,0 +1,189 @@
+import os.path
+import random
+from data.base_dataset import BaseDataset, get_params, get_transform
+import torchvision.transforms as transforms
+from data.image_folder import make_dataset
+from PIL import Image, ImageEnhance
+import numpy as np
+import cv2
+import torch
+
+
+def get_idts(config_name):
+ idts = list()
+ with open(os.path.join('../config', config_name + '.txt')) as f:
+ for line in f:
+ line = line.strip()
+ idts.append(line)
+ return idts
+
+
+class L2FaceAudio512Dataset(BaseDataset):
+ def __init__(self, opt, mode=None):
+ BaseDataset.__init__(self, opt)
+ img_size = opt.img_size
+ idts = get_idts(opt.name.split('_')[0])
+ print("---------load data list--------: ", idts)
+ if mode == 'train':
+ self.labels = []
+ for idt_name in idts:
+ # root = '../AnnVI/feature/{}'.format(idt_name)
+ root = os.path.join(opt.feature_path, idt_name)
+ if opt.audio_feature == "mfcc":
+ training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
+ else:
+ training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
+ training_data = torch.load(training_data_path)
+ img_paths = training_data['img_paths']
+ audio_features = training_data['audio_features']
+ index = [i[0].split('/')[-1] for i in img_paths]
+
+ image_dir = '{}/{}_dlib_crop'.format(root, img_size)
+ # label_dir = '{}/512_landmark_crop'.format(root)
+
+ # if 'man' in opt.name:
+ # imgs.sort(key=lambda x:int(x.split('.')[0]))
+ # else:
+ # imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), int(x.split('.')[0].split('-')[1])))
+ for img in range(len(index)):
+ img_path = os.path.join(image_dir, index[img])
+ audio_feature = audio_features[img]
+ self.labels.append([img_path, audio_feature])
+ # transforms.Resize([img_size, img_size], Image.BICUBIC),
+ self.transforms_image = transforms.Compose([transforms.ToTensor(),
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+ # transforms.Resize([img_size, img_size], Image.BICUBIC),
+ self.transforms_label = transforms.Compose([transforms.ToTensor(),
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+ self.shuffle()
+ elif mode == 'test':
+ self.labels = []
+ for idt_name in idts:
+ # root = '../AnnVI/feature/{}'.format(idt_name)
+ root = os.path.join(opt.feature_path, idt_name)
+ if opt.audio_feature == "mfcc":
+ training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
+ else:
+ training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
+ training_data = torch.load(training_data_path)
+ img_paths = training_data['img_paths']
+ audio_features = training_data['audio_features']
+ index = [i[0].split('/')[-1] for i in img_paths]
+
+ image_dir = '{}/{}_dlib_crop'.format(root, img_size)
+ # label_dir = '{}/512_landmark_crop'.format(root)
+
+ # if 'man' in opt.name:
+ # imgs.sort(key=lambda x:int(x.split('.')[0]))
+ # else:
+ # imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), int(x.split('.')[0].split('-')[1])))
+ for img in range(len(index)):
+ img_path = os.path.join(image_dir, index[img])
+ audio_feature = audio_features[img]
+ self.labels.append([img_path, audio_feature])
+ # transforms.Resize([img_size, img_size], Image.BICUBIC),
+ self.transforms_image = transforms.Compose([transforms.ToTensor(),
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+ # transforms.Resize([img_size, img_size], Image.BICUBIC),
+ self.transforms_label = transforms.Compose([transforms.ToTensor(),
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+ self.shuffle()
+
+ def shuffle(self):
+ random.shuffle(self.labels)
+
+ def add_mouth_mask2(self, img):
+ mask = np.ones_like(img)
+ rect_area = [img.shape[1] // 2 - np.random.randint(50, 60), np.random.randint(226, 246), 30, 256 - 30]
+ mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
+ x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
+ x = np.flip(x, 0)
+ y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
+ zz1 = -y - x + 88 > 0
+ zz2 = np.flip(zz1, 1)
+ zz = (zz1 + zz2) > 0
+ mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
+ imgm = img * mask
+ return imgm
+
+ def __getitem__(self, index):
+ cv2.setNumThreads(0)
+ img_path, audio_feature = self.labels[index]
+ img = np.array(Image.open(img_path).convert('RGB'))
+ img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
+ cut_pad1 = np.random.randint(0, 20)
+ cut_pad2 = np.random.randint(0, 20)
+ img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
+
+ ####椭圆mask遮住衣领#####
+ '''
+ mask = np.zeros(img.shape, dtype=np.uint8)
+ cv2.ellipse(mask, (img.shape[1] // 2, img.shape[0] // 2 - 160 - cut_pad1), (img.shape[1] // 2 + 10, img.shape[0]), 0, 0, 360, (255, 255, 255), -1)
+ '''
+ ####mask遮眼睛#####
+ mask = np.ones(img.shape, dtype=np.uint8) * 255
+ mask[40 - cut_pad1:140 - cut_pad1, 110 - cut_pad2:-110 - cut_pad2] = 0
+ img = cv2.bitwise_and(img, mask)
+
+ mask_B = img.copy()
+ mask_B = cv2.resize(mask_B, (256, 256))
+ ##########脖子分割加mask#############
+ # img_edge = cv2.imread(img_path.replace("dlib_crop", "dlib_crop_neck"))
+ # img_edge = img_edge[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]
+ # mask_B = cv2.bitwise_and(img, 255 - img_edge)
+ # img_edge[:128, :, :] = img[:128, :, :]
+
+ ##########增加脖子椭圆mask#############
+ '''
+ maske = np.zeros(img.shape, dtype=np.uint8)
+ cv2.ellipse(maske, (img.shape[1] // 2, img.shape[0] // 2 + 50),
+ (img.shape[1] // 4 + np.random.randint(-5, 5), img.shape[0] // 3 + np.random.randint(-10, 10)),
+ 0, 0, 360, (255, 255, 255), -1)
+ maske[:img.shape[0] // 2, :, :] = 0
+ mask_B = cv2.bitwise_and(mask_B, 255-maske)
+ '''
+ ##########之前老的矩形mask#############
+ mask_end = np.random.randint(236, 256)
+ mask_B[mask_B.shape[1] // 2 - np.random.randint(40, 50):mask_end, 30:-30] = 0
+ ##########之前老的矩形mask#############
+ ##########蔡星宇三角mask#############
+ # mask_B = self.add_mouth_mask2(mask_B)
+ ##########蔡星宇三角mask#############
+ # mask_B[mask_B.shape[1] // 2 - 50:, 30:-30] = 0
+ img = Image.fromarray(img)
+ mask_B = Image.fromarray(mask_B)
+ img = self.transforms_image(img)
+ mask_B = self.transforms_image(mask_B)
+ # lab = Image.open(lab_path).convert('RGB')
+ # lab = self.transforms_label(lab)
+ audio = np.zeros((256, 256), dtype=np.float32)
+ audio_feature = np.array(audio_feature)
+ audio[:audio_feature.shape[0], :audio_feature.shape[1]] = audio_feature
+ audio = torch.tensor([audio])
+
+ imgA_path, _ = random.sample(self.labels, 1)[0]
+ imgA = np.array(Image.open(imgA_path).convert('RGB'))
+ cut_pad1 = np.random.randint(0, 20)
+ cut_pad2 = np.random.randint(0, 20)
+ imgA = imgA[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2]
+ # mask = np.ones(imgA.shape, dtype=np.uint8) * 255
+ # mask[40 - cut_pad1:140 - cut_pad1, 110 - cut_pad2:-110 - cut_pad2] = 0
+ imgA = cv2.bitwise_and(imgA, mask)
+ imgA = Image.fromarray(imgA)
+ imgA = self.transforms_image(imgA)
+ return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
+
+ def __len__(self):
+ """Return the total number of images in the dataset."""
+ return len(self.labels)
+
+
+if __name__ == '__main__':
+ from options.train_options import TrainOptions
+
+ opt = TrainOptions().parse()
+ dataset = L2FaceDataset(opt)
+ dataset_size = len(dataset)
+ print(dataset_size)
+ for i, data in enumerate(dataset):
+ print(data)
\ No newline at end of file
diff --git a/landmark2face_wy/data/l2faceaudio_dataset.py b/landmark2face_wy/data/l2faceaudio_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..1be5a8587551eb96b7be0998e3719b0ba5987e5e
--- /dev/null
+++ b/landmark2face_wy/data/l2faceaudio_dataset.py
@@ -0,0 +1,166 @@
+import os.path
+import random
+from data.base_dataset import BaseDataset, get_params, get_transform
+import torchvision.transforms as transforms
+from data.image_folder import make_dataset
+from PIL import Image, ImageEnhance
+import numpy as np
+import cv2
+import torch
+
+
+def get_idts(config_name):
+    """Read identity names, one per line, from ../config/<config_name>.txt."""
+    idts = list()
+    with open(os.path.join('../config', config_name + '.txt')) as f:
+        for line in f:
+            line = line.strip()
+            idts.append(line)
+    return idts
+
+
+class L2FaceAudioDataset(BaseDataset):
+    # Dataset pairing cropped face images with per-frame audio features loaded
+    # from preprocessed .t7 files, for audio-driven face generation training.
+    # NOTE(review): the 'train' and 'test' branches of __init__ are identical
+    # except for the mode string baked into the .t7 file name; if mode is
+    # neither value, self.labels and the transforms are never created and
+    # later attribute access fails - confirm callers always pass a valid mode.
+    def __init__(self, opt, mode=None):
+        BaseDataset.__init__(self, opt)
+        img_size = opt.img_size
+        # Identity list name = prefix of opt.name before the first underscore.
+        idts = get_idts(opt.name.split('_')[0])
+        print("---------load data list--------: ", idts)
+        if mode == 'train':
+            self.labels = []
+            for idt_name in idts:
+                # root = '../AnnVI/feature/{}'.format(idt_name)
+                root = os.path.join(opt.feature_path, idt_name)
+                # mfcc features use the legacy file name without a suffix.
+                if opt.audio_feature == "mfcc":
+                    training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
+                else:
+                    training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
+                training_data = torch.load(training_data_path)
+                img_paths = training_data['img_paths']
+                audio_features = training_data['audio_features']
+                # Keep only the file-name component of each stored image path.
+                index = [i[0].split('/')[-1] for i in img_paths]
+
+                image_dir = '{}/{}_dlib_crop'.format(root, img_size)
+                # label_dir = '{}/512_landmark_crop'.format(root)
+
+                # if 'man' in opt.name:
+                #     imgs.sort(key=lambda x:int(x.split('.')[0]))
+                # else:
+                #     imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), int(x.split('.')[0].split('-')[1])))
+                # Pair every image file with its audio feature by position.
+                for img in range(len(index)):
+                    img_path = os.path.join(image_dir, index[img])
+                    audio_feature = audio_features[img]
+                    self.labels.append([img_path, audio_feature])
+            # transforms.Resize([img_size, img_size], Image.BICUBIC),
+            self.transforms_image = transforms.Compose([transforms.ToTensor(),
+                                                        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+            # transforms.Resize([img_size, img_size], Image.BICUBIC),
+            self.transforms_label = transforms.Compose([transforms.ToTensor(),
+                                                        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+            self.shuffle()
+        elif mode == 'test':
+            self.labels = []
+            for idt_name in idts:
+                # root = '../AnnVI/feature/{}'.format(idt_name)
+                root = os.path.join(opt.feature_path, idt_name)
+                # mfcc features use the legacy file name without a suffix.
+                if opt.audio_feature == "mfcc":
+                    training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode))
+                else:
+                    training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature))
+                training_data = torch.load(training_data_path)
+                img_paths = training_data['img_paths']
+                audio_features = training_data['audio_features']
+                # Keep only the file-name component of each stored image path.
+                index = [i[0].split('/')[-1] for i in img_paths]
+
+                image_dir = '{}/{}_dlib_crop'.format(root, img_size)
+                # label_dir = '{}/512_landmark_crop'.format(root)
+
+                # if 'man' in opt.name:
+                #     imgs.sort(key=lambda x:int(x.split('.')[0]))
+                # else:
+                #     imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), int(x.split('.')[0].split('-')[1])))
+                # Pair every image file with its audio feature by position.
+                for img in range(len(index)):
+                    img_path = os.path.join(image_dir, index[img])
+                    audio_feature = audio_features[img]
+                    self.labels.append([img_path, audio_feature])
+            # transforms.Resize([img_size, img_size], Image.BICUBIC),
+            self.transforms_image = transforms.Compose([transforms.ToTensor(),
+                                                        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+            # transforms.Resize([img_size, img_size], Image.BICUBIC),
+            self.transforms_label = transforms.Compose([transforms.ToTensor(),
+                                                        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
+            self.shuffle()
+
+    def shuffle(self):
+        # Re-shuffle the (image path, audio feature) pairs in place.
+        random.shuffle(self.labels)
+
+    def add_mouth_mask2(self, img):
+        # Blank a roughly triangular mouth region of an (H, W, 3) image by
+        # multiplying with a 0/1 mask built from two mirrored half-plane
+        # tests inside a randomized bounding rectangle.
+        # (Translated comment: "Cai Xingyu's triangular mask".)
+        mask = np.ones_like(img)
+        # [top, bottom, left, right] of the rectangle bounding the mask.
+        rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30]
+        # NOTE(review): mask_rect_area is computed but never used.
+        mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]]
+        x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2]))
+        x = np.flip(x, 0)
+        y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose()
+        # zz1 keeps the half-plane below a diagonal; zz2 is its mirror image.
+        zz1 = -y - x + 88 > 0
+        zz2 = np.flip(zz1, 1)
+        zz = (zz1 + zz2) > 0
+        mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1
+        imgm = img * mask
+        return imgm
+
+    def __getitem__(self, index):
+        # Disable OpenCV's internal threading inside DataLoader workers.
+        cv2.setNumThreads(0)
+        img_path, audio_feature = self.labels[index]
+        img = np.array(Image.open(img_path).convert('RGB'))
+        # Random per-channel brightness jitter in [-20, 20), clipped to uint8.
+        img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8')
+        # Random 256x256 crop window (assumes the source image is larger).
+        cut_pad1 = np.random.randint(0, 10)
+        cut_pad2 = np.random.randint(0, 10)
+        img = img[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]
+
+        #### mask covering the eyes ####
+        mask = np.ones(img.shape, dtype=np.uint8) * 255
+        mask[20 - cut_pad1:70 - cut_pad1, 55 - cut_pad2:-55 - cut_pad2] = 0
+        img = cv2.bitwise_and(img, mask)
+
+        mask_B = img.copy()
+        mask_end = np.random.randint(236, 256)
+        ########## old rectangular mouth mask ##########
+        mask_B[mask_B.shape[1] // 2 - np.random.randint(40, 50):mask_end, 30:-30] = 0
+        ########## old rectangular mouth mask ##########
+        ########## Cai Xingyu's triangular mask ##########
+        # mask_B = self.add_mouth_mask2(mask_B)
+        ########## Cai Xingyu's triangular mask ##########
+        # mask_B[mask_B.shape[1] // 2 - 50:, 30:-30] = 0
+        img = Image.fromarray(img)
+        mask_B = Image.fromarray(mask_B)
+        img = self.transforms_image(img)
+        mask_B = self.transforms_image(mask_B)
+        # lab = Image.open(lab_path).convert('RGB')
+        # lab = self.transforms_label(lab)
+        # Embed the audio feature into a fixed 256x256 float32 canvas,
+        # zero-padded on the right/bottom.
+        audio = np.zeros((256, 256), dtype=np.float32)
+        audio_feature = np.array(audio_feature)
+        audio[:audio_feature.shape[0], :audio_feature.shape[1]] = audio_feature
+        audio = torch.tensor([audio])  # adds a leading channel dimension
+
+        # Reference image A: a random sample drawn from the same label list.
+        imgA_path, _ = random.sample(self.labels, 1)[0]
+        imgA = np.array(Image.open(imgA_path).convert('RGB'))
+        cut_pad1 = np.random.randint(0, 10)
+        cut_pad2 = np.random.randint(0, 10)
+        imgA = imgA[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2]
+        # NOTE(review): reuses the eye mask computed from img's crop offsets,
+        # not imgA's own offsets - confirm this mismatch is intended.
+        imgA = cv2.bitwise_and(imgA, mask)
+        imgA = Image.fromarray(imgA)
+        imgA = self.transforms_image(imgA)
+        return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}
+
+    def __len__(self):
+        """Return the total number of images in the dataset."""
+        return len(self.labels)
+
+
+if __name__ == '__main__':
+ from options.train_options import TrainOptions
+
+ opt = TrainOptions().parse()
+ dataset = L2FaceDataset(opt)
+ dataset_size = len(dataset)
+ print(dataset_size)
+ for i, data in enumerate(dataset):
+ print(data)
\ No newline at end of file
diff --git a/landmark2face_wy/digitalhuman_interface.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/digitalhuman_interface.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..737b99937dc263c3e9dc6b28d24700be619a6b4d
--- /dev/null
+++ b/landmark2face_wy/digitalhuman_interface.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1bc38a9e2a82a9022573da1e8326128a98a661a17d61283f3911c5ee3aa504a
+size 5284104
diff --git a/landmark2face_wy/loss/__init__.py b/landmark2face_wy/loss/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/landmark2face_wy/loss/perceptual.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/loss/perceptual.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..8537e214367585168d34a351b6e3136238a24d93
--- /dev/null
+++ b/landmark2face_wy/loss/perceptual.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:784d25b825d83d4634932f2628747a4f7f9f6c8bfe84610bc757131810c2e412
+size 2056248
diff --git a/landmark2face_wy/models/DINet.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/models/DINet.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..1d344846b7ac61659a74b47c355914dc89ae58d6
--- /dev/null
+++ b/landmark2face_wy/models/DINet.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71be733004b4ba60a93dee4971f4e69eefe575dfc99d8e3dffc3ed160d9ba4d4
+size 10580992
diff --git a/landmark2face_wy/models/__init__.py b/landmark2face_wy/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c740092bfde331bd18642cdc6abaf1768e3b2f1
--- /dev/null
+++ b/landmark2face_wy/models/__init__.py
@@ -0,0 +1,67 @@
+"""This package contains modules related to objective functions, optimizations, and network architectures.
+
+To add a custom model class called 'dummy', you need to add a file called 'dummy_model.py' and define a subclass DummyModel inherited from BaseModel.
+You need to implement the following five functions:
+ -- <__init__>: initialize the class; first call BaseModel.__init__(self, opt).
+ -- : unpack data from dataset and apply preprocessing.
+ -- : produce intermediate results.
+ -- : calculate loss, gradients, and update network weights.
+ -- : (optionally) add model-specific options and set default options.
+
+In the function <__init__>, you need to define four lists:
+ -- self.loss_names (str list): specify the training losses that you want to plot and save.
+ -- self.model_names (str list): define networks used in our training.
+ -- self.visual_names (str list): specify the images that you want to display and save.
+ -- self.optimizers (optimizer list): define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an usage.
+
+Now you can use the model class by specifying flag '--model dummy'.
+See our template model class 'template_model.py' for more details.
+"""
+
+import importlib
+from landmark2face_wy.models.base_model import BaseModel
+
+
+def find_model_using_name(model_name):
+    """Import the module "models/[model_name]_model.py".
+
+    In the file, the class called DatasetNameModel() will
+    be instantiated. It has to be a subclass of BaseModel,
+    and it is case-insensitive.
+    """
+    model_filename = "landmark2face_wy.models." + model_name + "_model"
+    modellib = importlib.import_module(model_filename)
+    model = None
+    # Case-insensitive lookup: '<name with underscores removed>model'.
+    target_model_name = model_name.replace('_', '') + 'model'
+    for name, cls in modellib.__dict__.items():
+        if name.lower() == target_model_name.lower() \
+                and issubclass(cls, BaseModel):
+            model = cls
+
+    # No matching subclass found: report and terminate.
+    if model is None:
+        print("In %s.py, there should be a subclass of BaseModel with class name that matches %s in lowercase." % (model_filename, target_model_name))
+        exit(0)
+
+    return model
+
+
+def get_option_setter(model_name):
+    """Return the static method <modify_commandline_options> of the model class."""
+    model_class = find_model_using_name(model_name)
+    return model_class.modify_commandline_options
+
+
+def create_model(opt):
+    """Create a model given the option.
+
+    This function warps the class CustomDatasetDataLoader.
+    This is the main interface between this package and 'train.py'/'test.py'
+
+    Example:
+        >>> from landmark2face_wy.models import create_model
+        >>> model = create_model(opt)
+    """
+    model = find_model_using_name(opt.model)
+    instance = model(opt)
+    print("model [%s] was created" % type(instance).__name__)
+    return instance
diff --git a/landmark2face_wy/models/base_function.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/models/base_function.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..8e20be9aac5ae561b8eb2e14b5a0634c87c43521
--- /dev/null
+++ b/landmark2face_wy/models/base_function.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:77518b79b14d01bc4bc57332bcad738cb9391a60a1978147138126d362d7cc70
+size 4592584
diff --git a/landmark2face_wy/models/base_model.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/models/base_model.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..76b9be7039b72ff7346333ab81e8e50be678d941
--- /dev/null
+++ b/landmark2face_wy/models/base_model.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65b491eaaf49cd76c0e4685cd0a43508ad4bab0a1dc106c6ea7cc1c00ca3863f
+size 1519752
diff --git a/landmark2face_wy/models/face3d2face_model.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/models/face3d2face_model.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..f5b53411d62c2d9539cf5f552f0561207df26700
--- /dev/null
+++ b/landmark2face_wy/models/face3d2face_model.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d21fbec65244a28de7db1acbcd059a5c09b27e6e4d3089da57954d5536fece7
+size 3049016
diff --git a/landmark2face_wy/models/face_model.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/models/face_model.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..69d8a4241f7e9a4678fa1ba3c6f6f337d9b0e280
--- /dev/null
+++ b/landmark2face_wy/models/face_model.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3fe9f6e3d20cd00922e6d343365f3dfef1b2afe131c3e33d54bc648a057014cb
+size 1400824
diff --git a/landmark2face_wy/models/l2faceaudio_model.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/models/l2faceaudio_model.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..b20fa5e748ba6255067c7d74e06687656f755f4b
--- /dev/null
+++ b/landmark2face_wy/models/l2faceaudio_model.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05bf1f0ae2cb6289d387bbe144f0ffb0b6fc14946ca3c2b7791e7763732c3ade
+size 1165520
diff --git a/landmark2face_wy/models/networks.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/models/networks.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..afdaad5cf35e6d55705be5a5c1b993ce0478362c
--- /dev/null
+++ b/landmark2face_wy/models/networks.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d0b6c7860980c0e5f7ea959cd92df4c5f959ff7ce7480db55ec63a82660c662
+size 3533456
diff --git a/landmark2face_wy/models/networks_HD.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/models/networks_HD.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..a295d7115b5719429a01c45c4c961bdabb339ad8
--- /dev/null
+++ b/landmark2face_wy/models/networks_HD.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e38ddd0e588ed94a3fc805f98ca00a18f38a738370f2a250da7bc1f52821402
+size 5950296
diff --git a/landmark2face_wy/models/networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/models/networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..14e0beb5901e1c18f1f831117b44f8ecae34474d
--- /dev/null
+++ b/landmark2face_wy/models/networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02d32e66441f9b49d1251855626fde459c253a7efbb84b041004ad583528e323
+size 12729352
diff --git a/landmark2face_wy/models/pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/models/pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..4b52d50ec8827e71664f25da8b75b6438d59c101
--- /dev/null
+++ b/landmark2face_wy/models/pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5dc4ad4b811689cffe59a4a8ac0dddf96527c36e4e191acdfff70d9f8f70f699
+size 3121488
diff --git a/landmark2face_wy/models/pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/models/pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..f48d95b8d0dbb4ad222ae88ff53b364e7c6bf4c4
--- /dev/null
+++ b/landmark2face_wy/models/pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3f9d8c05b21d9b50748ecb8b1a4acd276fbe4f09f934b2f0c19b822e5696792
+size 3119104
diff --git a/landmark2face_wy/options/__init__.py b/landmark2face_wy/options/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7eedebe54aa70169fd25951b3034d819e396c90
--- /dev/null
+++ b/landmark2face_wy/options/__init__.py
@@ -0,0 +1 @@
+"""This package options includes option modules: training options, test options, and basic options (used in both training and test)."""
diff --git a/landmark2face_wy/options/base_options.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/options/base_options.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..65775edfb7a76db412c5d37b0f2840c2b60323a7
--- /dev/null
+++ b/landmark2face_wy/options/base_options.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03aaa24b77f771ec2d43299d549d837b519ed78856c7cecd93f5f20fad70da0a
+size 1831608
diff --git a/landmark2face_wy/options/test_options.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/options/test_options.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..3dc7fa192ee9833a89275e7685385d54f8da708d
--- /dev/null
+++ b/landmark2face_wy/options/test_options.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fcdacdf5f87f5e021452fcb1701e1de6f41d05fe19aa81325e56d147ff72ffa
+size 418552
diff --git a/landmark2face_wy/options/train_options.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/options/train_options.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..f19b0e4b38ab1e3851bd4cbc819d67dce335f05c
--- /dev/null
+++ b/landmark2face_wy/options/train_options.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:697c5d54710eeb5378eaa12dededefa102c5fd1a3d6504e4dd3747222d83164a
+size 500864
diff --git a/landmark2face_wy/sync_batchnorm/__init__.py b/landmark2face_wy/sync_batchnorm/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6d9b36c74b1808b56ded68cf080a689db7e0ee4e
--- /dev/null
+++ b/landmark2face_wy/sync_batchnorm/__init__.py
@@ -0,0 +1,14 @@
+# -*- coding: utf-8 -*-
+# File : __init__.py
+# Author : Jiayuan Mao
+# Email : maojiayuan@gmail.com
+# Date : 27/01/2018
+#
+# This file is part of Synchronized-BatchNorm-PyTorch.
+# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch
+# Distributed under MIT License.
+
+from .batchnorm import set_sbn_eps_mode
+from .batchnorm import SynchronizedBatchNorm1d, SynchronizedBatchNorm2d, SynchronizedBatchNorm3d
+from .batchnorm import patch_sync_batchnorm, convert_model
+from .replicate import DataParallelWithCallback, patch_replication_callback
diff --git a/landmark2face_wy/sync_batchnorm/batchnorm.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/sync_batchnorm/batchnorm.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..8b1507d167cb21a19dfb99a2b9cd32e9cdfd56c2
--- /dev/null
+++ b/landmark2face_wy/sync_batchnorm/batchnorm.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:665e6144d64cca7bfe5072dc9a211e13349ef285253cd6a57ff6eaf56d274f5d
+size 2167608
diff --git a/landmark2face_wy/sync_batchnorm/batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/sync_batchnorm/batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..3e3777efa28fe25b93e9724fa404219e97a955da
--- /dev/null
+++ b/landmark2face_wy/sync_batchnorm/batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fa6d7a220684fac32aaa7a14c68145ddcbd781777a3c7a0240db6ecf0f98f23
+size 931240
diff --git a/landmark2face_wy/sync_batchnorm/comm.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/sync_batchnorm/comm.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..601839b71900478e98445b613f1c266a60d95c7f
--- /dev/null
+++ b/landmark2face_wy/sync_batchnorm/comm.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:980f9ffc65140759448eb3448c68a419a2be0c4b93d41edc062c9d50d08f3beb
+size 909744
diff --git a/landmark2face_wy/sync_batchnorm/replicate.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/sync_batchnorm/replicate.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..7989bd825ea52c40a50f28f50f833ca388e9093f
--- /dev/null
+++ b/landmark2face_wy/sync_batchnorm/replicate.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11b8c7903814a08d26a000a790445e33f80041c4d0955bcd6472cfe39bc90b01
+size 557920
diff --git a/landmark2face_wy/sync_batchnorm/unittest.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/sync_batchnorm/unittest.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..869bfe21ae394595f62e0c52cb7b055f029fbfe8
--- /dev/null
+++ b/landmark2face_wy/sync_batchnorm/unittest.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6319f7a4a2c6822c673223c080e98457f9014d86ec7027adaf69d382abb7200
+size 427336
diff --git a/landmark2face_wy/test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..0d5d253d29365548277f76572503ed9fbe444fc2
--- /dev/null
+++ b/landmark2face_wy/test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d10d5f1833370bffe3c5348eedd2b8c16c9034cc7d58467dccd7cd1320b1c349
+size 1978808
diff --git a/landmark2face_wy/test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..95c6c071c258bfdeb66770090d8c55592ff390ca
--- /dev/null
+++ b/landmark2face_wy/test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec3590c44f8e988965c904fada8a42edffad31346e33b6e8ad1a3168e020ac0a
+size 1932008
diff --git a/landmark2face_wy/util/__init__.py b/landmark2face_wy/util/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ae36f63d8859ec0c60dcbfe67c4ac324e751ddf7
--- /dev/null
+++ b/landmark2face_wy/util/__init__.py
@@ -0,0 +1 @@
+"""This package includes a miscellaneous collection of useful helper functions."""
diff --git a/landmark2face_wy/util/flow_util.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/util/flow_util.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..2e137f8a3088dd91286049869da222a3576d2477
--- /dev/null
+++ b/landmark2face_wy/util/flow_util.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe5f28f3f8e56f11ca64a356cc4e05d3d986156ec9ec6c44e136a2d947deece4
+size 647432
diff --git a/landmark2face_wy/util/get_data.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/util/get_data.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..f4550eeb873075ebf27966cbdb4948f2c95dd7bc
--- /dev/null
+++ b/landmark2face_wy/util/get_data.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fbc71c56031e8980a02c8f3d87a34a049532e5b1150f6ddd79e8ef8cf6d6a0c0
+size 906216
diff --git a/landmark2face_wy/util/html.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/util/html.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..10442f42c15cf124c4fb17c9400fb92d0c63e611
--- /dev/null
+++ b/landmark2face_wy/util/html.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b904436c9878cd81d9bf94d7f900a99806dfa6fb2837c5e011fd1531df352e99
+size 1575136
diff --git a/landmark2face_wy/util/image_pool.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/util/image_pool.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..fc9da5f9f85e1db9e79b5ba41d61b08ad91a874d
--- /dev/null
+++ b/landmark2face_wy/util/image_pool.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f05925441e2483e4b8aaa81de93d5ce302875bbf4d11533295994f57b7d467cf
+size 459208
diff --git a/landmark2face_wy/util/util.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/util/util.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..6cd522eb51ef320a9b102674e74c58a18b8a2fb7
--- /dev/null
+++ b/landmark2face_wy/util/util.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c57e9889e8bcb49b289f7ac8c2167265d044ce74916f3d4c6d09b72c8d1cd8ff
+size 757944
diff --git a/landmark2face_wy/util/visualizer.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/util/visualizer.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..1388dcec9b48678e11418152ca46776443f74752
--- /dev/null
+++ b/landmark2face_wy/util/visualizer.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3289991e476fea0132803f7e9489b7c327152f7f67ab247066822da13f1668b2
+size 3234024
diff --git a/license.txt b/license.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/log/dh.log b/log/dh.log
new file mode 100644
index 0000000000000000000000000000000000000000..cfa73460aa98f768c833161992954afcb43c5872
--- /dev/null
+++ b/log/dh.log
@@ -0,0 +1,186 @@
+[2025-03-18 12:50:40,644] [run.py[line:153]] [INFO] [TransDhTask init]
+[2025-03-18 12:50:41,729] [run.py[line:158]] [INFO] [任务:1002 -> audio_url:./temp/example/audio.wav video_url:./temp/example/video.mp4]
+[2025-03-18 12:50:41,732] [run.py[line:158]] [INFO] [[1002] -> ffmpeg video: ffmpeg -loglevel warning -i ./temp/example/video.mp4 -c:v libx264 -crf 15 -an -y ./temp/1002_format.mp4]
+[2025-03-18 12:50:41,790] [run.py[line:158]] [ERROR] [[1002]预处理失败,异常信息:[format video error]]
+[2025-03-18 12:50:41,790] [run.py[line:158]] [ERROR] [[1002]任务执行失败,异常信息:[[1002]预处理失败,异常信息:[format video error]]]
+[2025-03-18 12:50:41,791] [run.py[line:158]] [INFO] [>>> 任务:1002 耗时:0.06167912483215332 ]
+[2025-03-18 12:50:57,817] [run.py[line:143]] [INFO] [TransDhTask init]
+[2025-03-18 12:50:58,906] [run.py[line:147]] [INFO] [任务:1002 -> audio_url:./temp/example/audio.wav video_url:./temp/example/video.mp4]
+[2025-03-18 12:50:58,908] [run.py[line:147]] [INFO] [[1002] -> ffmpeg video: ffmpeg -loglevel warning -i ./temp/example/video.mp4 -c:v libx264 -crf 15 -an -y ./temp/1002_format.mp4]
+[2025-03-18 12:50:58,964] [run.py[line:147]] [ERROR] [[1002]预处理失败,异常信息:[format video error]]
+[2025-03-18 12:50:58,965] [run.py[line:147]] [ERROR] [[1002]任务执行失败,异常信息:[[1002]预处理失败,异常信息:[format video error]]]
+[2025-03-18 12:50:58,966] [run.py[line:147]] [INFO] [>>> 任务:1002 耗时:0.059505462646484375 ]
+[2025-03-18 12:52:06,385] [run.py[line:143]] [INFO] [TransDhTask init]
+[2025-03-18 12:52:07,560] [run.py[line:147]] [INFO] [任务:1002 -> audio_url:./example/audio.wav video_url:./example/video.mp4]
+[2025-03-18 12:52:07,646] [run.py[line:147]] [INFO] [[1002] -> ffmpeg video: ffmpeg -loglevel warning -i ./example/video.mp4 -crf 15 -vcodec copy -an -y ./1002_format.mp4]
+[2025-03-18 12:52:07,801] [run.py[line:147]] [INFO] [[1002] -> ffmpeg audio: ffmpeg -loglevel warning -i ./example/audio.wav -ac 1 -ar 16000 -acodec pcm_s16le -y ./1002_format.wav]
+[2025-03-18 12:52:07,922] [run.py[line:147]] [INFO] [[1002] -> 预处理耗时:0.35927414894104004s]
+[2025-03-18 12:52:10,169] [run.py[line:147]] [INFO] [[1002] -> get_aud_feat1 cost:2.245649576187134s]
+[2025-03-18 12:52:11,702] [process.py[line:108]] [INFO] [>>> init_wh_process进程启动]
+[2025-03-18 12:52:20,087] [process.py[line:108]] [INFO] [[1002]init_wh result :[0.8809176216714891], cost: 8.382684469223022 s]
+[2025-03-18 12:52:20,090] [run.py[line:147]] [INFO] [[1002] -> wh: [0.8809176216714891]]
+[2025-03-18 12:52:21,453] [process.py[line:108]] [INFO] [>>> 数字人图片处理进程启动]
+[2025-03-18 12:52:24,015] [process.py[line:108]] [INFO] [[1002]任务视频驱动队列启动 batch_size:4, len:150]
+[2025-03-18 12:52:24,050] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 开始循环]
+[2025-03-18 12:52:24,085] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:4]
+[2025-03-18 12:52:24,112] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:8]
+[2025-03-18 12:52:24,122] [process.py[line:108]] [INFO] [>>> audio_transfer get message:4]
+[2025-03-18 12:52:24,139] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:12]
+[2025-03-18 12:52:24,148] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:16]
+[2025-03-18 12:52:24,161] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:20]
+[2025-03-18 12:52:24,173] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:24]
+[2025-03-18 12:52:24,185] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:28]
+[2025-03-18 12:52:24,197] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:32]
+[2025-03-18 12:52:24,208] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:36]
+[2025-03-18 12:52:24,222] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:40]
+[2025-03-18 12:52:24,232] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:44]
+[2025-03-18 12:52:25,722] [process.py[line:108]] [INFO] [[1002] -> frame_id:[4] 模糊置信度:[0.969]]
+[2025-03-18 12:52:25,723] [process.py[line:108]] [INFO] [[1002] -> need chaofen .]
+[2025-03-18 12:52:25,905] [utils.py[line:145]] [INFO] [Note: detected 72 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.]
+[2025-03-18 12:52:25,906] [utils.py[line:148]] [INFO] [Note: NumExpr detected 72 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.]
+[2025-03-18 12:52:25,907] [utils.py[line:160]] [INFO] [NumExpr defaulting to 8 threads.]
+[2025-03-18 12:52:26,083] [process.py[line:108]] [INFO] [[4] -> chaofen cost:1.9595112800598145s]
+[2025-03-18 12:52:31,071] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:4, cost:6.948575258255005s]
+[2025-03-18 12:52:31,116] [process.py[line:108]] [INFO] [>>> audio_transfer get message:8]
+[2025-03-18 12:52:31,126] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:48]
+[2025-03-18 12:52:31,347] [process.py[line:108]] [INFO] [[8] -> chaofen cost:0.2294461727142334s]
+[2025-03-18 12:52:31,576] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:8, cost:0.45979762077331543s]
+[2025-03-18 12:52:31,605] [process.py[line:108]] [INFO] [>>> audio_transfer get message:12]
+[2025-03-18 12:52:31,615] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:52]
+[2025-03-18 12:52:31,818] [process.py[line:108]] [INFO] [[12] -> chaofen cost:0.21271824836730957s]
+[2025-03-18 12:52:32,036] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:12, cost:0.43187427520751953s]
+[2025-03-18 12:52:32,060] [process.py[line:108]] [INFO] [>>> audio_transfer get message:16]
+[2025-03-18 12:52:32,072] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:56]
+[2025-03-18 12:52:32,279] [process.py[line:108]] [INFO] [[16] -> chaofen cost:0.21899199485778809s]
+[2025-03-18 12:52:32,530] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:16, cost:0.47049522399902344s]
+[2025-03-18 12:52:32,552] [process.py[line:108]] [INFO] [>>> audio_transfer get message:20]
+[2025-03-18 12:52:32,567] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:60]
+[2025-03-18 12:52:32,766] [process.py[line:108]] [INFO] [[20] -> chaofen cost:0.21334147453308105s]
+[2025-03-18 12:52:32,993] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:20, cost:0.4411466121673584s]
+[2025-03-18 12:52:33,015] [process.py[line:108]] [INFO] [>>> audio_transfer get message:24]
+[2025-03-18 12:52:33,028] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:64]
+[2025-03-18 12:52:33,229] [process.py[line:108]] [INFO] [[24] -> chaofen cost:0.21344351768493652s]
+[2025-03-18 12:52:33,457] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:24, cost:0.44205546379089355s]
+[2025-03-18 12:52:33,479] [process.py[line:108]] [INFO] [>>> audio_transfer get message:28]
+[2025-03-18 12:52:33,493] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:68]
+[2025-03-18 12:52:33,697] [process.py[line:108]] [INFO] [[28] -> chaofen cost:0.21679949760437012s]
+[2025-03-18 12:52:33,924] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:28, cost:0.4448537826538086s]
+[2025-03-18 12:52:33,946] [process.py[line:108]] [INFO] [>>> audio_transfer get message:32]
+[2025-03-18 12:52:33,960] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:72]
+[2025-03-18 12:52:34,159] [process.py[line:108]] [INFO] [[32] -> chaofen cost:0.21156740188598633s]
+[2025-03-18 12:52:34,381] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:32, cost:0.43474769592285156s]
+[2025-03-18 12:52:34,403] [process.py[line:108]] [INFO] [>>> audio_transfer get message:36]
+[2025-03-18 12:52:34,417] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:76]
+[2025-03-18 12:52:34,618] [process.py[line:108]] [INFO] [[36] -> chaofen cost:0.21408891677856445s]
+[2025-03-18 12:52:34,844] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:36, cost:0.4406392574310303s]
+[2025-03-18 12:52:34,867] [process.py[line:108]] [INFO] [>>> audio_transfer get message:40]
+[2025-03-18 12:52:34,881] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:80]
+[2025-03-18 12:52:35,099] [process.py[line:108]] [INFO] [[40] -> chaofen cost:0.23105645179748535s]
+[2025-03-18 12:52:35,328] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:40, cost:0.46161866188049316s]
+[2025-03-18 12:52:35,350] [process.py[line:108]] [INFO] [>>> audio_transfer get message:44]
+[2025-03-18 12:52:35,363] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:84]
+[2025-03-18 12:52:35,577] [process.py[line:108]] [INFO] [[44] -> chaofen cost:0.22576594352722168s]
+[2025-03-18 12:52:35,808] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:44, cost:0.4577639102935791s]
+[2025-03-18 12:52:35,832] [process.py[line:108]] [INFO] [>>> audio_transfer get message:48]
+[2025-03-18 12:52:35,846] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:88]
+[2025-03-18 12:52:36,047] [process.py[line:108]] [INFO] [[48] -> chaofen cost:0.21441864967346191s]
+[2025-03-18 12:52:36,278] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:48, cost:0.4459846019744873s]
+[2025-03-18 12:52:36,301] [process.py[line:108]] [INFO] [>>> audio_transfer get message:52]
+[2025-03-18 12:52:36,315] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:92]
+[2025-03-18 12:52:36,521] [process.py[line:108]] [INFO] [[52] -> chaofen cost:0.2181704044342041s]
+[2025-03-18 12:52:36,777] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:52, cost:0.47586750984191895s]
+[2025-03-18 12:52:36,798] [process.py[line:108]] [INFO] [>>> audio_transfer get message:56]
+[2025-03-18 12:52:36,817] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:96]
+[2025-03-18 12:52:37,014] [process.py[line:108]] [INFO] [[56] -> chaofen cost:0.2147221565246582s]
+[2025-03-18 12:52:37,247] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:56, cost:0.4486660957336426s]
+[2025-03-18 12:52:37,266] [process.py[line:108]] [INFO] [>>> audio_transfer get message:60]
+[2025-03-18 12:52:37,281] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:100]
+[2025-03-18 12:52:37,483] [process.py[line:108]] [INFO] [[60] -> chaofen cost:0.21598410606384277s]
+[2025-03-18 12:52:37,703] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:60, cost:0.43683695793151855s]
+[2025-03-18 12:52:37,722] [process.py[line:108]] [INFO] [>>> audio_transfer get message:64]
+[2025-03-18 12:52:37,736] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:104]
+[2025-03-18 12:52:37,941] [process.py[line:108]] [INFO] [[64] -> chaofen cost:0.2180624008178711s]
+[2025-03-18 12:52:38,163] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:64, cost:0.4412345886230469s]
+[2025-03-18 12:52:38,183] [process.py[line:108]] [INFO] [>>> audio_transfer get message:68]
+[2025-03-18 12:52:38,197] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:108]
+[2025-03-18 12:52:38,397] [process.py[line:108]] [INFO] [[68] -> chaofen cost:0.21321654319763184s]
+[2025-03-18 12:52:38,637] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:68, cost:0.45404863357543945s]
+[2025-03-18 12:52:38,656] [process.py[line:108]] [INFO] [>>> audio_transfer get message:72]
+[2025-03-18 12:52:38,670] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:112]
+[2025-03-18 12:52:38,877] [process.py[line:108]] [INFO] [[72] -> chaofen cost:0.21999263763427734s]
+[2025-03-18 12:52:39,100] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:72, cost:0.4440436363220215s]
+[2025-03-18 12:52:39,119] [process.py[line:108]] [INFO] [>>> audio_transfer get message:76]
+[2025-03-18 12:52:39,133] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:116]
+[2025-03-18 12:52:39,347] [process.py[line:108]] [INFO] [[76] -> chaofen cost:0.22693967819213867s]
+[2025-03-18 12:52:39,568] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:76, cost:0.4492220878601074s]
+[2025-03-18 12:52:39,586] [process.py[line:108]] [INFO] [>>> audio_transfer get message:80]
+[2025-03-18 12:52:39,601] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:120]
+[2025-03-18 12:52:39,801] [process.py[line:108]] [INFO] [[80] -> chaofen cost:0.21407222747802734s]
+[2025-03-18 12:52:40,024] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:80, cost:0.4377562999725342s]
+[2025-03-18 12:52:40,052] [process.py[line:108]] [INFO] [>>> audio_transfer get message:84]
+[2025-03-18 12:52:40,068] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:124]
+[2025-03-18 12:52:40,270] [process.py[line:108]] [INFO] [[84] -> chaofen cost:0.21637320518493652s]
+[2025-03-18 12:52:40,494] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:84, cost:0.44118523597717285s]
+[2025-03-18 12:52:40,513] [process.py[line:108]] [INFO] [>>> audio_transfer get message:88]
+[2025-03-18 12:52:40,527] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:128]
+[2025-03-18 12:52:40,731] [process.py[line:108]] [INFO] [[88] -> chaofen cost:0.2170412540435791s]
+[2025-03-18 12:52:40,951] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:88, cost:0.4383111000061035s]
+[2025-03-18 12:52:40,971] [process.py[line:108]] [INFO] [>>> audio_transfer get message:92]
+[2025-03-18 12:52:40,984] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:132]
+[2025-03-18 12:52:41,187] [process.py[line:108]] [INFO] [[92] -> chaofen cost:0.2148122787475586s]
+[2025-03-18 12:52:41,416] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:92, cost:0.4454326629638672s]
+[2025-03-18 12:52:41,439] [process.py[line:108]] [INFO] [>>> audio_transfer get message:96]
+[2025-03-18 12:52:41,451] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:136]
+[2025-03-18 12:52:41,663] [process.py[line:108]] [INFO] [[96] -> chaofen cost:0.222761869430542s]
+[2025-03-18 12:52:41,887] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:96, cost:0.4477369785308838s]
+[2025-03-18 12:52:41,906] [process.py[line:108]] [INFO] [>>> audio_transfer get message:100]
+[2025-03-18 12:52:41,920] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:140]
+[2025-03-18 12:52:42,123] [process.py[line:108]] [INFO] [[100] -> chaofen cost:0.21576929092407227s]
+[2025-03-18 12:52:42,359] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:100, cost:0.4525878429412842s]
+[2025-03-18 12:52:42,379] [process.py[line:108]] [INFO] [>>> audio_transfer get message:104]
+[2025-03-18 12:52:42,394] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:144]
+[2025-03-18 12:52:42,596] [process.py[line:108]] [INFO] [[104] -> chaofen cost:0.21553897857666016s]
+[2025-03-18 12:52:42,836] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:104, cost:0.45633435249328613s]
+[2025-03-18 12:52:42,855] [process.py[line:108]] [INFO] [>>> audio_transfer get message:108]
+[2025-03-18 12:52:42,870] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:148]
+[2025-03-18 12:52:42,873] [process.py[line:108]] [INFO] [append imgs over]
+[2025-03-18 12:52:42,879] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据结束]
+[2025-03-18 12:52:43,073] [process.py[line:108]] [INFO] [[108] -> chaofen cost:0.21662592887878418s]
+[2025-03-18 12:52:43,297] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:108, cost:0.4421381950378418s]
+[2025-03-18 12:52:43,318] [process.py[line:108]] [INFO] [>>> audio_transfer get message:112]
+[2025-03-18 12:52:43,332] [process.py[line:108]] [INFO] [[1002]任务预处理进程结束]
+[2025-03-18 12:52:43,531] [process.py[line:108]] [INFO] [[112] -> chaofen cost:0.21228814125061035s]
+[2025-03-18 12:52:43,791] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:112, cost:0.47336626052856445s]
+[2025-03-18 12:52:43,811] [process.py[line:108]] [INFO] [>>> audio_transfer get message:116]
+[2025-03-18 12:52:44,034] [process.py[line:108]] [INFO] [[116] -> chaofen cost:0.2223985195159912s]
+[2025-03-18 12:52:44,262] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:116, cost:0.4509873390197754s]
+[2025-03-18 12:52:44,281] [process.py[line:108]] [INFO] [>>> audio_transfer get message:120]
+[2025-03-18 12:52:44,499] [process.py[line:108]] [INFO] [[120] -> chaofen cost:0.21637916564941406s]
+[2025-03-18 12:52:44,742] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:120, cost:0.46120476722717285s]
+[2025-03-18 12:52:44,762] [process.py[line:108]] [INFO] [>>> audio_transfer get message:124]
+[2025-03-18 12:52:44,981] [process.py[line:108]] [INFO] [[124] -> chaofen cost:0.21886157989501953s]
+[2025-03-18 12:52:45,240] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:124, cost:0.4781684875488281s]
+[2025-03-18 12:52:45,258] [process.py[line:108]] [INFO] [>>> audio_transfer get message:128]
+[2025-03-18 12:52:45,474] [process.py[line:108]] [INFO] [[128] -> chaofen cost:0.21480226516723633s]
+[2025-03-18 12:52:45,708] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:128, cost:0.44920992851257324s]
+[2025-03-18 12:52:45,726] [process.py[line:108]] [INFO] [>>> audio_transfer get message:132]
+[2025-03-18 12:52:45,943] [process.py[line:108]] [INFO] [[132] -> chaofen cost:0.21567535400390625s]
+[2025-03-18 12:52:46,181] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:132, cost:0.45519399642944336s]
+[2025-03-18 12:52:46,200] [process.py[line:108]] [INFO] [>>> audio_transfer get message:136]
+[2025-03-18 12:52:46,418] [process.py[line:108]] [INFO] [[136] -> chaofen cost:0.21763992309570312s]
+[2025-03-18 12:52:46,662] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:136, cost:0.4619452953338623s]
+[2025-03-18 12:52:46,681] [process.py[line:108]] [INFO] [>>> audio_transfer get message:140]
+[2025-03-18 12:52:46,900] [process.py[line:108]] [INFO] [[140] -> chaofen cost:0.21794748306274414s]
+[2025-03-18 12:52:47,146] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:140, cost:0.4646177291870117s]
+[2025-03-18 12:52:47,166] [process.py[line:108]] [INFO] [>>> audio_transfer get message:144]
+[2025-03-18 12:52:47,382] [process.py[line:108]] [INFO] [[144] -> chaofen cost:0.21491503715515137s]
+[2025-03-18 12:52:47,619] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:144, cost:0.4536001682281494s]
+[2025-03-18 12:52:47,639] [process.py[line:108]] [INFO] [>>> audio_transfer get message:148]
+[2025-03-18 12:52:47,857] [process.py[line:108]] [INFO] [[148] -> chaofen cost:0.21780657768249512s]
+[2025-03-18 12:52:48,098] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:148, cost:0.459348201751709s]
+[2025-03-18 12:52:48,104] [process.py[line:108]] [INFO] [>>> audio_transfer get exception msg:-1]
+[2025-03-18 12:52:48,105] [process.py[line:108]] [INFO] [[1002]任务数字人图片处理已完成]
+[2025-03-18 12:52:48,146] [run.py[line:43]] [INFO] [Custom VideoWriter [1002]视频帧队列处理已结束]
+[2025-03-18 12:52:48,151] [run.py[line:46]] [INFO] [Custom VideoWriter Silence Video saved in /mnt/nfs/bj4-v100-23/data1/yubosun/git_proj/heygem/heygem_ori_so/1002-t.mp4]
+[2025-03-18 12:52:48,155] [run.py[line:118]] [INFO] [Custom command:ffmpeg -loglevel warning -y -i ./example/audio.wav -i ./1002-t.mp4 -c:a aac -c:v libx264 -crf 15 -strict -2 ./1002-r.mp4]
+[2025-03-18 12:53:06,908] [run.py[line:147]] [INFO] [>>> 任务:1002 耗时:59.3451771736145 ]
diff --git a/model_lib/__init__.py b/model_lib/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..80148b1d9239cce56a1735f751086b34ef7e5550
--- /dev/null
+++ b/model_lib/__init__.py
@@ -0,0 +1,4 @@
+from .base_wrapper import ONNXModel
+from .model_base import ModelBase
+
+
diff --git a/model_lib/base_wrapper/__init__.py b/model_lib/base_wrapper/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..02a0264c7d88762e2b94c51df87f7c0d497b66ef
--- /dev/null
+++ b/model_lib/base_wrapper/__init__.py
@@ -0,0 +1,6 @@
+# -- coding: utf-8 --
+# @Time : 2022/8/26
+
+
+from .onnx_model import ONNXModel
+
diff --git a/model_lib/base_wrapper/onnx_model.cpython-38-x86_64-linux-gnu.so b/model_lib/base_wrapper/onnx_model.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..64c758f3607a44c47e876ed3d1565ec981944a30
--- /dev/null
+++ b/model_lib/base_wrapper/onnx_model.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70f6dfb7c88ef61746c89bd35a818447cd58df393d321e5ca0147a2f17cd68ba
+size 1143792
diff --git a/model_lib/model_base.py b/model_lib/model_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..3056664d290d3fa0c292fc717e1f125b08d90766
--- /dev/null
+++ b/model_lib/model_base.py
@@ -0,0 +1,51 @@
+# -- coding: utf-8 --
+# @Time : 2022/7/29
+
+
+
+from .base_wrapper import ONNXModel
+from pathlib import Path
+
+
+try:
+ from .base_wrapper import TRTWrapper, TRTWrapperSelf
+except:
+ pass
+
+
+# from cv2box.utils import try_import
+
+class ModelBase:
+ def __init__(self, model_info, provider):
+ self.model_path = model_info['model_path']
+
+ if 'input_dynamic_shape' in model_info.keys():
+ self.input_dynamic_shape = model_info['input_dynamic_shape']
+ else:
+ self.input_dynamic_shape = None
+
+ if 'picklable' in model_info.keys():
+ picklable = model_info['picklable']
+ else:
+ picklable = False
+
+ if 'trt_wrapper_self' in model_info.keys():
+ TRTWrapper = TRTWrapperSelf
+
+ # init model
+ if Path(self.model_path).suffix == '.engine':
+ self.model_type = 'trt'
+ self.model = TRTWrapper(self.model_path)
+ elif Path(self.model_path).suffix == '.tjm':
+ self.model_type = 'tjm'
+ self.model = TJMWrapper(self.model_path, provider=provider)
+ elif Path(self.model_path).suffix in ['.onnx', '.bin']:
+ self.model_type = 'onnx'
+ if not picklable:
+ if 'encrypt' in model_info.keys():
+ self.model_path = load_encrypt_model(self.model_path, key=model_info['encrypt'])
+ self.model = ONNXModel(self.model_path, provider=provider, input_dynamic_shape=self.input_dynamic_shape)
+ else:
+ self.model = OnnxModelPickable(self.model_path, provider=provider, )
+ else:
+ raise 'check model suffix , support engine/tjm/onnx now.'
diff --git a/pack/packaging_script.py b/pack/packaging_script.py
new file mode 100644
index 0000000000000000000000000000000000000000..45dce18a9618d5ce413b98712a4b7d1e626c09e7
--- /dev/null
+++ b/pack/packaging_script.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from distutils.core import setup
+from Cython.Build import cythonize
+from distutils.extension import Extension
+import shutil
+import os
+from multiprocessing import Pool
+
+
+
+
+def packaged_search(path, directory_file=None):
+ '''
+ 遍历当前目录下文件及文件夹
+ :param path:
+ :param directory_file:
+ :return:
+ '''
+ if directory_file:
+ for i in os.listdir(path):
+ if i == directory_file:
+ path = os.path.join(path, directory_file)
+ pack_so(path)
+ elif os.path.isdir(os.path.join(path, i)):
+ packaged_search(os.path.join(path, i), directory_file)
+ else:
+ pack_so(path)
+
+
+def pack_so(path):
+ '''
+ 递归遍历所有文件夹,并创建进程池,将任务放入进程
+ :param path:
+ :return:
+ '''
+ all_file_path = []
+ for i in os.listdir(path):
+ all_file_path.append(os.path.join(path, i))
+ # 创建进程池
+ p = Pool(8)
+ for j in all_file_path:
+ p.apply_async(pack_to_so_and_del_src, args=(j, ))
+ p.close()
+ p.join()
+ for g in all_file_path:
+ # 是文件夹递归
+ if os.path.isdir(os.path.join(g)):
+ pack_so(g)
+
+
+def pack_to_so_and_del_src(path):
+ '''
+ 将需要打包的.py脚本进行打包
+ :param path:
+ :return:
+ '''
+ if '.py' in path and '.pyc' not in path and '__init__.py' not in path:
+ setup(
+ ext_modules=cythonize(Extension(path.rsplit('/', 1)[1].rsplit('.', 1)[0], [path])),
+ compiler_directives={'language_level': 3}
+ )
+ # path_os = os.getcwd().rsplit('/', 1)[0] + '/pack/build/lib.linux-x86_64-3.6' # TODO
+ path_os = os.getcwd().rsplit('/', 1)[0] + '/pack/build/lib.linux-x86_64-3.8'
+ for j in os.listdir(path_os):
+ # 将打好的包放入原文件夹下
+ shutil.move(os.path.join(path_os, j), os.path.join(path.rsplit('/', 1)[0], j))
+ # 删除.py文件
+ # if path.rsplit('/', 1)[1] not in ['packaging_script.py', 'manage.py', 'client.py']:
+ if path.rsplit('/', 1)[1] not in ['packaging_script.py', 'app.py', 'app_local.py', 'tts_config.py']:
+ os.remove(path)
+ # shutil.rmtree('./build')
+ # 删除.c文件
+ elif len(path.rsplit('.', 1)) == 2:
+ if path.rsplit('.', 1)[1] == 'c':
+ os.remove(path)
+
+
+def view_log():
+ '''
+ 删除log日志文件
+ :return:
+ '''
+ pass
+
+
+if __name__ == '__main__':
+ path = os.getcwd().rsplit('/', 1)[0]
+ packaged_search(path)
+ # 查看版本号并创建外文件写入
+ # edition = os.popen('git show')
+ # with open('./edition.txt', 'w') as e:
+ # e.write(edition.readline())
+
+"""
+
+usage:
+ python3 packaging_script.py build_ext
+打包说明:
+
+"""
\ No newline at end of file
diff --git a/preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so b/preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..b3ee50a7a7fb83e99cf0f03ebcbdfd3d1c28101c
--- /dev/null
+++ b/preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7be130473effd974a04755c90cbc1149944144c7425321d92fa5bf699edb5b7
+size 2746048
diff --git a/pretrain_models/face_lib/face_parsing/79999_iter.onnx b/pretrain_models/face_lib/face_parsing/79999_iter.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..d34346fbb5f9f2e80122952fcaa2d36159a2438b
--- /dev/null
+++ b/pretrain_models/face_lib/face_parsing/79999_iter.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:506b4c6e514a348980a49fd3f19d53ce3807dac57fedac127e917165fac0e928
+size 52588603
diff --git a/pretrain_models/face_lib/face_restore/gfpgan/GFPGANv1.4.onnx b/pretrain_models/face_lib/face_restore/gfpgan/GFPGANv1.4.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..c385c7a13abdc4403376e2c623b8d65532987b1e
--- /dev/null
+++ b/pretrain_models/face_lib/face_restore/gfpgan/GFPGANv1.4.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f061a99cb6c8025dd5d29bf04e4d0349d613bb8dc1484555ea79403cee2cf448
+size 340255593
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..77e00d188001fa488431387fcb4d77abf7ed2b02
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,65 @@
+cv2box==0.5.9
+apstone==0.0.8
+appdirs==1.4.4
+audioread==2.1.9
+typeguard==2.13.3
+cffi==1.15.0
+charset-normalizer==2.0.12
+click==8.1.3
+colorama==0.4.4
+cycler==0.11.0
+decorator==5.1.1
+filelock==3.7.1
+flatbuffers==2.0
+fonttools==4.36.0
+freetype-py==2.3.0
+huggingface-hub==0.0.8
+idna==3.3
+imageio==2.19.3
+importlib-metadata==4.11.4
+joblib==1.1.0
+kiwisolver==1.4.4
+kornia==0.6.6
+librosa==0.8.1
+matplotlib==3.5.3
+networkx==2.6.3
+numba==0.55.2
+numexpr==2.8.6
+numpy==1.21.6
+onnxruntime-gpu==1.9.0
+opencv-python==4.7.0.72
+packaging==21.3
+pillow==9.1.1
+pooch==1.6.0
+protobuf==4.21.5
+psutil==5.9.1
+pycparser==2.21
+pyglet==1.5.26
+pyopengl==3.1.0
+pyparsing==3.0.9
+pyrender==0.1.45
+python-dateutil==2.8.2
+pywavelets==1.3.0
+pyyaml==6.0
+regex==2022.6.2
+requests==2.27.1
+resampy==0.2.2
+sacremoses==0.0.53
+scikit-image==0.19.3
+scikit-learn==1.0.2
+scipy==1.7.1
+six==1.16.0
+soundfile==0.10.3.post1
+threadpoolctl==3.1.0
+tifffile==2021.11.2
+tokenizers==0.10.3
+torch==1.11.0+cu113
+torchaudio==0.11.0+cu113
+torchvision==0.12.0+cu113
+tqdm==4.64.0
+transformers==4.6.1
+trimesh==3.12.7
+typeguard==2.13.3
+typing-extensions==4.2.0
+urllib3==1.26.9
+zipp==3.8.0
diff --git a/requirements_0.txt b/requirements_0.txt
new file mode 100644
index 0000000000000000000000000000000000000000..5f4d175967c2695f2d1e2e783996d83f518acdf5
--- /dev/null
+++ b/requirements_0.txt
@@ -0,0 +1,104 @@
+aiofiles==23.2.1
+annotated-types==0.7.0
+anyio==4.5.2
+apstone==0.0.8
+audioread==3.0.1
+blinker==1.8.2
+certifi==2025.1.31
+cffi==1.17.1
+charset-normalizer==3.4.1
+click==8.1.8
+coloredlogs==15.0.1
+contourpy==1.1.1
+cv2box==0.5.9
+cycler==0.12.1
+decorator==5.2.1
+einops==0.8.1
+exceptiongroup==1.2.2
+fastapi==0.115.11
+ffmpy==0.5.0
+filelock==3.16.1
+Flask==3.0.3
+flatbuffers==25.2.10
+fonttools==4.56.0
+fsspec==2025.3.0
+gradio==4.44.1
+gradio_client==1.3.0
+h11==0.14.0
+httpcore==1.0.7
+httpx==0.28.1
+huggingface-hub==0.29.3
+humanfriendly==10.0
+idna==3.10
+imageio==2.35.1
+importlib_metadata==8.5.0
+importlib_resources==6.4.5
+itsdangerous==2.2.0
+Jinja2==3.1.6
+joblib==1.4.2
+kiwisolver==1.4.7
+lazy_loader==0.4
+librosa==0.11.0
+llvmlite==0.41.1
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.7.5
+mdurl==0.1.2
+mpmath==1.3.0
+msgpack==1.1.0
+networkx==3.1
+numba==0.58.1
+numexpr==2.8.6
+numpy==1.24.4
+onnxruntime-gpu==1.16.0
+opencv-python==4.11.0.86
+orjson==3.10.15
+packaging==24.2
+pandas==2.0.3
+pillow==10.4.0
+platformdirs==4.3.6
+pooch==1.8.2
+protobuf==5.29.4
+pycparser==2.22
+pydantic==2.10.6
+pydantic_core==2.27.2
+pydub==0.25.1
+Pygments==2.19.1
+pyparsing==3.1.4
+python-dateutil==2.9.0.post0
+python-multipart==0.0.20
+pytz==2025.1
+PyWavelets==1.4.1
+PyYAML==6.0.2
+requests==2.32.3
+rich==13.9.4
+ruff==0.11.1
+scikit-image==0.21.0
+scikit-learn==1.3.2
+scipy==1.10.1
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+soundfile==0.13.1
+soxr==0.3.7
+spark-parser==1.8.9
+starlette==0.44.0
+sympy==1.13.3
+threadpoolctl==3.5.0
+tifffile==2023.7.10
+tomlkit==0.12.0
+torch==1.11.0+cu113
+torchaudio==0.11.0+cu113
+torchvision==0.12.0+cu113
+tqdm==4.67.1
+typeguard==2.13.3
+typer==0.15.2
+typing_extensions==4.12.2
+tzdata==2025.1
+urllib3==2.2.3
+uvicorn==0.33.0
+websockets==12.0
+Werkzeug==3.0.6
+xdis==6.1.3
+zipp==3.20.2
diff --git a/run.py b/run.py
new file mode 100644
index 0000000000000000000000000000000000000000..046776a676869d38e1c967145883f65b035cb291
--- /dev/null
+++ b/run.py
@@ -0,0 +1,191 @@
+import argparse
+import gc
+import json
+import os
+import subprocess
+import sys
+import threading
+import time
+import traceback
+import uuid
+from enum import Enum
+
+import queue
+import cv2
+from flask import Flask, request
+
+import service.trans_dh_service
+
+from h_utils.custom import CustomError
+from y_utils.config import GlobalConfig
+from y_utils.logger import logger
+
+
+def get_args():
+    """Parse command-line options for the demo runner.
+
+    Returns an argparse.Namespace with:
+        audio_path: path to the driving audio file (default example/audio.wav).
+        video_path: path to the template video file (default example/video.mp4).
+    """
+    parser = argparse.ArgumentParser(
+        formatter_class=(argparse.ArgumentDefaultsHelpFormatter)
+    )
+
+    parser.add_argument(
+        "--audio_path",
+        type=str,
+        default="example/audio.wav",
+        help="path to local audio file",
+    )
+    parser.add_argument(
+        "--video_path",
+        type=str,
+        default="example/video.mp4",
+        help="path to local video file",
+    )
+    opt = parser.parse_args()
+    return opt
+
+
+def write_video(
+    output_imgs_queue,
+    temp_dir,
+    result_dir,
+    work_id,
+    audio_path,
+    result_queue,
+    width,
+    height,
+    fps,
+    watermark_switch=0,
+    digital_auth=0,
+):
+    """Drain rendered frames from ``output_imgs_queue`` into a silent mp4,
+    then mux the audio (and optional watermark / digital-human badge
+    overlays) into the final result via ffmpeg.
+
+    Queue item protocol: ``(state, reason, frames)``.
+      * ``state is True``  -> producer finished normally, stop reading.
+      * ``state is False`` -> producer failed; ``reason`` says why.
+      * otherwise          -> ``frames`` is an iterable of BGR images.
+
+    On success ``[True, result_path]`` is pushed onto ``result_queue``;
+    on failure ``[False, <message>]`` is pushed instead.
+    """
+    output_mp4 = os.path.join(temp_dir, "{}-t.mp4".format(work_id))
+    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+    result_path = os.path.join(result_dir, "{}-r.mp4".format(work_id))
+    video_write = cv2.VideoWriter(output_mp4, fourcc, fps, (width, height))
+    print("Custom VideoWriter init done")
+    try:
+        while True:
+            state, reason, value_ = output_imgs_queue.get()
+            if isinstance(state, bool):
+                # Sentinel item: True = end of stream, False = producer error.
+                if state:
+                    logger.info(
+                        "Custom VideoWriter [{}]视频帧队列处理已结束".format(work_id)
+                    )
+                    logger.info(
+                        "Custom VideoWriter Silence Video saved in {}".format(
+                            os.path.realpath(output_mp4)
+                        )
+                    )
+                    break
+                logger.error(
+                    "Custom VideoWriter [{}]任务视频帧队列 -> 异常原因:[{}]".format(
+                        work_id, reason
+                    )
+                )
+                raise CustomError(reason)
+            for result_img in value_:
+                video_write.write(result_img)
+        # Release exactly once; the original released twice on the success path.
+        video_write.release()
+        if watermark_switch == 1 and digital_auth == 1:
+            logger.info(
+                "Custom VideoWriter [{}]任务需要水印和数字人标识".format(work_id)
+            )
+            # The width>height and else branches built byte-identical commands,
+            # so the redundant orientation check was collapsed.
+            command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
+                audio_path,
+                output_mp4,
+                GlobalConfig.instance().watermark_path,
+                GlobalConfig.instance().digital_auth_path,
+                result_path,
+            )
+            logger.info("command:{}".format(command))
+        elif watermark_switch == 1 and digital_auth == 0:
+            logger.info("Custom VideoWriter [{}]任务需要水印".format(work_id))
+            command = 'ffmpeg -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10" -c:a aac -crf 15 -strict -2 {}'.format(
+                audio_path,
+                output_mp4,
+                GlobalConfig.instance().watermark_path,
+                result_path,
+            )
+            logger.info("command:{}".format(command))
+        elif watermark_switch == 0 and digital_auth == 1:
+            logger.info("Custom VideoWriter [{}]任务需要数字人标识".format(work_id))
+            # Same collapse as above: both orientation branches were identical.
+            command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format(
+                audio_path,
+                output_mp4,
+                GlobalConfig.instance().digital_auth_path,
+                result_path,
+            )
+            logger.info("command:{}".format(command))
+        else:
+            command = "ffmpeg -loglevel warning -y -i {} -i {} -c:a aac -c:v libx264 -crf 15 -strict -2 {}".format(
+                audio_path, output_mp4, result_path
+            )
+        logger.info("Custom command:{}".format(command))
+        subprocess.call(command, shell=True)
+        print("###### Custom Video Writer write over")
+        print(f"###### Video result saved in {os.path.realpath(result_path)}")
+        # BUG FIX: enqueue the success result BEFORE exiting. The original
+        # called exit(0) first, so [True, result_path] was never delivered.
+        result_queue.put([True, result_path])
+        exit(0)
+    except Exception as e:
+        logger.error(
+            "Custom VideoWriter [{}]视频帧队列处理异常结束,异常原因:[{}]".format(
+                work_id, e.__str__()
+            )
+        )
+        result_queue.put(
+            [
+                False,
+                "[{}]视频帧队列处理异常结束,异常原因:[{}]".format(
+                    work_id, e.__str__()
+                ),
+            ]
+        )
+    logger.info("Custom VideoWriter 后处理进程结束")
+
+
+# Replace the service's frame-writing hook with the local implementation
+# defined above so results are written to the local result directory.
+service.trans_dh_service.write_video = write_video
+
+
+def main():
+    """Run one digital-human synthesis task from the CLI arguments."""
+    opt = get_args()
+    # Fall back to the bundled example media when the given paths don't exist.
+    if not os.path.exists(opt.audio_path):
+        audio_url = "example/audio.wav"
+    else:
+        audio_url = opt.audio_path
+
+    if not os.path.exists(opt.video_path):
+        video_url = "example/video.mp4"
+    else:
+        video_url = opt.video_path
+    # Strip our CLI args so any argparse inside the service sees none.
+    sys.argv = [sys.argv[0]]
+    task = service.trans_dh_service.TransDhTask()
+    # NOTE(review): fixed sleep presumably waits for TransDhTask's async
+    # initialisation — a readiness signal would be more robust; confirm.
+    time.sleep(10)  # somehow, this works...
+
+    code = "1004"  # work id passed through to task.work
+    task.work(audio_url, video_url, code, 0, 0, 0, 0)
+
+
+if __name__ == "__main__":
+    main()
+
+# python run.py
+# python run.py --audio_path example/audio.wav --video_path example/video.mp4
diff --git a/service/__init__.py b/service/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b21ce203e632922945642d636d583b9a4f05915
--- /dev/null
+++ b/service/__init__.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python
+# coding=utf-8
+"""
+@project : face2face_train
+@author : huyi
+@file : __init__.py.py
+@ide : PyCharm
+@time : 2023-12-06 14:46:40
+"""
diff --git a/service/server.cpython-38-x86_64-linux-gnu.so b/service/server.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..c38b0e84ab2002f340c4b497a8fccdb429bb989b
--- /dev/null
+++ b/service/server.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d46fb64f31e5eb10ee09a487715577fa81a89a96ae131f6c52401fad7f53998d
+size 594112
diff --git a/service/trans_dh_service.cpython-38-x86_64-linux-gnu.so b/service/trans_dh_service.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..cdd62076606f22255257b7a8b8fe544ed038bb7e
--- /dev/null
+++ b/service/trans_dh_service.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b889283ed5cb58960f599388418e0acf4d00eddb6b62e0ebf0bf40c23b6b8e4
+size 16362968
diff --git a/sources.list b/sources.list
new file mode 100644
index 0000000000000000000000000000000000000000..a247bbfa612a824e1abf8f91e169865ae3adb572
--- /dev/null
+++ b/sources.list
@@ -0,0 +1,13 @@
+# 默认注释了源码镜像以提高 apt update 速度,如有需要可自行取消注释
+deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse
+# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse
+deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse
+# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse
+deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse
+# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse
+deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse
+# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse
+
+# 预发布软件源,不建议启用
+# deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse
+# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse
diff --git a/wenet/compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so b/wenet/compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..d4d148b37f716c653238111b135bf067d19d972d
--- /dev/null
+++ b/wenet/compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c09cd4d98e4b6e35361a5c2822c6b38cdc776c00f46f80ffce1d22b75e7bdf8
+size 2096032
diff --git a/wenet/examples/aishell/aidata/conf/train_conformer_multi_cn.yaml b/wenet/examples/aishell/aidata/conf/train_conformer_multi_cn.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fd2bd417acfd107cf335619bb81a918df913af8e
--- /dev/null
+++ b/wenet/examples/aishell/aidata/conf/train_conformer_multi_cn.yaml
@@ -0,0 +1,85 @@
+# network architecture
+# encoder related
+encoder: conformer
+encoder_conf:
+ output_size: 256 # dimension of attention
+ attention_heads: 4
+ linear_units: 2048 # the number of units of position-wise feed forward
+ num_blocks: 12 # the number of encoder blocks
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.0
+  input_layer: conv2d # encoder input type, you can choose conv2d, conv2d6 and conv2d8
+ normalize_before: true
+ cnn_module_kernel: 15
+ use_cnn_module: True
+ activation_type: 'swish'
+ pos_enc_layer_type: 'rel_pos'
+ selfattention_layer_type: 'rel_selfattn'
+
+# decoder related
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 2048
+ num_blocks: 6
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.0
+ src_attention_dropout_rate: 0.0
+
+# hybrid CTC/attention
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1 # label smoothing option
+ length_normalized_loss: false
+
+# use raw_wav or kaldi feature
+raw_wav: false
+
+# feature extraction
+collate_conf:
+ # waveform level config
+ wav_distortion_conf:
+ wav_dither: 0.1
+ wav_distortion_rate: 0.0
+ distortion_methods: []
+ speed_perturb: true
+ feature_extraction_conf:
+ feature_type: 'fbank'
+ mel_bins: 80
+ frame_shift: 10
+ frame_length: 25
+ using_pitch: false
+ # spec level config
+ # spec_swap: false
+ feature_dither: 0.0 # add dither [-feature_dither,feature_dither] on fbank feature
+ spec_aug: true
+ spec_aug_conf:
+ warp_for_time: False
+ num_t_mask: 2
+ num_f_mask: 2
+ max_t: 50
+ max_f: 10
+ max_w: 80
+
+
+# dataset related
+dataset_conf:
+ max_length: 1300 #40960
+ min_length: 0
+ batch_type: 'static' # static or dynamic
+ batch_size: 40
+ sort: true
+
+grad_clip: 5
+accum_grad: 4
+max_epoch: 240
+log_interval: 100
+
+optim: adam
+optim_conf:
+ lr: 0.0025 #0.0025
+scheduler: warmuplr # pytorch v1.1.0+ required
+scheduler_conf:
+ warmup_steps: 100000
diff --git a/wenet/examples/aishell/aidata/conf/train_conformer_multi_cn_linear.yaml b/wenet/examples/aishell/aidata/conf/train_conformer_multi_cn_linear.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eeee99201ae15bab94f6cfb1effcb7319c293657
--- /dev/null
+++ b/wenet/examples/aishell/aidata/conf/train_conformer_multi_cn_linear.yaml
@@ -0,0 +1,85 @@
+# network architecture
+# encoder related
+encoder: conformer
+encoder_conf:
+ output_size: 256 # dimension of attention
+ attention_heads: 4
+ linear_units: 1024 # the number of units of position-wise feed forward
+ num_blocks: 6 # the number of encoder blocks
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ attention_dropout_rate: 0.0
+  input_layer: linear # encoder input type, you can choose linear, conv2d, conv2d6 and conv2d8
+ normalize_before: true
+ cnn_module_kernel: 15
+ use_cnn_module: True
+ activation_type: 'swish'
+ pos_enc_layer_type: 'rel_pos'
+ selfattention_layer_type: 'rel_selfattn'
+
+# decoder related
+decoder: transformer
+decoder_conf:
+ attention_heads: 4
+ linear_units: 1024
+ num_blocks: 3
+ dropout_rate: 0.1
+ positional_dropout_rate: 0.1
+ self_attention_dropout_rate: 0.0
+ src_attention_dropout_rate: 0.0
+
+# hybrid CTC/attention
+model_conf:
+ ctc_weight: 0.3
+ lsm_weight: 0.1 # label smoothing option
+ length_normalized_loss: false
+
+# use raw_wav or kaldi feature
+raw_wav: false
+
+# feature extraction
+collate_conf:
+ # waveform level config
+ wav_distortion_conf:
+ wav_dither: 0.1
+ wav_distortion_rate: 0.0
+ distortion_methods: []
+ speed_perturb: true
+ feature_extraction_conf:
+ feature_type: 'fbank'
+ mel_bins: 80
+ frame_shift: 10
+ frame_length: 25
+ using_pitch: false
+ # spec level config
+ # spec_swap: false
+ feature_dither: 0.0 # add dither [-feature_dither,feature_dither] on fbank feature
+ spec_aug: true
+ spec_aug_conf:
+ warp_for_time: False
+ num_t_mask: 2
+ num_f_mask: 2
+ max_t: 50
+ max_f: 10
+ max_w: 80
+
+
+# dataset related
+dataset_conf:
+ max_length: 1300 #40960
+ min_length: 0
+ batch_type: 'static' # static or dynamic
+ batch_size: 40
+ sort: true
+
+grad_clip: 5
+accum_grad: 4
+max_epoch: 240
+log_interval: 100
+
+optim: adam
+optim_conf:
+ lr: 0.002
+scheduler: warmuplr # pytorch v1.1.0+ required
+scheduler_conf:
+ warmup_steps: 50000
diff --git a/wenet/examples/aishell/aidata/exp/conformer/wenetmodel.pt b/wenet/examples/aishell/aidata/exp/conformer/wenetmodel.pt
new file mode 100644
index 0000000000000000000000000000000000000000..60e81c19f221bfdb59560129788ab5e05048ac1f
--- /dev/null
+++ b/wenet/examples/aishell/aidata/exp/conformer/wenetmodel.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f59bf2ebb02c7fa560b44370a656392e8cc8d8214a0d0a99e8fd8edde39c792
+size 191093698
diff --git a/wenet/tools/_extract_feats.py b/wenet/tools/_extract_feats.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0dc9ad2f8af0ad7521faa55e6a89bcf86914658
--- /dev/null
+++ b/wenet/tools/_extract_feats.py
@@ -0,0 +1,452 @@
+import librosa
+# import tensorflow as tf
+import numpy as np
+from scipy.io import wavfile
+from scipy import signal
+
+import torchaudio.compliance.kaldi as kaldi
+import torchaudio
+# torchaudio.set_audio_backend("sox_io")
+
+
+def _extract_feature(wav_path):
+    """ Extract acoustic fbank feature from origin waveform.
+
+    Args:
+        wav_path: path to a 16-bit PCM wav file.
+
+    Returns:
+        np.ndarray of shape [num_frames, 80] (kaldi-style fbank).
+    """
+    # BUG FIX: torchaudio.load_wav was removed in torchaudio >= 0.9 (this
+    # project pins torchaudio 0.11). load(..., normalize=False) keeps the
+    # raw int16 sample values — the same scaling load_wav produced — which
+    # this kaldi.fbank configuration expects; cast to float for fbank.
+    waveform, sample_rate = torchaudio.load(wav_path, normalize=False)
+    waveform = waveform.float()
+
+    mat = kaldi.fbank(
+        waveform,
+        num_mel_bins=80,
+        frame_length=25,
+        frame_shift=10,
+        dither=0.1,
+        energy_floor=0.0,
+        sample_frequency=sample_rate)
+    mat = mat.detach().numpy()
+
+    return mat
+
+def _extract_feature_norm(wav_path):
+    """ Extract acoustic fbank feature from origin waveform.
+
+    The original body was a byte-for-byte duplicate of _extract_feature,
+    so it now simply delegates (kept for backward compatibility).
+    """
+    return _extract_feature(wav_path)
+
+
+# Shared signal-processing hyper-parameters for the feature extractors below.
+hparams = {
+    'sample_rate': 16000,  # 16000 samples per second
+    'preemphasis': 0.97,
+    'n_fft': 1024,
+    'hop_length': 200,  # frame hop in samples (NOTE(review): old comment said 80/5ms; 200 @ 16 kHz is 12.5 ms — confirm)
+    'win_length': 800,  # frame width in samples (NOTE(review): old comment said 400/25ms; 800 @ 16 kHz is 50 ms — confirm)
+    'num_mels': 80,
+    'n_mfcc': 13,
+    'window': 'hann',
+    'fmin': 0.,
+    'fmax': 8000.,
+    'ref_db': 20,  # reference level subtracted in power2db
+    'min_db': -80.0,  # restrict the dynamic range of log power
+    'iterations': 100,  # griffin_lim #iterations
+    'silence_db': -28.0,
+    'center': True,  # whether frames are centered on their timestamps (librosa stft `center`)
+}
+
+# Lazily-built mel filterbank cache; see power_spec2mel().
+_mel_basis = None
+
+
+def load_wav(wav_f, sr=None):
+    # Load a wav file path with librosa, or pass an already-loaded array through.
+    # wav_arr, _ = librosa.load(wav_f, sr=sr)
+    # return wav_arr
+    if type(wav_f)==str:
+        wav_arr, _ = librosa.load(wav_f, sr=sr)
+    else:
+        wav_arr = wav_f
+    return wav_arr
+
+def write_wav(write_path, wav_arr, sr):
+    # Peak-normalize to int16 range and write a PCM wav file.
+    # NOTE(review): scales wav_arr IN PLACE — callers' arrays are mutated.
+    wav_arr *= 32767 / max(0.01, np.max(np.abs(wav_arr)))
+    wavfile.write(write_path, sr, wav_arr.astype(np.int16))
+    return
+
+def preempahsis(wav_arr, pre_param=hparams['preemphasis']):
+    # Apply a first-order pre-emphasis filter: y[n] = x[n] - a*x[n-1].
+    return signal.lfilter([1, -pre_param], [1], wav_arr)
+
+def deemphasis(wav_arr, pre_param=hparams['preemphasis']):
+    # Inverse of preempahsis().
+    return signal.lfilter([1], [1, -pre_param], wav_arr)
+
+def split_wav(wav_arr, top_db=-hparams['silence_db']):
+    # Return (start, end) sample intervals of non-silent audio.
+    intervals = librosa.effects.split(wav_arr, top_db=top_db)
+    return intervals
+
+def mulaw_encode(wav_arr, quantization_channels):
+    # Mu-law companding of values in [-1, 1]; output stays continuous in [-1, 1].
+    mu = float(quantization_channels - 1)
+    safe_wav_abs = np.minimum(np.abs(wav_arr), 1.0)
+    encoded = np.sign(wav_arr) * np.log1p(mu * safe_wav_abs) / np.log1p(mu)
+    return encoded
+
+def mulaw_encode_quantize(wav_arr, quantization_channels):
+    # Mu-law companding followed by quantization to integer bins [0, mu].
+    mu = float(quantization_channels - 1)
+    safe_wav_abs = np.minimum(np.abs(wav_arr), 1.0)
+    encoded = np.sign(wav_arr) * np.log1p(mu * safe_wav_abs) / np.log1p(mu)
+    return ((encoded + 1.) / 2 * mu + 0.5).astype(np.int32)
+
+def mulaw_decode(encoded, quantization_channels):
+    # Inverse of mulaw_encode (continuous input).
+    mu = float(quantization_channels - 1)
+    magnitude = (1 / mu) * ((1 + mu) ** abs(encoded) - 1.)
+    return np.sign(encoded) * magnitude
+
+def mulaw_decode_quantize(encoded, quantization_channels):
+    # Inverse of mulaw_encode_quantize (integer-bin input).
+    mu = float(quantization_channels - 1)
+    signal = 2 * (encoded.astype(np.float32) / mu) - 1.
+    magnitude = (1 / mu) * ((1 + mu) ** abs(signal) - 1.)
+    return np.sign(signal) * magnitude
+
+def mulaw_encode_quantize_tf(wav_batch, quantization_channels):
+    # TensorFlow variant of mulaw_encode_quantize.
+    # NOTE(review): tensorflow is never imported in this module (the import at
+    # the top is commented out), so calling this raises NameError on `tf`.
+    with tf.variable_scope('mulaw_encode'):
+        mu = tf.cast(quantization_channels - 1, tf.float32)
+        safe_wav_abs = tf.minimum(tf.abs(wav_batch), 1.0)
+        encoded = tf.sign(wav_batch) * tf.log1p(mu * safe_wav_abs) / tf.log1p(mu)
+        return tf.cast((encoded + 1.) / 2 * mu + 0.5, tf.int32)
+
+# def mulaw_encode_tf(wav_batch, quantization_channels):
+#     with tf.variable_scope('mulaw_encode'):
+#         mu = tf.cast(quantization_channels - 1, tf.float32)
+#         safe_wav_abs = tf.minimum(tf.abs(wav_batch), 1.0)
+#         encoded = tf.sign(wav_batch) * tf.log1p(mu * safe_wav_abs) / tf.log1p(mu)
+#         return encoded
+
+# BUG FIX: the bodies of the two functions below were left uncommented while
+# their `def` lines were commented out, which made this module fail to import
+# with an IndentationError. They are now commented out in full.
+# def mulaw_decode_quantize_tf(encoded, quantization_channels):
+#     with tf.variable_scope('mulaw_decode'):
+#         mu = tf.cast(quantization_channels - 1, tf.float32)
+#         signal = 2 * (tf.cast(encoded, tf.float32) / mu) - 1.
+#         magnitude = (1 / mu) * ((1 + mu) ** abs(signal) - 1.)
+#         return tf.sign(signal) * magnitude
+
+# def mulaw_decode_tf(encoded, quantization_channels):
+#     with tf.variable_scope('mulaw_decode'):
+#         mu = tf.cast(quantization_channels - 1, tf.float32)
+#         magnitude = (1 / mu) * ((1 + mu) ** abs(encoded) - 1.)
+#         return tf.sign(encoded) * magnitude
+
+def stft(wav_arr, n_fft=hparams['n_fft'],  # short-time Fourier transform
+         hop_len=hparams['hop_length'],
+         win_len=hparams['win_length'],
+         window=hparams['window'],
+         center=hparams['center']):
+    # return shape: [n_freqs, time] (complex)
+    return librosa.core.stft(wav_arr, n_fft=n_fft, hop_length=hop_len,
+                             win_length=win_len, window=window, center=center)
+
+# def stft_tf(wav_arr, n_fft=hparams['n_fft'],
+#             hop_len=hparams['hop_length'],
+#             win_len=hparams['win_length'],
+#             window=hparams['window']):
+#     window_f = {'hann': tf.contrib.signal.hann_window,
+#                 'hamming': tf.contrib.signal.hamming_window}[window]
+#     # returned value is of shape [..., frames, fft_bins] and complex64 value
+#     return tf.contrib.signal.stft(signals=wav_arr, frame_length=win_len,
+#                                   frame_step=hop_len, fft_length=n_fft,
+#                                   window_fn=window_f)
+
+def istft(stft_matrix, hop_len=hparams['hop_length'],
+          win_len=hparams['win_length'], window=hparams['window']):
+    # Inverse STFT back to a waveform.
+    # stft_matrix should be complex stft results instead of magnitude spectrogram
+    # or power spectrogram, and of shape [n_freqs, time]
+    return librosa.core.istft(stft_matrix, hop_length=hop_len,
+                              win_length=win_len, window=window)
+
+# def istft_tf(stft_matrix, hop_len=hparams['hop_length'], n_fft=hparams['n_fft'],
+#              win_len=hparams['win_length'], window=hparams['window']):
+#     window_f = {'hann': tf.contrib.signal.hann_window,
+#                 'hamming': tf.contrib.signal.hamming_window}[window]
+#     # stft_matrix should be of shape [..., frames, fft_bins]
+#     return tf.contrib.signal.inverse_stft(stft_matrix, frame_length=win_len,
+#                                           frame_step=hop_len, fft_length=n_fft,
+#                                           window_fn=window_f)
+
+def spectrogram(wav_arr, n_fft=hparams['n_fft'],
+                hop_len=hparams['hop_length'],
+                win_len=hparams['win_length'],
+                window=hparams['window'],
+                center=hparams['center']):
+    # Compute STFT products. 'magnitude' and 'power' have shape
+    # [time, n_freqs]; 'stft' keeps librosa's [n_freqs, time] complex layout.
+    s = stft(wav_arr, n_fft=n_fft, hop_len=hop_len,
+             win_len=win_len, window=window, center=center).T
+    magnitude = np.abs(s)  # magnitude spectrum
+    power = magnitude ** 2  # power spectrum (|STFT|^2)
+    return {'magnitude': magnitude,
+            'power': power,
+            'stft': s.T}
+
+def power_spec2mel(power_spec, sr=hparams['sample_rate'], n_fft=hparams['n_fft'],
+                   num_mels=hparams['num_mels'], fmin=hparams['fmin'], fmax=hparams['fmax']):
+    # power_spec should be of shape [time, 1+n_fft/2]
+    power_spec_t = power_spec.T
+    global _mel_basis
+    # Build the mel filterbank once and cache it at module level.
+    # NOTE(review): the cache ignores later changes to sr/n_fft/num_mels —
+    # confirm all callers use the same parameters.
+    _mel_basis = (librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax)
+                  if _mel_basis is None else _mel_basis)  # [n_mels, 1+n_fft/2]
+    mel_spec = np.dot(_mel_basis, power_spec_t)  # [n_mels, time]
+    return mel_spec.T  # mel spectrogram, [time, n_mels]
+
+def wav2melspec(wav_arr, sr=hparams['sample_rate'], n_fft=hparams['n_fft'],
+                hop_len=hparams['hop_length'], win_len=hparams['win_length'],
+                window=hparams['window'], num_mels=hparams['num_mels'],
+                fmin=hparams['fmin'], fmax=hparams['fmax']):
+    """Waveform -> mel power spectrogram of shape [time, num_mels]."""
+    power_spec = spectrogram(wav_arr, n_fft, hop_len, win_len, window)['power']
+    # BUG FIX: spectrogram() already returns [time, n_freqs] and
+    # power_spec2mel() expects exactly that layout (it transposes
+    # internally). The old extra `.T` handed it [n_freqs, time], making
+    # np.dot fail with a shape mismatch. Pass it untransposed, as wav2mfcc does.
+    melspec = power_spec2mel(power_spec, sr, n_fft, num_mels, fmin, fmax)
+    return melspec  # [time, num_mels]
+
+def wav2mfcc(wav_arr, sr=hparams['sample_rate'], n_mfcc=hparams['n_mfcc'],
+             n_fft=hparams['n_fft'], hop_len=hparams['hop_length'],
+             win_len=hparams['win_length'], window=hparams['window'],
+             num_mels=hparams['num_mels'], fmin=0.0,
+             fmax=None, ref_db=hparams['ref_db']):
+    # Waveform -> MFCC feature matrix of shape [time, 3 * n_mfcc]
+    # (static MFCCs stacked with their deltas and delta-deltas).
+    from scipy.fftpack import dct
+    print("wav_arr1:",wav_arr.shape)
+    wav_arr = preempahsis(wav_arr)
+    print("wav_arr2:",wav_arr.shape)
+
+    # NOTE(review): a *magnitude* spectrogram is fed into power_spec2mel,
+    # whose name suggests it expects a power spectrum — confirm intended.
+    mag_spec = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
+                           win_len=win_len, window=window)['magnitude']
+    mel_spec = power_spec2mel(mag_spec, sr=sr, n_fft=n_fft, num_mels=num_mels,
+                              fmin=fmin, fmax=fmax)
+    # log_melspec = power2db(mel_spec, ref_db=ref_db)
+    log_melspec = librosa.amplitude_to_db(mel_spec)
+    # Type-II DCT over the mel axis yields the cepstral coefficients.
+    mfcc = dct(x=log_melspec.T, axis=0, type=2, norm='ortho')[:n_mfcc]
+    # mfcc = np.dot(librosa.filters.dct(n_mfcc, log_melspec.shape[1]), log_melspec.T)
+    deltas = librosa.feature.delta(mfcc)
+    delta_deltas = librosa.feature.delta(mfcc, order=2)
+    mfcc_feature = np.concatenate((mfcc, deltas, delta_deltas), axis=0)
+
+    return mfcc_feature.T
+
+def wav2mfcc_v2(wav_arr, sr=hparams['sample_rate'], n_mfcc=hparams['n_mfcc'],
+                n_fft=hparams['n_fft'], hop_len=hparams['hop_length'],
+                win_len=hparams['win_length'], window=hparams['window'],
+                num_mels=hparams['num_mels'], fmin=0.0,
+                fmax=None, ref_db=hparams['ref_db'],
+                center=hparams['center']):
+    """Waveform -> (log-mel spectrogram [time, num_mels], complex STFT).
+
+    The waveform is pre-emphasised first. `n_mfcc` is kept in the signature
+    for backward compatibility, but no MFCCs are computed any more (the DCT
+    step was already commented out in the original implementation).
+    """
+    wav_arr = preempahsis(wav_arr)
+    # Run the STFT pipeline once and reuse its products; the original called
+    # spectrogram() twice with identical arguments.
+    spec = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
+                       win_len=win_len, window=window, center=center)
+    mel_spec = power_spec2mel(spec['power'], sr=sr, n_fft=n_fft,
+                              num_mels=num_mels, fmin=fmin, fmax=fmax)
+    log_melspec = power2db(mel_spec, ref_db=ref_db)  # log-mel spectrogram
+    return log_melspec, spec['stft']
+
+
+def wav2linear_v2(wav_arr, sr=hparams['sample_rate'], n_mfcc=hparams['n_mfcc'],
+                  n_fft=hparams['n_fft'], hop_len=hparams['hop_length'],
+                  win_len=hparams['win_length'], window=hparams['window'],
+                  num_mels=hparams['num_mels'], fmin=0.0,
+                  fmax=None, ref_db=hparams['ref_db'],
+                  center=hparams['center']):
+    """Waveform -> (normalized log-linear spectrogram in [0, 1], complex STFT)."""
+    wav_arr = preempahsis(wav_arr)
+    # Single spectrogram pass, same deduplication as wav2mfcc_v2.
+    spec = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
+                       win_len=win_len, window=window, center=center)
+    linear = _amp_to_db(spec['power'], ref_db=ref_db)
+    normalized_linear = _db_normalize(linear, min_db=hparams['min_db'])
+    return normalized_linear, spec['stft']
+
+def _amp_to_db(x,ref_db=20):
+    # Convert linear values to dB with a 1e-5 floor, offset by ref_db.
+    return 20 * np.log10(np.maximum(1e-5, x)) + ref_db
+
+
+def mel2log_mel(mel_spec, ref_db=hparams['ref_db'], min_db=hparams['min_db']):
+    # Mel power spectrogram -> log-mel normalized to [0., 1.].
+    log_mel = power2db(mel_spec, ref_db)
+    normalized = log_power_normalize(log_mel, min_db)
+    return normalized
+
+def power2db(power_spec, ref_db=hparams['ref_db'], tol=1e-5):
+    # power spectrogram is stft ** 2
+    # returned value: (10. * log10(power_spec) - ref_db)
+    return 10. * np.log10(power_spec + tol) - ref_db
+
+def db2power(power_db, ref_db=hparams['ref_db']):
+    # Inverse of power2db (ignoring the tol floor).
+    return np.power(10.0, 0.1 * (power_db + ref_db))
+#
+# def db2power_tf(power_db, ref_db=hparams['ref_db']):
+#     return tf.pow(10.0, 0.1 * (power_db + ref_db))
+
+def log_power_normalize(log_power, min_db=hparams['min_db']):
+    """
+    :param log_power: in db, computed by power2db(spectrogram(wav_arr)['power'])
+    :param min_db: minimum value of log_power in db
+    :return: log_power normalized to [0., 1.]
+    """
+    # BUG FIX: `assert cond or "msg"` always passes because a non-empty
+    # string is truthy; the message must be the assert's second operand.
+    assert min_db < 0., "min_db should be a negative value like -80.0 or -100.0"
+    return np.clip((log_power - min_db) / -min_db, 0., 1.)
+
+def log_power_denormalize(normalized_logpower, min_db=hparams['min_db']):
+    # Inverse of log_power_normalize: map [0., 1.] back to [min_db, 0.] dB.
+    return np.clip(normalized_logpower, 0., 1.) * -min_db + min_db
+
+# def log_power_denormalize_tf(normalized_logpower, min_db=hparams['min_db']):
+#     return tf.clip_by_value(normalized_logpower, 0., 1.) * -min_db + min_db
+
+def griffin_lim(magnitude_spec, iterations=hparams['iterations']):
+    """
+    :param magnitude_spec: magnitude spectrogram of shape [time, n_freqs]
+        obtained from spectrogram(wav_arr)['magnitude']
+    :param iterations: number of iterations to estimate phase
+    :return: waveform array
+    """
+    mag = magnitude_spec.T  # transpose to [n_freqs, time]
+    # Start from a random phase estimate and refine it iteratively.
+    angles = np.exp(2j * np.pi * np.random.rand(*mag.shape))
+    # BUG FIX: np.complex was removed in NumPy 1.24 (this project pins
+    # numpy==1.24.4); use the concrete np.complex128 dtype instead.
+    complex_mag = np.abs(mag).astype(np.complex128)
+    stft_0 = complex_mag * angles
+    y = istft(stft_0)
+    for i in range(iterations):
+        angles = np.exp(1j * np.angle(stft(y)))
+        y = istft(complex_mag * angles)
+    return y
+
+# def grinffin_lim_tf(magnitude_spec, iterations=hparams['iterations']):
+#     # magnitude_spec: [frames, fft_bins], of type tf.float32
+#     angles = tf.cast(
+#         tf.exp(2j * np.pi * tf.cast(
+#             tf.random_uniform(
+#                 tf.shape(magnitude_spec)),
+#             dtype=tf.complex64)),
+#         dtype=tf.complex64)
+#     complex_mag = tf.cast(tf.abs(magnitude_spec), tf.complex64)
+#     stft_0 = complex_mag * angles
+#     y = istft_tf(stft_0)
+#     for i in range(iterations):
+#         angles = tf.exp(1j * tf.cast(tf.angle(stft_tf(y)), tf.complex64))
+#         y = istft_tf(complex_mag * angles)
+#     return y
+
+def griffin_lim_test(wav_f, n_fft=hparams['n_fft'],
+                     hop_len=hparams['hop_length'],
+                     win_len=hparams['win_length'],
+                     window=hparams['window']):
+    # Round-trip check: wav -> magnitude spectrogram -> Griffin-Lim -> wav.
+    # Writes the reconstruction to reconstructed1.wav in the CWD.
+    wav_arr = load_wav(wav_f)
+    spec_dict = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len,
+                            win_len=win_len, window=window)
+    mag_spec = spec_dict['magnitude']
+    y = griffin_lim(mag_spec)
+    write_wav('reconstructed1.wav', y, sr=16000)
+
+def stft2wav_test(stft_f, mean_f, std_f):
+    # Reconstruct audio from a saved normalized log-power spectrogram (.npy)
+    # plus its mean/std statistics; writes reconstructed2.wav in the CWD.
+    spec = np.load(stft_f)
+    mean = np.load(mean_f)
+    std = np.load(std_f)
+    spec = spec * std + mean  # undo mean/std normalization
+    spec = log_power_denormalize(spec)
+    power_spec = db2power(spec)
+    mag_spec = power_spec ** 0.5  # power -> magnitude
+    y = griffin_lim(mag_spec)
+    y = deemphasis(y)
+    write_wav('reconstructed2.wav', y, sr=16000)
+    return y
+#
+# def stft2wav_tf_test(stft_f, mean_f, std_f):
+# # get inputs
+# spec = np.load(stft_f)
+# mean = np.load(mean_f)
+# std = np.load(std_f)
+# spec = spec * std + mean
+# # build graph
+# spec_pl = tf.placeholder(tf.float32, [None, None, 513])
+# denormalized = log_power_denormalize_tf(spec_pl)
+# mag_spec = tf.pow(db2power_tf(denormalized), 0.5)
+# wav = grinffin_lim_tf(mag_spec)
+# # set session and run
+# config = tf.ConfigProto()
+# config.gpu_options.allow_growth = True
+# sess = tf.Session(config=config)
+# wav_arr = sess.run(wav, feed_dict={spec_pl: np.expand_dims(spec, axis=0)})
+# sess.close()
+# y = deemphasis(np.squeeze(wav_arr))
+# write_wav('reconstructed_tf.wav', y, sr=16000)
+# return y
+
+# Normalize a dB value into [0., 1.] given the dynamic-range floor `min_db`
+# (mirrors log_power_normalize, but with min_db as a required parameter).
+def _db_normalize(db, min_db):
+    return np.clip((db - min_db) / -min_db, 0., 1.)
+
+
+
+
+def mfcc_test():
+    """Compare wav2mfcc_v2 output on ./test.wav against a saved reference
+    (test.npy), print summary statistics, and plot both side by side."""
+    wav_f = './test.wav'
+    wav_arr = load_wav(wav_f)
+
+    # BUG FIX: wav2mfcc_v2 returns a (log_melspec, stft) tuple; the original
+    # assigned the whole tuple to `mfcc` and crashed on `mfcc.min()`.
+    mfcc, _ = wav2mfcc_v2(wav_arr)
+    mfcc1 = np.load('test.npy')
+    print(mfcc.min(), mfcc1.min())
+    print(mfcc.max(), mfcc1.max())
+    print(mfcc.mean(), mfcc1.mean())
+    print(np.abs(mfcc - mfcc1))
+    print(np.mean(np.abs(mfcc - mfcc1)))
+    import matplotlib.pyplot as plt
+    plt.figure()
+    plt.subplot(211)
+    plt.imshow(mfcc.T, origin='lower')
+    # plt.colorbar()
+    plt.subplot(212)
+    plt.imshow(mfcc1.T, origin='lower')
+    # plt.colorbar()
+    plt.tight_layout()
+    plt.show()
+    return
+
+
+
+if __name__ == '__main__':
+    mfcc_test()
diff --git a/wenet/transformer/__init__.py b/wenet/transformer/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/wenet/transformer/asr_model.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/asr_model.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..a184d5ed71f15923f40068f9df666666a7f3c149
--- /dev/null
+++ b/wenet/transformer/asr_model.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec56eda7711fe563fe8c2f933ddb72fa1369d45ed0b730db235fd27cf1aebcf9
+size 5865400
diff --git a/wenet/transformer/attention.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/attention.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..a55e32b0714f2ee9788b06f4fa499e5757db7bda
--- /dev/null
+++ b/wenet/transformer/attention.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:acd8fabea0da032a4c4d01ffdff4f19a56ec2597155b4630afcb94fc41443c3e
+size 1671488
diff --git a/wenet/transformer/cmvn.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/cmvn.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..281c1d50deb1700399317cd9618341e8fade9956
--- /dev/null
+++ b/wenet/transformer/cmvn.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f2e40cca572e36742df40c4631154ae95ec7ba28f219271b313b08ad7f05aa6
+size 368416
diff --git a/wenet/transformer/convolution.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/convolution.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..fc7bba8a92b1db7267d052aa050e70ae94a271f1
--- /dev/null
+++ b/wenet/transformer/convolution.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70d2836540994e363691eec27b7c1a2c906d641f467d61b6cbd9503d5dd663ad
+size 851632
diff --git a/wenet/transformer/ctc.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/ctc.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..0ee76e98fd5d5e8d74e68bbd9a71fb14cc6b3527
--- /dev/null
+++ b/wenet/transformer/ctc.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4124cee25e02edea8a864be87486eb2c1ca20d01ec7f312a9172d997b636c56d
+size 617048
diff --git a/wenet/transformer/decoder.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/decoder.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..2c33b37ca509e166cf4c6a1c3d85d342636b41a5
--- /dev/null
+++ b/wenet/transformer/decoder.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3aeae40976ddbe78c9557995afd69b27d812f2c3c1a413ec4c1e0791eaffb3e
+size 1541184
diff --git a/wenet/transformer/decoder_layer.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/decoder_layer.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..11c50e16438f5419d9f9e4fbadc88cd98fd40872
--- /dev/null
+++ b/wenet/transformer/decoder_layer.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe5675e1cb453ecbd5e605b6aa58ce3142e1470b8c949c9af0c0f76efb5f68a4
+size 846672
diff --git a/wenet/transformer/embedding.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/embedding.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..2059f8e084a8d1eff5d6e30fc5fc7216574324de
--- /dev/null
+++ b/wenet/transformer/embedding.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e861789f9d9509dcf2e2512d5df62404dc144a7a6c0a4b7fca2e1f005aa5872
+size 1261192
diff --git a/wenet/transformer/encoder.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/encoder.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..b8d76ceafe2d23930e911c49b51155047e9eae6e
--- /dev/null
+++ b/wenet/transformer/encoder.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f65f8f9169a4e2b67634517636245ba5d2ba434f88f3fd8075bcb52e1c57559a
+size 2290992
diff --git a/wenet/transformer/encoder_layer.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/encoder_layer.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..5122561b360b07955b44daaad8346f8c9fa3f060
--- /dev/null
+++ b/wenet/transformer/encoder_layer.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed85cbb0f3511cfb4f536a78b1f5c37f563d54691df1dcd774f1f29b5fe3cc39
+size 1481440
diff --git a/wenet/transformer/label_smoothing_loss.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/label_smoothing_loss.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..963999d7b873d6359c11015c90de6e767d811114
--- /dev/null
+++ b/wenet/transformer/label_smoothing_loss.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:977392bd2e4169ca8ae3c6fb6e9328b92c2f1ae22d9021587259fab1bd64cc04
+size 667904
diff --git a/wenet/transformer/positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..79e5ab273762b759b403c1c0829c4b4cb6370237
--- /dev/null
+++ b/wenet/transformer/positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0416be1596a6b89764998a3b3005179865c654fa98272b2fa0ca95c06d346a69
+size 440744
diff --git a/wenet/transformer/subsampling.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/subsampling.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..84b4158ee39d43cbda2aceec29b2f8243df22669
--- /dev/null
+++ b/wenet/transformer/subsampling.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69cd56f257c7d0af1e34faf23f457791f96325ae8b69fba67176a2a216485a84
+size 2521760
diff --git a/wenet/transformer/swish.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/swish.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..d7cf2ceefc72399a4f3f91f9a8235da0216a90e5
--- /dev/null
+++ b/wenet/transformer/swish.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ccbb4b3e109f824530cf5a0d7c82e98f43ca12ee020e3a67ba992a32e83211bc
+size 287656
diff --git a/wenet/utils/checkpoint.cpython-38-x86_64-linux-gnu.so b/wenet/utils/checkpoint.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..1337f679c3b077f571b180e08f0bd467ea239b8c
--- /dev/null
+++ b/wenet/utils/checkpoint.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8dd41ea6d72902483379b808266bf9e49334ebc15be37a9219a6a6959acdace7
+size 695712
diff --git a/wenet/utils/cmvn.py b/wenet/utils/cmvn.py
new file mode 100644
index 0000000000000000000000000000000000000000..d262143210dde2c73b7dabd67eba87ecdbc2a7b4
--- /dev/null
+++ b/wenet/utils/cmvn.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python3
+# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang)
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import math
+
+import numpy as np
+
+
+def _load_json_cmvn(json_cmvn_file):
+ """ Load the json format cmvn stats file and calculate cmvn
+
+ Args:
+ json_cmvn_file: cmvn stats file in json format
+
+ Returns:
+ a numpy array of [means, vars]
+ """
+ with open(json_cmvn_file) as f:
+ cmvn_stats = json.load(f)
+
+ means = cmvn_stats['mean_stat']
+ variance = cmvn_stats['var_stat']
+ count = cmvn_stats['frame_num']
+ for i in range(len(means)):
+ means[i] /= count
+ variance[i] = variance[i] / count - means[i] * means[i]
+ if variance[i] < 1.0e-20:
+ variance[i] = 1.0e-20
+ variance[i] = 1.0 / math.sqrt(variance[i])
+ cmvn = np.array([means, variance])
+ return cmvn
+
+
+def _load_kaldi_cmvn(kaldi_cmvn_file):
+ """ Load the kaldi format cmvn stats file and calculate cmvn
+
+ Args:
+ kaldi_cmvn_file: kaldi text style global cmvn file, which
+ is generated by:
+ compute-cmvn-stats --binary=false scp:feats.scp global_cmvn
+
+ Returns:
+ a numpy array of [means, vars]
+ """
+ means = []
+ variance = []
+ with open(kaldi_cmvn_file, 'r') as fid:
+ # kaldi binary file start with '\0B'
+ if fid.read(2) == '\0B':
+ logging.error('kaldi cmvn binary file is not supported, please '
+ 'recompute it by: compute-cmvn-stats --binary=false '
+ ' scp:feats.scp global_cmvn')
+ sys.exit(1)
+ fid.seek(0)
+ arr = fid.read().split()
+ assert (arr[0] == '[')
+ assert (arr[-2] == '0')
+ assert (arr[-1] == ']')
+ feat_dim = int((len(arr) - 2 - 2) / 2)
+ for i in range(1, feat_dim + 1):
+ means.append(float(arr[i]))
+ count = float(arr[feat_dim + 1])
+ for i in range(feat_dim + 2, 2 * feat_dim + 2):
+ variance.append(float(arr[i]))
+
+ for i in range(len(means)):
+ means[i] /= count
+ variance[i] = variance[i] / count - means[i] * means[i]
+ if variance[i] < 1.0e-20:
+ variance[i] = 1.0e-20
+ variance[i] = 1.0 / math.sqrt(variance[i])
+ cmvn = np.array([means, variance])
+ return cmvn
+
+
+def load_cmvn(cmvn_file, is_json):
+ if is_json:
+ cmvn = _load_json_cmvn(cmvn_file)
+ else:
+ cmvn = _load_kaldi_cmvn(cmvn_file)
+ return cmvn[0], cmvn[1]
diff --git a/wenet/utils/common.cpython-38-x86_64-linux-gnu.so b/wenet/utils/common.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..c79034e27e4ea53fa3494e8bdfdb0fb5ae2c6fc6
--- /dev/null
+++ b/wenet/utils/common.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f65e0e494a4b6606d030618584725497d2128b93f249006e28fcd8869deaf92
+size 1319320
diff --git a/wenet/utils/ctc_util.cpython-38-x86_64-linux-gnu.so b/wenet/utils/ctc_util.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..f44df0873561c4d13eb973374374552ba1cd5c19
--- /dev/null
+++ b/wenet/utils/ctc_util.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:541fcb132b6cf217e53fffd15614c974622f3e28b665a851fb43d928387b3f92
+size 1057752
diff --git a/wenet/utils/executor.cpython-38-x86_64-linux-gnu.so b/wenet/utils/executor.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..fda548ad5231f9c9b89c20cb0a00a54e3c969316
--- /dev/null
+++ b/wenet/utils/executor.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:417593caf90505d54a90eea11c45278d8193e896895e44a209b0a4741d854c92
+size 1957912
diff --git a/wenet/utils/mask.cpython-38-x86_64-linux-gnu.so b/wenet/utils/mask.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..2f59e9d8d7613f6c660b37de988e75e1cf761c7a
--- /dev/null
+++ b/wenet/utils/mask.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c24d803544574ac73daf71519b29926d6dbce47fba5c7d01068dcaf882bc3bf6
+size 983672
diff --git a/wenet/utils/scheduler.cpython-38-x86_64-linux-gnu.so b/wenet/utils/scheduler.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..f08f71daf83e31635f02f2eb1ea892e0fb9b4862
--- /dev/null
+++ b/wenet/utils/scheduler.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f76fd1d22cd5dd82d2e6e60168aaa947691151b33670e51b819f2d863d4ffb3
+size 459744
diff --git a/xseg/dfl_xseg_api.cpython-38-x86_64-linux-gnu.so b/xseg/dfl_xseg_api.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..45b03ecf300f7377a08186693aee3d6e3d649e5f
--- /dev/null
+++ b/xseg/dfl_xseg_api.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f20b6ca257385d27be2079242895b5c3dba9c54ff5a9ae60c717693f7dc45982
+size 505632
diff --git a/xseg/xseg_211104_4790000.onnx b/xseg/xseg_211104_4790000.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..a2951f8345ad6b7c055d5a79362bbeedac9794cd
--- /dev/null
+++ b/xseg/xseg_211104_4790000.onnx
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8867760aeb83176e392d58c9ca123945cc36732c16763915b1d97f3acb0f1d64
+size 70324585
diff --git a/y_utils/__init__.py b/y_utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..bfd8b1d1c5317d10ab6a65a58e02cad14bae6bd6
--- /dev/null
+++ b/y_utils/__init__.py
@@ -0,0 +1,9 @@
+#!/user/bin/env python
+# coding=utf-8
+"""
+@project : dhp-service
+@author : huyi
+@file : __init__.py.py
+@ide : PyCharm
+@time : 2021-08-18 16:29:13
+"""
\ No newline at end of file
diff --git a/y_utils/config.cpython-38-x86_64-linux-gnu.so b/y_utils/config.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..389cce5cd178dbe4525c213c8d74aeee6566f231
--- /dev/null
+++ b/y_utils/config.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4ab5543f166c2957ead83d6e3dc08227196ae23e26456ee2d93e20bf223e2ed
+size 938840
diff --git a/y_utils/lcr.cpython-38-x86_64-linux-gnu.so b/y_utils/lcr.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..29883a0e2c429d844b8a592b24b8db02dec61c9e
--- /dev/null
+++ b/y_utils/lcr.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc0b56827ff7c8a085e6b9d9b6f5ed14154eba06d0ce826869e4f4d55354acc1
+size 443008
diff --git a/y_utils/liblcr.so b/y_utils/liblcr.so
new file mode 100644
index 0000000000000000000000000000000000000000..da6d206415beddcd7158001aeb2a5662471c1120
--- /dev/null
+++ b/y_utils/liblcr.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:083c5efea750de193af50008cb1e7ca9959ff61224524c1d490e80963e72a169
+size 25712808
diff --git a/y_utils/logger.cpython-38-x86_64-linux-gnu.so b/y_utils/logger.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..91d819d017f890ee72220ddc2da73ea2de4f2e8d
--- /dev/null
+++ b/y_utils/logger.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da30ffc850398063969cb4ffecb77c1e6258c98cf1f512dbc9aebf2318c7f093
+size 444096
diff --git a/y_utils/md5.cpython-38-x86_64-linux-gnu.so b/y_utils/md5.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..19d0e9a600de1f31982ada30ef7be1d653870748
--- /dev/null
+++ b/y_utils/md5.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c881874fb1de10b45dee2bb42a6ba62b3ee502575d78312535c6deb7931d527a
+size 567408
diff --git a/y_utils/time_utils.cpython-38-x86_64-linux-gnu.so b/y_utils/time_utils.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..107b9bb90c61111eb6c19d476cef0bb990d03fde
--- /dev/null
+++ b/y_utils/time_utils.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f5e75839b5b9b75fdd5c91b08da8e45862e0fd5ceb78ce7f0ce99252039644b
+size 528776
diff --git a/y_utils/tools.cpython-38-x86_64-linux-gnu.so b/y_utils/tools.cpython-38-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..0325b92e24c83de2b09c3e1f9a1780ab1710b3a8
--- /dev/null
+++ b/y_utils/tools.cpython-38-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddd26efd5171f0e0a6b35e67dd70750306bb2bf419847ce110062fc9ca7b05aa
+size 375008