diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..826c3f13661f416cf095dcb91b1f0e92c42b7d02 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,85 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +example/audio.wav filter=lfs diff=lfs merge=lfs -text +example/video.mp4 filter=lfs diff=lfs merge=lfs -text +face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +h_utils/custom.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +h_utils/obs_client.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +h_utils/request_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text 
+landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/data/base_dataset.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/data/image_folder.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/digitalhuman_interface.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/loss/perceptual.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/models/DINet.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/models/base_function.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/models/base_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/models/face3d2face_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/models/face_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/models/l2faceaudio_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/models/networks.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/models/networks_HD.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/models/networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/models/pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/models/pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/options/base_options.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/options/test_options.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/options/train_options.cpython-38-x86_64-linux-gnu.so 
filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/sync_batchnorm/batchnorm.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/sync_batchnorm/batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/sync_batchnorm/comm.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/sync_batchnorm/replicate.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/sync_batchnorm/unittest.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/util/flow_util.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/util/get_data.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/util/html.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/util/image_pool.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/util/util.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +landmark2face_wy/util/visualizer.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +model_lib/base_wrapper/onnx_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +service/server.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +service/trans_dh_service.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/transformer/asr_model.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text 
+wenet/transformer/attention.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/transformer/cmvn.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/transformer/convolution.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/transformer/ctc.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/transformer/decoder.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/transformer/decoder_layer.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/transformer/embedding.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/transformer/encoder.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/transformer/encoder_layer.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/transformer/label_smoothing_loss.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/transformer/positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/transformer/subsampling.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/transformer/swish.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/utils/checkpoint.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/utils/common.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/utils/ctc_util.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/utils/executor.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/utils/mask.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +wenet/utils/scheduler.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +xseg/dfl_xseg_api.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +y_utils/config.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text 
+y_utils/lcr.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +y_utils/liblcr.so filter=lfs diff=lfs merge=lfs -text +y_utils/logger.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +y_utils/md5.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +y_utils/time_utils.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text +y_utils/tools.cpython-38-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f4235a4031d64a8a09c6ad961dbddab385412534 --- /dev/null +++ b/README.md @@ -0,0 +1,73 @@ + +[![License](https://img.shields.io/badge/License-View%20License-blue.svg)](https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE) +![Python](https://img.shields.io/badge/Python-3.8-blue.svg) +![Linux](https://img.shields.io/badge/OS-Linux-brightgreen.svg) + +**[中文](#chinese-version)** | **[English](README_en.md)** + +--- + + + +# HeyGem-Linux-Python-Hack + +## 项目简介 + +[HeyGem-Linux-Python-Hack] 是一个基于 Python 的数字人项目,它从 [HeyGem.ai](https://github.com/GuijiAI/HeyGem.ai) 中提取出来,它能够直接在 Linux 系统上运行,摆脱了对 Docker 和 Windows 系统的依赖。我们的目标是提供一个更易于部署和使用的数字人解决方案。 + +**如果你觉得这个项目对你有帮助,欢迎给我们 Star!** +**如果运行过程中遇到问题,在查阅已有 Issue 后,在查阅 Google/baidu/ai 后,欢迎提交 Issues!** + +## 主要特性 + +* 无需 Docker: 直接在 Linux 系统上运行,简化部署流程。 +* 无需 Windows: 完全基于 Linux 开发和测试。 +* Python 驱动: 使用 Python 语言开发,易于理解和扩展。 +* 开发者友好: 易于使用和扩展。 +* 完全离线。 + +## 开始使用 + +### 安装 +本项目**支持且仅支持 Linux & python3.8 环境** +请确保你的 Linux 系统上已经安装了 **Python 3.8**。然后,使用 pip 安装项目依赖项 +同时也提供一个备用的环境 [requirements_0.txt](requirements_0.txt),遇到问题的话,你可以参考它来建立一个新的环境。 +**具体的 onnxruntime-gpu / torch 等需要结合你的机器上的 cuda 版本去尝试一些组合,否则仍旧可能遇到问题。** +**请尽量不要询问任何关于 pip 的问题,感谢合作** + +```bash +# 直接安装整个 requirements.txt 不一定成功,更建议跑代码观察报错信息,然后根据报错信息结合 requirements 去尝试安装,祝你顺利。 +# pip install -r requirements.txt +``` + +### 使用 +把项目克隆到本地 +```bash +git clone https://github.com/Holasyb918/HeyGem-Linux-Python-Hack +cd 
HeyGem-Linux-Python-Hack +bash download.sh +``` +#### 开始使用 +* repo 中已提供可以用于 demo 的音视频样例,代码可以直接运行。 +#### command: +```bash +python run.py +``` + +* 如果要使用自己的数据,可以外部传入参数,请注意,**path 是本地文件,且仅支持相对路径**. + +#### command: +```bash +python run.py --audio_path example/audio.wav --video_path example/video.mp4 +``` +#### gradio: +```bash +python app.py +# 请等待模型初始化完成后提交任务 +``` + +## Contributing +欢迎贡献! + +## License +参考 HeyGem.ai 的协议. diff --git a/README_en.md b/README_en.md new file mode 100644 index 0000000000000000000000000000000000000000..243c4567271ec87e8f82ad844db64c3ece8fb5ca --- /dev/null +++ b/README_en.md @@ -0,0 +1,66 @@ + +[![License](https://img.shields.io/badge/License-View%20License-blue.svg)](https://github.com/GuijiAI/HeyGem.ai/blob/main/LICENSE) +![Python](https://img.shields.io/badge/Python-3.8-blue.svg) +![Linux](https://img.shields.io/badge/OS-Linux-brightgreen.svg) + +**[中文](./README.md)** | **[English](#english-version)** + +--- + + + +# HeyGem-Linux-Python-Hack + +## Introduction + +[HeyGem-Linux-Python-Hack] is a Python-based digital human project extracted from HeyGem.ai. It is designed to run directly on Linux systems, eliminating the need for Docker and Windows. Our goal is to provide an easier-to-deploy and more user-friendly digital human solution. + +**Feel free to Star us if you find this project useful!** +**Please submit an Issue if you run into any problems!** + +## Key Features + +* No Docker Required: Runs directly on Linux systems, simplifying the deployment process. +* No Windows Required: Fully developed and tested on Linux. +* Python Powered: Developed using the Python language, making it easy to understand and extend. +* Developer-Friendly: Easy to use, and easy to extend. + +## Getting Started + +### Installation + +Please ensure that **Python 3.8** is installed on your Linux system. 
Then, you can install the project dependencies using pip: + +```bash +pip install -r requirements.txt +``` + +### Usage +Clone this repository to your local machine: +```bash +git clone https://github.com/Holasyb918/HeyGem-Linux-Python-Hack +cd HeyGem-Linux-Python-Hack +bash download.sh +``` +#### Getting Started +* Audio and video examples that can be used for the demo are already provided in the repo, and the code can be run directly. +#### Command: +```bash +python run.py +``` +* If you want to use your own data, you can pass parameters externally. **Please note that each path must point to a local file, and only relative paths are supported.** +#### Command: +```bash +python run.py --audio_path example/audio.wav --video_path example/video.mp4 +``` +#### Gradio: +```bash +python app.py +# Please wait until the processor has finished initializing before submitting a task. +``` + +## Contributing +Contributions are welcome! + +## License +This project is licensed under the HeyGem.ai License. diff --git a/app.py b/app.py new file mode 100644 index 0000000000000000000000000000000000000000..bb75637941f58c701a4f9e859ed5c84c6e15336a --- /dev/null +++ b/app.py @@ -0,0 +1,230 @@ +import argparse +import gc +import json +import os + +os.environ["GRADIO_SERVER_NAME"] = "0.0.0.0" +import subprocess +import threading +import time +import traceback +import uuid +from enum import Enum +import queue +import shutil +from functools import partial + +import cv2 +import gradio as gr +from flask import Flask, request + +import service.trans_dh_service +from h_utils.custom import CustomError +from y_utils.config import GlobalConfig +from y_utils.logger import logger + + +def write_video_gradio( + output_imgs_queue, + temp_dir, + result_dir, + work_id, + audio_path, + result_queue, + width, + height, + fps, + watermark_switch=0, + digital_auth=0, + temp_queue=None, +): + output_mp4 = os.path.join(temp_dir, "{}-t.mp4".format(work_id)) + fourcc = cv2.VideoWriter_fourcc(*"mp4v") + result_path = os.path.join(result_dir, "{}-r.mp4".format(work_id)) + 
video_write = cv2.VideoWriter(output_mp4, fourcc, fps, (width, height)) + print("Custom VideoWriter init done") + try: + while True: + state, reason, value_ = output_imgs_queue.get() + if type(state) == bool and state == True: + logger.info( + "Custom VideoWriter [{}]视频帧队列处理已结束".format(work_id) + ) + logger.info( + "Custom VideoWriter Silence Video saved in {}".format( + os.path.realpath(output_mp4) + ) + ) + video_write.release() + break + else: + if type(state) == bool and state == False: + logger.error( + "Custom VideoWriter [{}]任务视频帧队列 -> 异常原因:[{}]".format( + work_id, reason + ) + ) + raise CustomError(reason) + for result_img in value_: + video_write.write(result_img) + if video_write is not None: + video_write.release() + if watermark_switch == 1 and digital_auth == 1: + logger.info( + "Custom VideoWriter [{}]任务需要水印和数字人标识".format(work_id) + ) + if width > height: + command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format( + audio_path, + output_mp4, + GlobalConfig.instance().watermark_path, + GlobalConfig.instance().digital_auth_path, + result_path, + ) + logger.info("command:{}".format(command)) + else: + command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format( + audio_path, + output_mp4, + GlobalConfig.instance().watermark_path, + GlobalConfig.instance().digital_auth_path, + result_path, + ) + logger.info("command:{}".format(command)) + elif watermark_switch == 1 and digital_auth == 0: + logger.info("Custom VideoWriter [{}]任务需要水印".format(work_id)) + command = 'ffmpeg -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10" -c:a aac -crf 15 -strict -2 {}'.format( + audio_path, + output_mp4, + GlobalConfig.instance().watermark_path, + result_path, + ) + 
logger.info("command:{}".format(command)) + elif watermark_switch == 0 and digital_auth == 1: + logger.info("Custom VideoWriter [{}]任务需要数字人标识".format(work_id)) + if width > height: + command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format( + audio_path, + output_mp4, + GlobalConfig.instance().digital_auth_path, + result_path, + ) + logger.info("command:{}".format(command)) + else: + command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format( + audio_path, + output_mp4, + GlobalConfig.instance().digital_auth_path, + result_path, + ) + logger.info("command:{}".format(command)) + else: + command = "ffmpeg -loglevel warning -y -i {} -i {} -c:a aac -c:v libx264 -crf 15 -strict -2 {}".format( + audio_path, output_mp4, result_path + ) + logger.info("Custom command:{}".format(command)) + subprocess.call(command, shell=True) + print("###### Custom Video Writer write over") + print(f"###### Video result saved in {os.path.realpath(result_path)}") + result_queue.put([True, result_path]) + # temp_queue.put([True, result_path]) + except Exception as e: + logger.error( + "Custom VideoWriter [{}]视频帧队列处理异常结束,异常原因:[{}]".format( + work_id, e.__str__() + ) + ) + result_queue.put( + [ + False, + "[{}]视频帧队列处理异常结束,异常原因:[{}]".format( + work_id, e.__str__() + ), + ] + ) + logger.info("Custom VideoWriter 后处理进程结束") + + +service.trans_dh_service.write_video = write_video_gradio + + +class VideoProcessor: + def __init__(self): + self.task = service.trans_dh_service.TransDhTask() + self.basedir = GlobalConfig.instance().result_dir + self.is_initialized = False + self._initialize_service() + print("VideoProcessor init done") + + def _initialize_service(self): + logger.info("开始初始化 trans_dh_service...") + try: + time.sleep(5) + logger.info("trans_dh_service 初始化完成。") + self.is_initialized = True + except Exception as e: + 
logger.error(f"初始化 trans_dh_service 失败: {e}") + + def process_video( + self, audio_file, video_file, watermark=False, digital_auth=False + ): + while not self.is_initialized: + logger.info("服务尚未完成初始化,等待 1 秒...") + time.sleep(1) + work_id = str(uuid.uuid1()) + code = work_id + temp_dir = os.path.join(GlobalConfig.instance().temp_dir, work_id) + result_dir = GlobalConfig.instance().result_dir + video_writer_thread = None + final_result = None + + try: + cap = cv2.VideoCapture(video_file) + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = cap.get(cv2.CAP_PROP_FPS) + cap.release() + + audio_path = audio_file + video_path = video_file + + self.task.task_dic[code] = "" + self.task.work(audio_path, video_path, code, 0, 0, 0, 0) + + result_path = self.task.task_dic[code][2] + final_result_dir = os.path.join("result", code) + os.makedirs(final_result_dir, exist_ok=True) + os.system(f"mv {result_path} {final_result_dir}") + os.system( + f"rm -rf {os.path.join(os.path.dirname(result_path), code + '*.*')}" + ) + result_path = os.path.realpath( + os.path.join(final_result_dir, os.path.basename(result_path)) + ) + return result_path + + except Exception as e: + logger.error(f"处理视频时发生错误: {e}") + raise gr.Error(str(e)) + + +if __name__ == "__main__": + processor = VideoProcessor() + + inputs = [ + gr.File(label="上传音频文件/upload audio file"), + gr.File(label="上传视频文件/upload video file"), + ] + outputs = gr.Video(label="生成的视频/Generated video") + + title = "数字人视频生成/Digital Human Video Generation" + description = "上传音频和视频文件,即可生成数字人视频。/Upload audio and video files to generate digital human videos." 
+ + demo = gr.Interface( + fn=processor.process_video, + inputs=inputs, + outputs=outputs, + title=title, + description=description, + ) + demo.queue().launch() diff --git a/config/config.ini b/config/config.ini new file mode 100644 index 0000000000000000000000000000000000000000..5ad541391190e67578940d63eb907f7b54488434 --- /dev/null +++ b/config/config.ini @@ -0,0 +1,23 @@ +[log] +log_dir = ./log +log_file = dh.log + +[http_server] +server_ip = 0.0.0.0 +server_port = 8383 + +[temp] +temp_dir = ./ +clean_switch = 1 + +[result] +result_dir = ./result +clean_switch = 0 + +[digital] +batch_size = 4 + +[register] +url = http://172.16.160.51:12120 +report_interval = 10 +enable=0 diff --git a/download.sh b/download.sh new file mode 100644 index 0000000000000000000000000000000000000000..2bc2fa5bc45c01a6b3f45bfe4ec058ebdead8429 --- /dev/null +++ b/download.sh @@ -0,0 +1,32 @@ +set -e +set -u + +# face attr +mkdir -p face_attr_detect +wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/face_attr_epoch_12_220318.onnx -O face_attr_detect/face_attr_epoch_12_220318.onnx + +# face detect +mkdir -p face_detect_utils/resources +wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/pfpld_robust_sim_bs1_8003.onnx -O face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx +wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/scrfd_500m_bnkps_shape640x640.onnx -O face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx +wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/model_float32.onnx -O face_detect_utils/resources/model_float32.onnx + +# dh model +mkdir -p landmark2face_wy/checkpoints/anylang +wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/dinet_v1_20240131.pth -O landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth + +# face parsing +mkdir -p 
pretrain_models/face_lib/face_parsing +wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/79999_iter.onnx -O pretrain_models/face_lib/face_parsing/79999_iter.onnx + +# gfpgan +mkdir -p pretrain_models/face_lib/face_restore/gfpgan +wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/GFPGANv1.4.onnx -O pretrain_models/face_lib/face_restore/gfpgan/GFPGANv1.4.onnx + +# xseg +mkdir -p xseg +wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/xseg_211104_4790000.onnx -O xseg/xseg_211104_4790000.onnx + +# wenet +mkdir -p wenet/examples/aishell/aidata/exp/conformer +wget https://github.com/Holasyb918/HeyGem-Linux-Python-Hack/releases/download/ckpts_and_onnx/wenetmodel.pt -O wenet/examples/aishell/aidata/exp/conformer/wenetmodel.pt \ No newline at end of file diff --git a/example/audio.wav b/example/audio.wav new file mode 100644 index 0000000000000000000000000000000000000000..96521569dd0506b15ce95d3b9791ce10bf88b052 --- /dev/null +++ b/example/audio.wav @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:886f4e5cd90b79c8575c8bb18c93d41543b2619272f75841dac095a65c8f85dd +size 192044 diff --git a/example/video.mp4 b/example/video.mp4 new file mode 100644 index 0000000000000000000000000000000000000000..f02062222f7ac4286eedcde24372117655701db9 --- /dev/null +++ b/example/video.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a11e32bda4b3d15777ed8d481e66859805e71c5168221d0098eac2b31b3f4e7b +size 7068410 diff --git a/face_attr_detect/.DS_Store b/face_attr_detect/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..38734ca2de71d90578b12a191d5ff30a57f26d5c Binary files /dev/null and b/face_attr_detect/.DS_Store differ diff --git a/face_attr_detect/__init__.py b/face_attr_detect/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..5a7bb5272a2b009694971a124620c7f8cccb4563 --- /dev/null +++ b/face_attr_detect/__init__.py @@ -0,0 +1 @@ +from .face_attr import FaceAttr diff --git a/face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so b/face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..ef3c63b0d4794d279a14a5dcd68e69ea4f7549ce --- /dev/null +++ b/face_attr_detect/face_attr.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aa56c5cbaddc1bc7204823fd2252cf54d056365795737c846f876236a3e5056 +size 441864 diff --git a/face_attr_detect/face_attr_epoch_12_220318.onnx b/face_attr_detect/face_attr_epoch_12_220318.onnx new file mode 100644 index 0000000000000000000000000000000000000000..cddcb2d37f06e7c8b307864c4b8e4e8a167c5158 --- /dev/null +++ b/face_attr_detect/face_attr_epoch_12_220318.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fa6604beacd1e560ffc4cae6fa1537591d5f1a765a9f55473a295a1d22da3af +size 3723167 diff --git a/face_detect_utils/__init__.py b/face_detect_utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so b/face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..2ff5e75da52735d4728d2148a2e7fe7fc578312c --- /dev/null +++ b/face_detect_utils/face_detect.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68b5fd825eebc9421090c1daf3e940833b7bf5712ecee16deef937c87bbe666e +size 1363368 diff --git a/face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so b/face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..c230fe24694ff027d9e98eca344bd2d4bebc6d13 --- 
/dev/null +++ b/face_detect_utils/head_pose.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1897346bf14dbbac7328a43598ba0c6d3f4db9ab6628dbebb381d4139aca179e +size 1158712 diff --git a/face_detect_utils/resources/model_float32.onnx b/face_detect_utils/resources/model_float32.onnx new file mode 100644 index 0000000000000000000000000000000000000000..5a3232dae666298ddda0bebeb63db2dbf1ea7c1f --- /dev/null +++ b/face_detect_utils/resources/model_float32.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e5dc9dd52836b2029a599e74134f1a0f03e416db3e40e932f69609adb0238ad +size 17315434 diff --git a/face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx b/face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b1bb1378eea30fcd79a3ae28f11c55aefab2f17c --- /dev/null +++ b/face_detect_utils/resources/pfpld_robust_sim_bs1_8003.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bd9913817152831562cccde7e51ed431d1cf4547d8f21e0876b58a0d82baa55 +size 6889235 diff --git a/face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx b/face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx new file mode 100644 index 0000000000000000000000000000000000000000..be85dd4610c207a52ba07b98d4ea4f0fdfc0aee0 --- /dev/null +++ b/face_detect_utils/resources/scrfd_500m_bnkps_shape640x640.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b467f878e26ff1e7ee05cd9936fdff63fa2a5af5d732ed17ee231f2dd5cc07ae +size 2524648 diff --git a/face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so b/face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..7b9ca27f4d993b16034f9fafebcd4f18ca183d80 --- /dev/null +++ b/face_detect_utils/scrfd.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:7fa07e8146248e1b4deaafdfa0cc6c1e1b7a9d641db536aa3ae9b9ee10ab3b01 +size 3178688 diff --git a/face_lib/__init__.py b/face_lib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/face_lib/face_detect_and_align/__init__.py b/face_lib/face_detect_and_align/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2e8e58ddd298d9a9776e3a74822b57c500609155 --- /dev/null +++ b/face_lib/face_detect_and_align/__init__.py @@ -0,0 +1,3 @@ +from .face_align_5_landmarks import FaceDetect5Landmarks +from .face_align_utils import estimate_norm + diff --git a/face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so b/face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..e8dfa4558957ca42fd45cfe4a75f8e9ed54ce5e4 --- /dev/null +++ b/face_lib/face_detect_and_align/face_align_5_landmarks.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f0fd0fff60f8fb1fe6cbb1b549d5c43ae9bfaef1e5f4ee4edb27085d3023d22 +size 1321904 diff --git a/face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so b/face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..86c01e70c7021de5baaeb2743edfde9a720dec9e --- /dev/null +++ b/face_lib/face_detect_and_align/face_align_utils.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da29cd727e8bf8f7107c322d5d40ef8596b29e2a858cad779362dbf2516c38a0 +size 1611632 diff --git a/face_lib/face_detect_and_align/scrfd_insightface/__init__.py b/face_lib/face_detect_and_align/scrfd_insightface/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c1b34f01e048af40146ef07235a612629890b594 --- /dev/null +++ 
b/face_lib/face_detect_and_align/scrfd_insightface/__init__.py @@ -0,0 +1,5 @@ +# -- coding: utf-8 -- +# @Time : 2021/11/10 + + +from .scrfd import SCRFD \ No newline at end of file diff --git a/face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so b/face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..2829e1566dd0737b7884b5f2a8b3d96ab776caf0 --- /dev/null +++ b/face_lib/face_detect_and_align/scrfd_insightface/scrfd.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b9f64f8ef1c198e7d240ba6f1c9e4ff333c48f0259b71e74ca466d5ea274bb +size 2529880 diff --git a/face_lib/face_parsing/__init__.py b/face_lib/face_parsing/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2830c3f2937803943038b5b64337387dc0faafd5 --- /dev/null +++ b/face_lib/face_parsing/__init__.py @@ -0,0 +1,6 @@ +# -- coding: utf-8 -- +# @Time : 2022/3/29 + + +from .face_parsing_api import FaceParsing +# from .dfl_xseg_net import XsegNet diff --git a/face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so b/face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..6cfd45471143586b109da23be31c45a86eb07fa5 --- /dev/null +++ b/face_lib/face_parsing/face_parsing_api.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04931709d9c22f909e7ead81acb06cae05b70162fbcb6d2055ac7315b61834d3 +size 1053792 diff --git a/face_lib/face_restore/__init__.py b/face_lib/face_restore/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..76500b3b83375ff989281e919fb8e6046c94e3f6 --- /dev/null +++ b/face_lib/face_restore/__init__.py @@ -0,0 +1,2 @@ + +from .gfpgan_onnx.gfpgan_onnx_api import GFPGAN diff --git 
a/face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so b/face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..3101bdfc574235256bf5f7a5972bd4ce9f612f51 --- /dev/null +++ b/face_lib/face_restore/gfpgan_onnx/gfpgan_onnx_api.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2088f84d33b0a442e7dcb04135939e56d918b8edefd1de0b06340cb38573d1e +size 567104 diff --git a/h_utils/__init__.py b/h_utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..2c7305e619fd17580d4403c0764f4c8e7627427c --- /dev/null +++ b/h_utils/__init__.py @@ -0,0 +1,9 @@ +#!/user/bin/env python +# coding=utf-8 +""" +@project : dhp-service +@author : huyi +@file : __init__.py.py +@ide : PyCharm +@time : 2021-08-18 15:45:13 +""" \ No newline at end of file diff --git a/h_utils/custom.cpython-38-x86_64-linux-gnu.so b/h_utils/custom.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..5a600ab8e7aa02c037f74df97072d983833a4e9e --- /dev/null +++ b/h_utils/custom.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37ecc1ed06eb9b804f9de3470dbe6780976514d120bde8fed20d67c4cd26fe0e +size 259136 diff --git a/h_utils/obs_client.cpython-38-x86_64-linux-gnu.so b/h_utils/obs_client.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..fe6959bc611d429f7c1b318de7fc35be94984122 --- /dev/null +++ b/h_utils/obs_client.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d881bc9a1303697bd912c005c71ccc74bab724f1ade6e1156c2d9ca0277e344 +size 982656 diff --git a/h_utils/request_utils.cpython-38-x86_64-linux-gnu.so b/h_utils/request_utils.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 
0000000000000000000000000000000000000000..1be50bc25ede8efe01873d1a6cb66cf3535eabf2 --- /dev/null +++ b/h_utils/request_utils.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:215847205ce3a0f416ee3f5d07b4406b88ac0815cdd3e671c0d317c649cf2420 +size 1304616 diff --git a/h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so b/h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..c417c2c1adeb6877f8937837d5e909815035970f --- /dev/null +++ b/h_utils/sweep_bot.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fcfcce02dcbc5f9740329c10ab5fd0bfb157f3e6eb2fa4622adb901ac1feab5 +size 607848 diff --git a/h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so b/h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..0f9ee726b3745ac8b63694e053d70cdf12543243 --- /dev/null +++ b/h_utils/zip_utils.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4baf454a9940278b6696977b71a489ccf7c920faf37340b9968dcddfa582c508 +size 594864 diff --git a/landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..4e80ff7a18dbbcd19124ee6fd9c85c6305607db6 --- /dev/null +++ b/landmark2face_wy/audio_handler.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ced0b512524155f205d2b4c6a46a1a63c2d347387b30550a1fd99ccd53df172 +size 1749648 diff --git a/landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth b/landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth new file mode 100644 index 0000000000000000000000000000000000000000..14e330fceadf0b678821a4e6be4d94c68cef65c2 --- /dev/null +++ b/landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c4568b1f1f2890b4a92edc3f9457af63f908065961233af2125d060cb56fd9e +size 392392973 diff --git a/landmark2face_wy/checkpoints/test/opt.txt b/landmark2face_wy/checkpoints/test/opt.txt new file mode 100644 index 0000000000000000000000000000000000000000..785e8614bc4466272be2b1176f4e559b76ac34e4 --- /dev/null +++ b/landmark2face_wy/checkpoints/test/opt.txt @@ -0,0 +1,74 @@ +----------------- Options --------------- + aspect_ratio: 1.0 + audio_feature: 3dmm + batch_size: 16 + checkpoints_dir: ./landmark2face_wy/checkpoints + crop_size: 256 + dataroot: ./data + dataset_mode: Facereala3dmm + direction: AtoB + display_winsize: 256 + distributed: False + epoch: latest + eval: False + feat_num: 3 + feature_path: ../AnnI_deep3dface_256_contains_id/ + fp16: False + gpu_ids: 0 + img_size: 256 + init_gain: 0.02 + init_type: normal + input_nc: 3 + instance_feat: False + isTrain: False [default: None] + label_feat: False + lan_size: 1 + load_features: False + load_iter: 0 [default: 0] + load_size: 286 + local_rank: -1 + max_dataset_size: inf + mfcc0_rate: 0.2 + model: pirender_3dmm_mouth_hd + model_path: ./landmark2face_wy/checkpoints/anylang/dinet_v1_20240131.pth + n_blocks: 9 + n_blocks_global: 9 + n_blocks_local: 3 + n_clusters: 10 + n_downsample_E: 4 + n_downsample_global: 4 + n_layers_D: 3 + n_local_enhancers: 1 + name: test + ndf: 64 + nef: 16 + netD: basic + netG: pirender + ngf: 64 + niter_fix_global: 0 + no_dropout: True + no_flip: False + no_ganFeat_loss: False + no_instance: False + norm: instance + ntest: inf + num_D: 2 + num_test: 50 + num_threads: 4 + output_nc: 3 + perceptual_layers: ['relu_1_1', 'relu_2_1', 'relu_3_1', 'relu_4_1', 'relu_5_1'] + perceptual_network: vgg19 + perceptual_num_scales: 4 +perceptual_use_style_loss: True + perceptual_weights: [4, 4, 4, 4, 4] + phase: test + preprocess: resize_and_crop + resize_size: 512 + results_dir: ./results/ + serial_batches: False + suffix: + 
test_audio_path: None + test_muban: None + verbose: False +weight_style_to_perceptual: 250 +----------------- End ------------------- diff --git a/landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..fd3e48d9c1fb9bd699443bbef1a04ad9db4b44cd --- /dev/null +++ b/landmark2face_wy/data/Facereala3dmm_dataset.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfb97b75c48cdbf3cde9f0d6cb9c2e0665b8a0f8f6870a78480263638f8b2bd9 +size 3479728 diff --git a/landmark2face_wy/data/Facereala3dmmexp512_dataset.py b/landmark2face_wy/data/Facereala3dmmexp512_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..34ce72992f34f98f6ca1cb1254b48bc252b294fc --- /dev/null +++ b/landmark2face_wy/data/Facereala3dmmexp512_dataset.py @@ -0,0 +1,212 @@ +import os.path +import random +from data.base_dataset import BaseDataset, get_params, get_transform +import torchvision.transforms as transforms +from data.image_folder import make_dataset +from PIL import Image, ImageEnhance +import numpy as np +import cv2 +import torch +import time + +def get_idts(config_name): + idts = list() + with open(os.path.join('../config', config_name + '.txt')) as f: + for line in f: + line = line.strip() + video_name = line.split(':')[0] + idts.append(video_name) + return idts + + +def obtain_seq_index(index, num_frames): + seq = list(range(index - 13, index + 13 + 1)) + seq = [min(max(item, 0), num_frames - 1) for item in seq] + return seq + +def get_3dmm_feature(img_path, idx, new_dict): + id = img_path.split('/')[-3] + features = new_dict[id] + idx_list = obtain_seq_index(idx, features.shape[0]) + feature = features[idx_list, 80:144] +# feature[:, -1] = 50 + return np.transpose(feature, (1, 0)) + + + +class Facereala3dmmexp512Dataset(BaseDataset): + def __init__(self, opt, 
mode=None): + BaseDataset.__init__(self, opt) + img_size = opt.img_size + idts = get_idts(opt.name.split('_')[0]) + print("---------load data list--------: ", idts) + self.new_dict = {} + if mode == 'train': + self.labels = [] + self.label_starts = [] + self.label_ends = [] + count = 0 + for idt_name in idts: + # root = '../AnnVI/feature/{}'.format(idt_name) + root = os.path.join(opt.feature_path, idt_name) + feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature)) + self.new_dict[idt_name] = feature + if opt.audio_feature == "3dmm": + training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode)) + else: + training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature)) + training_data = torch.load(training_data_path) + img_paths = training_data['img_paths'] + features_3dmm = training_data['features_3dmm'] + index = [i[0].split('/')[-1] for i in img_paths] + + image_dir = '{}/{}_dlib_crop'.format(root, img_size) + self.label_starts.append(count) + for img in range(len(index)): + img_path = os.path.join(image_dir, index[img]) + # idx_list = obtain_seq_index(img, feature.shape[0]) + # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))]) + self.labels.append([img_path, features_3dmm[img]]) + count = count + 1 + self.label_ends.append(count) + + self.label_starts = np.array(self.label_starts) + self.label_ends = np.array(self.label_ends) + self.transforms_image = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) + + self.transforms_label = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) + self.shuffle() + elif mode == 'test': + self.labels = [] + self.label_starts = [] + self.label_ends = [] + count = 0 + for idt_name in idts: + # root = '../AnnVI/feature/{}'.format(idt_name) + root = os.path.join(opt.feature_path, idt_name) + feature = np.load(os.path.join(root, '%s.npy' % 
opt.audio_feature)) + self.new_dict[idt_name] = feature + if opt.audio_feature == "3dmm": + training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode)) + else: + training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature)) + training_data = torch.load(training_data_path) + img_paths = training_data['img_paths'] + features_3dmm = training_data['features_3dmm'] + index = [i[0].split('/')[-1] for i in img_paths] + + image_dir = '{}/{}_dlib_crop'.format(root, img_size) + self.label_starts.append(count) + for img in range(len(index)): + img_path = os.path.join(image_dir, index[img]) + # idx_list = obtain_seq_index(img, feature.shape[0]) + # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))]) + self.labels.append([img_path, features_3dmm[img]]) + count = count + 1 + self.label_ends.append(count) + + self.label_starts = np.array(self.label_starts) + self.label_ends = np.array(self.label_ends) + self.transforms_image = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) + + self.transforms_label = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) + self.shuffle() + + def shuffle(self): + self.labels_index = list(range(len(self.labels))) + random.shuffle(self.labels_index) + + def add_mouth_mask2(self, img): + mask = np.ones_like(img) + rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30] + mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]] + x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2])) + x = np.flip(x, 0) + y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose() + zz1 = -y - x + 88 > 0 + zz2 = np.flip(zz1, 1) + zz = (zz1 + zz2) > 0 + mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 
1 + imgm = img * mask + return imgm + + def __getitem__(self, index): + # s1= time.time() + idx = self.labels_index[index] + img_path, feature_3dmm_idx= self.labels[idx] + # print(img_path, feature_3dmm_idx) + feature_3dmm = get_3dmm_feature(img_path, feature_3dmm_idx, self.new_dict) + #print(img_path, feature_3dmm_idx, feature_3dmm.shape) + + img = np.array(Image.open(img_path).convert('RGB')) + img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8') + cut_pad1 = np.random.randint(0, 20) + cut_pad2 = np.random.randint(0, 20) + img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2] + # s2 =time.time() + # print('get data and read data ', s2-s1) + mask_B = img.copy() + # mask_end = np.random.randint(236*2, 250*2) + # index = np.random.randint(80, 90) + # mask_B[mask_B.shape[1] // 2 - index:mask_end, 30:-30] = 0 + mask_end = np.random.randint(480, 500) + index = np.random.randint(15, 30) + mask_B[index:mask_end, 70:-70] = 0 + img = Image.fromarray(img) + + mask_B = Image.fromarray(mask_B) + img = self.transforms_image(img) + mask_B = self.transforms_image(mask_B) + + x = np.where((idx >= self.label_starts) * (idx < self.label_ends))[0] + + audio = torch.tensor(feature_3dmm) + # s3 = time.time() + # print('get 3dmm and mask ', s3 - s2) + # 保证real_A_index不是idx + max_i = 0 + real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1) + while real_A_index == idx: + max_i += 1 + real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1) + if max_i > 5: + break + + imgA_path, _ = self.labels[real_A_index] + imgA = np.array(Image.open(imgA_path).convert('RGB')) + cut_pad1 = np.random.randint(0, 20) + cut_pad2 = np.random.randint(0, 20) + imgA = imgA[cut_pad1:256*2 + cut_pad1, cut_pad2:256*2 + cut_pad2] + + ########椭圆########## + # mask = np.zeros(imgA.shape, dtype=np.uint8) + # cv2.ellipse(mask, (imgA.shape[1] // 2, imgA.shape[0] // 2 - 165 - cut_pad1), + # (imgA.shape[1] // 2 + 25, 
imgA.shape[0]), 0, 0, 360, (255, 255, 255), -1) + # ROI = cv2.bitwise_and(imgA, mask) + # imgA = Image.fromarray(ROI) + ############################# + # imgA[:imgA.shape[1] // 2 - 40 - index2, :] = 0 + imgA = Image.fromarray(imgA) + imgA = self.transforms_image(imgA) + # s4 = time.time() + # print('end time reala ', s4 - s3) + return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B} + + def __len__(self): + """Return the total number of images in the dataset.""" + return len(self.labels) + + +if __name__ == '__main__': + from options.train_options import TrainOptions + + opt = TrainOptions().parse() + dataset = Facereala3dmmDataset(opt) + dataset_size = len(dataset) + print(dataset_size) + for i, data in enumerate(dataset): + print(data) diff --git a/landmark2face_wy/data/Facereala3dmmexpwenet512_dataset.py b/landmark2face_wy/data/Facereala3dmmexpwenet512_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..3c63a0e7f5b14b80769c7ef4987814ce12e8ff38 --- /dev/null +++ b/landmark2face_wy/data/Facereala3dmmexpwenet512_dataset.py @@ -0,0 +1,222 @@ +import os.path +import random +from data.base_dataset import BaseDataset, get_params, get_transform +import torchvision.transforms as transforms +from data.image_folder import make_dataset +from PIL import Image, ImageEnhance +import numpy as np +import cv2 +import torch +import time + +def get_idts(config_name): + idts = list() + with open(os.path.join('../config', config_name + '.txt')) as f: + for line in f: + line = line.strip() + video_name = line.split(':')[0] + idts.append(video_name) + return idts + + +def obtain_seq_index(index, num_frames): + seq = list(range(index - 10, index + 9 + 1)) + seq = [min(max(item, 0), num_frames - 1) for item in seq] + return seq + +def get_3dmm_feature(img_path, idx, audio_feature, new_dict): + id = img_path.split('/')[-3] + features, features1, features1 = new_dict[id] + idx_list = obtain_seq_index(idx, features.shape[0]) + feature = 
features[idx_list, 80:144] + feature1 = features1[:,audio_feature[0]:audio_feature[1]] + feature = np.concatenate([feature, features[idx_list, -3:], np.transpose(feature1, (1, 0))], 1) + # print(feature.shape) + return np.transpose(feature, (1, 0)) + # return feature + + + +class Facereala3dmmexpwenet512Dataset(BaseDataset): + def __init__(self, opt, mode=None): + BaseDataset.__init__(self, opt) + img_size = opt.img_size + idts = get_idts(opt.name.split('_')[0]) + print("---------load data list--------: ", idts) + self.new_dict = {} + if mode == 'train': + self.labels = [] + self.label_starts = [] + self.label_ends = [] + count = 0 + for idt_name in idts: + # root = '../AnnVI/feature/{}'.format(idt_name) + root = os.path.join(opt.feature_path, idt_name) + feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature)) + feature1 = np.load(os.path.join(root,'audio_wenet_feature.npy')) + self.new_dict[idt_name] = [feature, feature1, feature1] + if opt.audio_feature == "3dmm": + training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode)) + else: + training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature)) + training_data = torch.load(training_data_path) + img_paths = training_data['img_paths'] + features_3dmm = training_data['features_3dmm'] + audio_features = np.load(os.path.join(root, 'audio_data.npy'), allow_pickle=True) + audio_features = audio_features.tolist() + index = [i[0].split('/')[-1] for i in img_paths] + + image_dir = '{}/{}_dlib_crop'.format(root, img_size) + self.label_starts.append(count) + for img in range(len(index)): + img_path = os.path.join(image_dir, index[img]) + # idx_list = obtain_seq_index(img, feature.shape[0]) + # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))]) + if type(features_3dmm[img]) != int: + print(img_path) + audio_feature = audio_features[img] + self.labels.append([img_path, features_3dmm[img], audio_feature]) + count = count + 1 + 
self.label_ends.append(count) + + self.label_starts = np.array(self.label_starts) + self.label_ends = np.array(self.label_ends) + self.transforms_image = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) + + self.transforms_label = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) + self.shuffle() + elif mode == 'test': + self.labels = [] + self.label_starts = [] + self.label_ends = [] + count = 0 + for idt_name in idts: + # root = '../AnnVI/feature/{}'.format(idt_name) + root = os.path.join(opt.feature_path, idt_name) + feature = np.load(os.path.join(root, '%s.npy' % opt.audio_feature)) + self.new_dict[idt_name] = feature + if opt.audio_feature == "3dmm": + training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode)) + else: + training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature)) + training_data = torch.load(training_data_path) + img_paths = training_data['img_paths'] + features_3dmm = training_data['features_3dmm'] + index = [i[0].split('/')[-1] for i in img_paths] + + image_dir = '{}/{}_dlib_crop'.format(root, img_size) + self.label_starts.append(count) + for img in range(len(index)): + img_path = os.path.join(image_dir, index[img]) + # idx_list = obtain_seq_index(img, feature.shape[0]) + # self.labels.append([img_path, np.transpose(feature[idx_list, ...], (1, 0))]) + self.labels.append([img_path, features_3dmm[img]]) + count = count + 1 + self.label_ends.append(count) + + self.label_starts = np.array(self.label_starts) + self.label_ends = np.array(self.label_ends) + self.transforms_image = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) + + self.transforms_label = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) + self.shuffle() + + def shuffle(self): + self.labels_index = 
list(range(len(self.labels))) + random.shuffle(self.labels_index) + + def add_mouth_mask2(self, img): + mask = np.ones_like(img) + rect_area = [img.shape[1] // 2 - 60, np.random.randint(226, 246), 30, 256 - 30] + mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]] + x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2])) + x = np.flip(x, 0) + y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose() + zz1 = -y - x + 88 > 0 + zz2 = np.flip(zz1, 1) + zz = (zz1 + zz2) > 0 + mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1 + imgm = img * mask + return imgm + + def __getitem__(self, index): + # s1= time.time() + idx = self.labels_index[index] + img_path, feature_3dmm_idx, audio_feature= self.labels[idx] + # print(img_path, feature_3dmm_idx) + feature_3dmm = get_3dmm_feature(img_path, feature_3dmm_idx, audio_feature, self.new_dict) + #print(img_path, feature_3dmm_idx, feature_3dmm.shape) + + img = np.array(Image.open(img_path).convert('RGB')) + img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8') + cut_pad1 = np.random.randint(0, 20) + cut_pad2 = np.random.randint(0, 20) + img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2] + # s2 =time.time() + # print('get data and read data ', s2-s1) + mask_B = img.copy() + # mask_end = np.random.randint(236*2, 250*2) + # index = np.random.randint(80, 90) + # mask_B[mask_B.shape[1] // 2 - index:mask_end, 30:-30] = 0 + mask_end = np.random.randint(480, 500) + index = np.random.randint(15, 30) + # index = np.random.randint(90, 100) + mask_B[index:mask_end, 70:-70] = 0 + img = Image.fromarray(img) + + mask_B = Image.fromarray(mask_B) + img = self.transforms_image(img) + mask_B = self.transforms_image(mask_B) + + x = np.where((idx >= self.label_starts) * (idx < self.label_ends))[0] + + audio = 
torch.tensor(feature_3dmm) + # s3 = time.time() + # print('get 3dmm and mask ', s3 - s2) + # 保证real_A_index不是idx + max_i = 0 + real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1) + while real_A_index == idx: + max_i += 1 + real_A_index = random.randint(self.label_starts[x], self.label_ends[x] - 1) + if max_i > 5: + break + + imgA_path, _, _ = self.labels[real_A_index] + imgA = np.array(Image.open(imgA_path).convert('RGB')) + cut_pad1 = np.random.randint(0, 20) + cut_pad2 = np.random.randint(0, 20) + imgA = imgA[cut_pad1:256*2 + cut_pad1, cut_pad2:256*2 + cut_pad2] + + ########椭圆########## + # mask = np.zeros(imgA.shape, dtype=np.uint8) + # cv2.ellipse(mask, (imgA.shape[1] // 2, imgA.shape[0] // 2 - 165 - cut_pad1), + # (imgA.shape[1] // 2 + 25, imgA.shape[0]), 0, 0, 360, (255, 255, 255), -1) + # ROI = cv2.bitwise_and(imgA, mask) + # imgA = Image.fromarray(ROI) + ############################# + # imgA[:imgA.shape[1] // 2 - 40 - index2, :] = 0 + imgA = Image.fromarray(imgA) + imgA = self.transforms_image(imgA) + # s4 = time.time() + # print('end time reala ', s4 - s3) + return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B} + + def __len__(self): + """Return the total number of images in the dataset.""" + return len(self.labels) + + +if __name__ == '__main__': + from options.train_options import TrainOptions + + opt = TrainOptions().parse() + dataset = Facereala3dmmDataset(opt) + dataset_size = len(dataset) + print(dataset_size) + for i, data in enumerate(dataset): + print(data) diff --git a/landmark2face_wy/data/__init__.py b/landmark2face_wy/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..ef09ef89810a9de201c234e79cef5a7ff2b37d8e --- /dev/null +++ b/landmark2face_wy/data/__init__.py @@ -0,0 +1,99 @@ +"""This package includes all the modules related to data loading and preprocessing + + To add a custom dataset class called 'dummy', you need to add a file called 
'dummy_dataset.py' and define a subclass 'DummyDataset' inherited from BaseDataset. + You need to implement four functions: + -- <__init__>: initialize the class, first call BaseDataset.__init__(self, opt). + -- <__len__>: return the size of dataset. + -- <__getitem__>: get a data point from data loader. + -- : (optionally) add dataset-specific options and set default options. + +Now you can use the dataset class by specifying flag '--dataset_mode dummy'. +See our template dataset class 'template_dataset.py' for more details. +""" +import importlib +import torch.utils.data +from landmark2face_wy.data.base_dataset import BaseDataset + + +def find_dataset_using_name(dataset_name): + """Import the module "data/[dataset_name]_dataset.py". + + In the file, the class called DatasetNameDataset() will + be instantiated. It has to be a subclass of BaseDataset, + and it is case-insensitive. + """ + dataset_filename = "landmark2face_wy.data." + dataset_name + "_dataset" + datasetlib = importlib.import_module(dataset_filename) + + dataset = None + target_dataset_name = dataset_name.replace('_', '') + 'dataset' + for name, cls in datasetlib.__dict__.items(): + if name.lower() == target_dataset_name.lower() \ + and issubclass(cls, BaseDataset): + dataset = cls + + if dataset is None: + raise NotImplementedError("In %s.py, there should be a subclass of BaseDataset with class name that matches %s in lowercase." % (dataset_filename, target_dataset_name)) + + return dataset + + +def get_option_setter(dataset_name): + """Return the static method of the dataset class.""" + dataset_class = find_dataset_using_name(dataset_name) + return dataset_class.modify_commandline_options + + +def create_dataset(opt, mode='train'): + """Create a dataset given the option. + + This function wraps the class CustomDatasetDataLoader. 
+ This is the main interface between this package and 'train.py'/'test.py' + + Example: + >>> from data import create_dataset + >>> dataset = create_dataset(opt) + """ + data_loader = CustomDatasetDataLoader(opt, mode) + dataset = data_loader.load_data() + return dataset + + +class CustomDatasetDataLoader(): + """Wrapper class of Dataset class that performs multi-threaded data loading""" + + def __init__(self, opt, mode): + """Initialize this class + + Step 1: create a dataset instance given the name [dataset_mode] + Step 2: create a multi-threaded data loader. + """ + self.opt = opt + dataset_class = find_dataset_using_name(opt.dataset_mode) + self.dataset = dataset_class(opt, mode) + print("dataset [%s] was created" % type(self.dataset).__name__) + if mode == 'test': + batchsize = opt.batch_size // 2 + else: + batchsize = opt.batch_size + print(opt.batch_size) + if not opt.distributed: + self.dataloader = torch.utils.data.DataLoader(self.dataset,batch_size=batchsize, + shuffle=not opt.serial_batches,num_workers=int(opt.num_threads)) + else: + self.train_sampler = torch.utils.data.distributed.DistributedSampler(self.dataset) ### 数据切分 + self.dataloader = torch.utils.data.DataLoader(self.dataset, batch_size=batchsize, sampler=self.train_sampler, num_workers=int(opt.num_threads), pin_memory=True) + + def load_data(self): + return self + + def __len__(self): + """Return the number of data in the dataset""" + return min(len(self.dataset), self.opt.max_dataset_size) + + def __iter__(self): + """Return a batch of data""" + for i, data in enumerate(self.dataloader): + if i * self.opt.batch_size >= self.opt.max_dataset_size: + break + yield data diff --git a/landmark2face_wy/data/base_dataset.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/data/base_dataset.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..fc6c4ca8f30d9105f63cd455086fc2807b39fed9 --- /dev/null +++ 
b/landmark2face_wy/data/base_dataset.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9f9eca68615a251926ce113af4594a8dd1f50644c66be50ff5ab27020569c89 +size 1093920 diff --git a/landmark2face_wy/data/image_folder.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/data/image_folder.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..cf8ba59170e04c0c1e9acc574d8f5104b2ecaf59 --- /dev/null +++ b/landmark2face_wy/data/image_folder.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:180bb0b0dc195aa073049a4c7630e071577f5607bbb3bd2c8247468ec84c7f6c +size 860856 diff --git a/landmark2face_wy/data/l2faceaudio512_dataset.py b/landmark2face_wy/data/l2faceaudio512_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..060bf5e086923b20b1e4fb631ae6ef09dbc3f03b --- /dev/null +++ b/landmark2face_wy/data/l2faceaudio512_dataset.py @@ -0,0 +1,189 @@ +import os.path +import random +from data.base_dataset import BaseDataset, get_params, get_transform +import torchvision.transforms as transforms +from data.image_folder import make_dataset +from PIL import Image, ImageEnhance +import numpy as np +import cv2 +import torch + + +def get_idts(config_name): + idts = list() + with open(os.path.join('../config', config_name + '.txt')) as f: + for line in f: + line = line.strip() + idts.append(line) + return idts + + +class L2FaceAudio512Dataset(BaseDataset): + def __init__(self, opt, mode=None): + BaseDataset.__init__(self, opt) + img_size = opt.img_size + idts = get_idts(opt.name.split('_')[0]) + print("---------load data list--------: ", idts) + if mode == 'train': + self.labels = [] + for idt_name in idts: + # root = '../AnnVI/feature/{}'.format(idt_name) + root = os.path.join(opt.feature_path, idt_name) + if opt.audio_feature == "mfcc": + training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode)) + 
else: + training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature)) + training_data = torch.load(training_data_path) + img_paths = training_data['img_paths'] + audio_features = training_data['audio_features'] + index = [i[0].split('/')[-1] for i in img_paths] + + image_dir = '{}/{}_dlib_crop'.format(root, img_size) + # label_dir = '{}/512_landmark_crop'.format(root) + + # if 'man' in opt.name: + # imgs.sort(key=lambda x:int(x.split('.')[0])) + # else: + # imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), int(x.split('.')[0].split('-')[1]))) + for img in range(len(index)): + img_path = os.path.join(image_dir, index[img]) + audio_feature = audio_features[img] + self.labels.append([img_path, audio_feature]) + # transforms.Resize([img_size, img_size], Image.BICUBIC), + self.transforms_image = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) + # transforms.Resize([img_size, img_size], Image.BICUBIC), + self.transforms_label = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) + self.shuffle() + elif mode == 'test': + self.labels = [] + for idt_name in idts: + # root = '../AnnVI/feature/{}'.format(idt_name) + root = os.path.join(opt.feature_path, idt_name) + if opt.audio_feature == "mfcc": + training_data_path = os.path.join(root, '{}_{}.t7'.format(img_size, mode)) + else: + training_data_path = os.path.join(root, '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature)) + training_data = torch.load(training_data_path) + img_paths = training_data['img_paths'] + audio_features = training_data['audio_features'] + index = [i[0].split('/')[-1] for i in img_paths] + + image_dir = '{}/{}_dlib_crop'.format(root, img_size) + # label_dir = '{}/512_landmark_crop'.format(root) + + # if 'man' in opt.name: + # imgs.sort(key=lambda x:int(x.split('.')[0])) + # else: + # imgs.sort(key=lambda x: (int(x.split('.')[0].split('-')[0]), 
int(x.split('.')[0].split('-')[1]))) + for img in range(len(index)): + img_path = os.path.join(image_dir, index[img]) + audio_feature = audio_features[img] + self.labels.append([img_path, audio_feature]) + # transforms.Resize([img_size, img_size], Image.BICUBIC), + self.transforms_image = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) + # transforms.Resize([img_size, img_size], Image.BICUBIC), + self.transforms_label = transforms.Compose([transforms.ToTensor(), + transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) + self.shuffle() + + def shuffle(self): + random.shuffle(self.labels) + + def add_mouth_mask2(self, img): + mask = np.ones_like(img) + rect_area = [img.shape[1] // 2 - np.random.randint(50, 60), np.random.randint(226, 246), 30, 256 - 30] + mask_rect_area = mask[rect_area[0]: rect_area[1], rect_area[2]:rect_area[3]] + x = np.tile(np.arange(rect_area[1] - rect_area[0])[:, np.newaxis], (1, rect_area[3] - rect_area[2])) + x = np.flip(x, 0) + y = np.tile(np.arange(rect_area[3] - rect_area[2])[:, np.newaxis], (1, rect_area[1] - rect_area[0])).transpose() + zz1 = -y - x + 88 > 0 + zz2 = np.flip(zz1, 1) + zz = (zz1 + zz2) > 0 + mask[rect_area[0]:rect_area[1], rect_area[2]:rect_area[3]] = np.tile(zz[:, :, np.newaxis], (1, 1, 3)) * 1 + imgm = img * mask + return imgm + + def __getitem__(self, index): + cv2.setNumThreads(0) + img_path, audio_feature = self.labels[index] + img = np.array(Image.open(img_path).convert('RGB')) + img = np.array(np.clip(img + np.random.randint(-20, 20, size=3, dtype='int8'), 0, 255), dtype='uint8') + cut_pad1 = np.random.randint(0, 20) + cut_pad2 = np.random.randint(0, 20) + img = img[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2] + + ####椭圆mask遮住衣领##### + ''' + mask = np.zeros(img.shape, dtype=np.uint8) + cv2.ellipse(mask, (img.shape[1] // 2, img.shape[0] // 2 - 160 - cut_pad1), (img.shape[1] // 2 + 10, img.shape[0]), 0, 0, 360, (255, 255, 255), -1) + ''' + ####mask遮眼睛##### + 
mask = np.ones(img.shape, dtype=np.uint8) * 255 + mask[40 - cut_pad1:140 - cut_pad1, 110 - cut_pad2:-110 - cut_pad2] = 0 + img = cv2.bitwise_and(img, mask) + + mask_B = img.copy() + mask_B = cv2.resize(mask_B, (256, 256)) + ##########脖子分割加mask############# + # img_edge = cv2.imread(img_path.replace("dlib_crop", "dlib_crop_neck")) + # img_edge = img_edge[cut_pad1:256 + cut_pad1, cut_pad2:256 + cut_pad2] + # mask_B = cv2.bitwise_and(img, 255 - img_edge) + # img_edge[:128, :, :] = img[:128, :, :] + + ##########增加脖子椭圆mask############# + ''' + maske = np.zeros(img.shape, dtype=np.uint8) + cv2.ellipse(maske, (img.shape[1] // 2, img.shape[0] // 2 + 50), + (img.shape[1] // 4 + np.random.randint(-5, 5), img.shape[0] // 3 + np.random.randint(-10, 10)), + 0, 0, 360, (255, 255, 255), -1) + maske[:img.shape[0] // 2, :, :] = 0 + mask_B = cv2.bitwise_and(mask_B, 255-maske) + ''' + ##########之前老的矩形mask############# + mask_end = np.random.randint(236, 256) + mask_B[mask_B.shape[1] // 2 - np.random.randint(40, 50):mask_end, 30:-30] = 0 + ##########之前老的矩形mask############# + ##########蔡星宇三角mask############# + # mask_B = self.add_mouth_mask2(mask_B) + ##########蔡星宇三角mask############# + # mask_B[mask_B.shape[1] // 2 - 50:, 30:-30] = 0 + img = Image.fromarray(img) + mask_B = Image.fromarray(mask_B) + img = self.transforms_image(img) + mask_B = self.transforms_image(mask_B) + # lab = Image.open(lab_path).convert('RGB') + # lab = self.transforms_label(lab) + audio = np.zeros((256, 256), dtype=np.float32) + audio_feature = np.array(audio_feature) + audio[:audio_feature.shape[0], :audio_feature.shape[1]] = audio_feature + audio = torch.tensor([audio]) + + imgA_path, _ = random.sample(self.labels, 1)[0] + imgA = np.array(Image.open(imgA_path).convert('RGB')) + cut_pad1 = np.random.randint(0, 20) + cut_pad2 = np.random.randint(0, 20) + imgA = imgA[cut_pad1:512 + cut_pad1, cut_pad2:512 + cut_pad2] + # mask = np.ones(imgA.shape, dtype=np.uint8) * 255 + # mask[40 - cut_pad1:140 - cut_pad1, 110 - 
import os.path
import random
from data.base_dataset import BaseDataset, get_params, get_transform
import torchvision.transforms as transforms
from data.image_folder import make_dataset
from PIL import Image, ImageEnhance
import numpy as np
import cv2
import torch


def get_idts(config_name):
    """Read identity names, one per line, from ``../config/<config_name>.txt``.

    The path is resolved relative to the current working directory.
    Surrounding whitespace is stripped and blank lines are skipped so that a
    stray empty line does not turn into an empty identity name.

    Returns:
        list[str]: identity names in file order.
    """
    with open(os.path.join('../config', config_name + '.txt')) as f:
        stripped = (line.strip() for line in f)
        return [name for name in stripped if name]


class L2FaceAudioDataset(BaseDataset):
    """Dataset pairing cropped face frames with per-frame audio features.

    Every entry of ``self.labels`` is ``[image_path, audio_feature]``.
    Entries are loaded from ``<opt.feature_path>/<identity>/<img_size>_<mode>[_<audio_feature>].t7``
    for each identity listed in the config file named after the first
    ``_``-separated token of ``opt.name``.
    """

    # Side length (in pixels) of the square crop fed to the network.
    CROP_SIZE = 256

    def __init__(self, opt, mode=None):
        """Build the sample list.

        Args:
            opt: options object; reads ``img_size``, ``name``, ``feature_path``
                and ``audio_feature``.
            mode: ``'train'`` or ``'test'`` selects which feature file is
                loaded. Any other value (including the default ``None``)
                yields an empty dataset instead of leaving the instance
                half-initialised.
        """
        BaseDataset.__init__(self, opt)
        img_size = opt.img_size
        idts = get_idts(opt.name.split('_')[0])
        print("---------load data list--------: ", idts)

        # Always define these attributes so __len__/__getitem__ never hit an
        # AttributeError, whatever ``mode`` is.
        self.labels = []
        self.transforms_image = transforms.Compose([transforms.ToTensor(),
                                                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
        self.transforms_label = transforms.Compose([transforms.ToTensor(),
                                                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

        if mode in ('train', 'test'):
            for idt_name in idts:
                self.labels.extend(self._load_identity(opt, idt_name, img_size, mode))
            self.shuffle()

    @staticmethod
    def _load_identity(opt, idt_name, img_size, mode):
        """Return the ``[image_path, audio_feature]`` pairs of one identity."""
        root = os.path.join(opt.feature_path, idt_name)
        if opt.audio_feature == "mfcc":
            file_name = '{}_{}.t7'.format(img_size, mode)
        else:
            file_name = '{}_{}_{}.t7'.format(img_size, mode, opt.audio_feature)
        # NOTE(review): torch.load unpickles the file; only point
        # opt.feature_path at feature files you trust.
        data = torch.load(os.path.join(root, file_name))
        audio_features = data['audio_features']
        image_dir = '{}/{}_dlib_crop'.format(root, img_size)
        # Only the file name of each stored path is kept; the directory is
        # rebuilt from ``root`` so the feature file can be relocated.
        file_names = [entry[0].split('/')[-1] for entry in data['img_paths']]
        # Index (rather than zip) so a length mismatch still raises.
        return [[os.path.join(image_dir, name), audio_features[i]]
                for i, name in enumerate(file_names)]

    def shuffle(self):
        """Shuffle the sample list in place using the ``random`` module."""
        random.shuffle(self.labels)

    def add_mouth_mask2(self, img):
        """Zero a lower-face rectangle of ``img`` except its two bottom corners.

        The rectangle spans rows ``H // 2 - 60`` up to a random end in
        ``[226, 246)`` and columns ``30`` to ``256 - 30``. Inside it, pixels
        are kept only in the bottom-left and bottom-right corner triangles
        (column offset + distance from the bottom row < 88).

        Args:
            img: ``H x W x C`` array. NOTE(review): the fixed column bound
                assumes a 256-pixel-wide image - confirm with callers.

        Returns:
            A new array of the same shape and dtype with the region zeroed.
        """
        top = img.shape[0] // 2 - 60
        bottom = np.random.randint(226, 246)
        left, right = 30, 256 - 30

        # Distance of each row from the bottom edge, and column offset.
        from_bottom = np.arange(bottom - top)[::-1][:, np.newaxis]
        col = np.arange(right - left)[np.newaxis, :]
        keep_left = (col + from_bottom) < 88
        keep = keep_left | keep_left[:, ::-1]

        mask = np.ones_like(img)
        # Broadcast over the channel axis instead of tiling to 3 channels.
        mask[top:bottom, left:right] = keep[:, :, np.newaxis]
        return img * mask

    @staticmethod
    def _read_rgb(path):
        """Load ``path`` as an ``H x W x 3`` uint8 array and close the file."""
        with Image.open(path) as handle:
            return np.array(handle.convert('RGB'))

    def __getitem__(self, index):
        """Return one training sample.

        Returns:
            dict with keys
            ``'B'``       - the indexed frame, colour-jittered, randomly
                            cropped and eye-masked (normalised tensor);
            ``'mask_B'``  - the same frame with the mouth rectangle zeroed;
            ``'A'``       - a randomly chosen frame of the dataset, cropped
                            and eye-masked;
            ``'A_label'`` / ``'B_label'`` - the same ``1 x 256 x 256`` float32
                            tensor holding the zero-padded audio feature.
        """
        cv2.setNumThreads(0)
        crop = self.CROP_SIZE
        img_path, audio_feature = self.labels[index]

        img = self._read_rgb(img_path)
        # Per-channel brightness jitter; the int8 offset promotes the sum to
        # a wider integer type, so clipping happens before the uint8 cast.
        jitter = np.random.randint(-20, 20, size=3, dtype='int8')
        img = np.array(np.clip(img + jitter, 0, 255), dtype='uint8')

        cut_pad1 = np.random.randint(0, 10)
        cut_pad2 = np.random.randint(0, 10)
        img = img[cut_pad1:crop + cut_pad1, cut_pad2:crop + cut_pad2]

        # Mask covering the eyes: a fixed band in un-cropped coordinates.
        mask = np.ones(img.shape, dtype=np.uint8) * 255
        mask[20 - cut_pad1:70 - cut_pad1, 55 - cut_pad2:-55 - cut_pad2] = 0
        img = cv2.bitwise_and(img, mask)

        # Rectangular mouth mask with randomised top and bottom edges.
        # (A triangular alternative is available as add_mouth_mask2.)
        mask_B = img.copy()
        mask_end = np.random.randint(236, 256)
        mask_start = mask_B.shape[0] // 2 - np.random.randint(40, 50)
        mask_B[mask_start:mask_end, 30:-30] = 0

        img = self.transforms_image(Image.fromarray(img))
        mask_B = self.transforms_image(Image.fromarray(mask_B))

        # Zero-pad the audio feature into the top-left of a fixed canvas.
        audio_feature = np.array(audio_feature)
        audio = np.zeros((256, 256), dtype=np.float32)
        audio[:audio_feature.shape[0], :audio_feature.shape[1]] = audio_feature
        # from_numpy avoids the slow list-of-ndarray path of torch.tensor.
        audio = torch.from_numpy(audio).unsqueeze(0)

        # Reference frame: random.choice draws one element without copying
        # the whole label list the way random.sample(..., 1) does.
        imgA_path, _ = random.choice(self.labels)
        imgA = self._read_rgb(imgA_path)
        cut_pad1 = np.random.randint(0, 10)
        cut_pad2 = np.random.randint(0, 10)
        imgA = imgA[cut_pad1:crop + cut_pad1, cut_pad2:crop + cut_pad2]
        # NOTE(review): the eye mask built for B (with B's crop offsets) is
        # reused here unchanged - confirm this is intended.
        imgA = cv2.bitwise_and(imgA, mask)
        imgA = self.transforms_image(Image.fromarray(imgA))

        return {'A': imgA, 'A_label': audio, 'B': img, 'B_label': audio, 'mask_B': mask_B}

    def __len__(self):
        """Return the total number of images in the dataset."""
        return len(self.labels)


if __name__ == '__main__':
    from options.train_options import TrainOptions

    opt = TrainOptions().parse()
    # Smoke test: a mode must be given, otherwise the dataset is empty.
    dataset = L2FaceAudioDataset(opt, mode='train')
    dataset_size = len(dataset)
    print(dataset_size)
    for i, data in enumerate(dataset):
        print(data)
"""This package contains modules related to objective functions, optimizations, and network architectures.

To add a custom model class called 'dummy', you need to add a file called 'dummy_model.py' and define a subclass DummyModel inherited from BaseModel.
You need to implement the following five functions:
    -- <__init__>:                      initialize the class; first call BaseModel.__init__(self, opt).
    -- <set_input>:                     unpack data from dataset and apply preprocessing.
    -- <forward>:                       produce intermediate results.
    -- <optimize_parameters>:           calculate loss, gradients, and update network weights.
    -- <modify_commandline_options>:    (optionally) add model-specific options and set default options.

In the function <__init__>, you need to define four lists:
    -- self.loss_names (str list):          specify the training losses that you want to plot and save.
    -- self.model_names (str list):         define networks used in our training.
    -- self.visual_names (str list):        specify the images that you want to display and save.
    -- self.optimizers (optimizer list):    define and initialize optimizers. You can define one optimizer for each network. If two networks are updated at the same time, you can use itertools.chain to group them. See cycle_gan_model.py for an example.

Now you can use the model class by specifying flag '--model dummy'.
See our template model class 'template_model.py' for more details.
"""

import importlib
from landmark2face_wy.models.base_model import BaseModel


def find_model_using_name(model_name):
    """Import "landmark2face_wy/models/[model_name]_model.py" and return its model class.

    The class is looked up case-insensitively under the name
    ``model_name`` with underscores removed, followed by ``model``
    (e.g. ``'l2faceaudio'`` -> ``L2FaceAudioModel``), and it must be a
    subclass of BaseModel. If several attributes match, the last one found
    wins.

    Raises:
        ImportError: if the module itself cannot be imported.
        SystemExit: with a non-zero status if the module holds no matching
            BaseModel subclass.
    """
    model_filename = "landmark2face_wy.models." + model_name + "_model"
    modellib = importlib.import_module(model_filename)
    target_model_name = (model_name.replace('_', '') + 'model').lower()

    model = None
    for name, candidate in modellib.__dict__.items():
        # Check for a class first: issubclass() raises TypeError when a
        # function or constant happens to carry the matching name.
        if name.lower() == target_model_name \
                and isinstance(candidate, type) \
                and issubclass(candidate, BaseModel):
            model = candidate

    if model is None:
        print("In %s.py, there should be a subclass of BaseModel with class name that matches %s in lowercase." % (model_filename, target_model_name))
        # A failed lookup is an error, so do not report success (status 0),
        # and do not rely on the interactive-only ``exit`` helper.
        raise SystemExit(1)

    return model


def get_option_setter(model_name):
    """Return the ``modify_commandline_options`` attribute of the model class for ``model_name``."""
    model_class = find_model_using_name(model_name)
    return model_class.modify_commandline_options


def create_model(opt):
    """Create a model given the option.

    Looks up the class named by ``opt.model`` and instantiates it with ``opt``.
    This is the main interface between this package and 'train.py'/'test.py'

    Example:
        >>> from landmark2face_wy.models import create_model
        >>> model = create_model(opt)
    """
    model = find_model_using_name(opt.model)
    instance = model(opt)
    print("model [%s] was created" % type(instance).__name__)
    return instance
100644 index 0000000000000000000000000000000000000000..69d8a4241f7e9a4678fa1ba3c6f6f337d9b0e280 --- /dev/null +++ b/landmark2face_wy/models/face_model.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fe9f6e3d20cd00922e6d343365f3dfef1b2afe131c3e33d54bc648a057014cb +size 1400824 diff --git a/landmark2face_wy/models/l2faceaudio_model.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/models/l2faceaudio_model.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..b20fa5e748ba6255067c7d74e06687656f755f4b --- /dev/null +++ b/landmark2face_wy/models/l2faceaudio_model.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05bf1f0ae2cb6289d387bbe144f0ffb0b6fc14946ca3c2b7791e7763732c3ade +size 1165520 diff --git a/landmark2face_wy/models/networks.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/models/networks.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..afdaad5cf35e6d55705be5a5c1b993ce0478362c --- /dev/null +++ b/landmark2face_wy/models/networks.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d0b6c7860980c0e5f7ea959cd92df4c5f959ff7ce7480db55ec63a82660c662 +size 3533456 diff --git a/landmark2face_wy/models/networks_HD.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/models/networks_HD.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..a295d7115b5719429a01c45c4c961bdabb339ad8 --- /dev/null +++ b/landmark2face_wy/models/networks_HD.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e38ddd0e588ed94a3fc805f98ca00a18f38a738370f2a250da7bc1f52821402 +size 5950296 diff --git a/landmark2face_wy/models/networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/models/networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so new 
file mode 100644 index 0000000000000000000000000000000000000000..14e0beb5901e1c18f1f831117b44f8ecae34474d --- /dev/null +++ b/landmark2face_wy/models/networks_pix2pixHD.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02d32e66441f9b49d1251855626fde459c253a7efbb84b041004ad583528e323 +size 12729352 diff --git a/landmark2face_wy/models/pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/models/pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..4b52d50ec8827e71664f25da8b75b6438d59c101 --- /dev/null +++ b/landmark2face_wy/models/pirender_3dmm_mouth_hd_model.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dc4ad4b811689cffe59a4a8ac0dddf96527c36e4e191acdfff70d9f8f70f699 +size 3121488 diff --git a/landmark2face_wy/models/pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/models/pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..f48d95b8d0dbb4ad222ae88ff53b364e7c6bf4c4 --- /dev/null +++ b/landmark2face_wy/models/pirender_3dmm_mouth_hdv2_model.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3f9d8c05b21d9b50748ecb8b1a4acd276fbe4f09f934b2f0c19b822e5696792 +size 3119104 diff --git a/landmark2face_wy/options/__init__.py b/landmark2face_wy/options/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e7eedebe54aa70169fd25951b3034d819e396c90 --- /dev/null +++ b/landmark2face_wy/options/__init__.py @@ -0,0 +1 @@ +"""This package options includes option modules: training options, test options, and basic options (used in both training and test).""" diff --git a/landmark2face_wy/options/base_options.cpython-38-x86_64-linux-gnu.so 
b/landmark2face_wy/options/base_options.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..65775edfb7a76db412c5d37b0f2840c2b60323a7 --- /dev/null +++ b/landmark2face_wy/options/base_options.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03aaa24b77f771ec2d43299d549d837b519ed78856c7cecd93f5f20fad70da0a +size 1831608 diff --git a/landmark2face_wy/options/test_options.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/options/test_options.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..3dc7fa192ee9833a89275e7685385d54f8da708d --- /dev/null +++ b/landmark2face_wy/options/test_options.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fcdacdf5f87f5e021452fcb1701e1de6f41d05fe19aa81325e56d147ff72ffa +size 418552 diff --git a/landmark2face_wy/options/train_options.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/options/train_options.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..f19b0e4b38ab1e3851bd4cbc819d67dce335f05c --- /dev/null +++ b/landmark2face_wy/options/train_options.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:697c5d54710eeb5378eaa12dededefa102c5fd1a3d6504e4dd3747222d83164a +size 500864 diff --git a/landmark2face_wy/sync_batchnorm/__init__.py b/landmark2face_wy/sync_batchnorm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..6d9b36c74b1808b56ded68cf080a689db7e0ee4e --- /dev/null +++ b/landmark2face_wy/sync_batchnorm/__init__.py @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- +# File : __init__.py +# Author : Jiayuan Mao +# Email : maojiayuan@gmail.com +# Date : 27/01/2018 +# +# This file is part of Synchronized-BatchNorm-PyTorch. +# https://github.com/vacancy/Synchronized-BatchNorm-PyTorch +# Distributed under MIT License. 
+ +from .batchnorm import set_sbn_eps_mode +from .batchnorm import SynchronizedBatchNorm1d, SynchronizedBatchNorm2d, SynchronizedBatchNorm3d +from .batchnorm import patch_sync_batchnorm, convert_model +from .replicate import DataParallelWithCallback, patch_replication_callback diff --git a/landmark2face_wy/sync_batchnorm/batchnorm.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/sync_batchnorm/batchnorm.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..8b1507d167cb21a19dfb99a2b9cd32e9cdfd56c2 --- /dev/null +++ b/landmark2face_wy/sync_batchnorm/batchnorm.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:665e6144d64cca7bfe5072dc9a211e13349ef285253cd6a57ff6eaf56d274f5d +size 2167608 diff --git a/landmark2face_wy/sync_batchnorm/batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/sync_batchnorm/batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..3e3777efa28fe25b93e9724fa404219e97a955da --- /dev/null +++ b/landmark2face_wy/sync_batchnorm/batchnorm_reimpl.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa6d7a220684fac32aaa7a14c68145ddcbd781777a3c7a0240db6ecf0f98f23 +size 931240 diff --git a/landmark2face_wy/sync_batchnorm/comm.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/sync_batchnorm/comm.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..601839b71900478e98445b613f1c266a60d95c7f --- /dev/null +++ b/landmark2face_wy/sync_batchnorm/comm.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:980f9ffc65140759448eb3448c68a419a2be0c4b93d41edc062c9d50d08f3beb +size 909744 diff --git a/landmark2face_wy/sync_batchnorm/replicate.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/sync_batchnorm/replicate.cpython-38-x86_64-linux-gnu.so new file 
mode 100644 index 0000000000000000000000000000000000000000..7989bd825ea52c40a50f28f50f833ca388e9093f --- /dev/null +++ b/landmark2face_wy/sync_batchnorm/replicate.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11b8c7903814a08d26a000a790445e33f80041c4d0955bcd6472cfe39bc90b01 +size 557920 diff --git a/landmark2face_wy/sync_batchnorm/unittest.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/sync_batchnorm/unittest.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..869bfe21ae394595f62e0c52cb7b055f029fbfe8 --- /dev/null +++ b/landmark2face_wy/sync_batchnorm/unittest.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6319f7a4a2c6822c673223c080e98457f9014d86ec7027adaf69d382abb7200 +size 427336 diff --git a/landmark2face_wy/test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..0d5d253d29365548277f76572503ed9fbe444fc2 --- /dev/null +++ b/landmark2face_wy/test_3dmm_multi_exp_wenet.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d10d5f1833370bffe3c5348eedd2b8c16c9034cc7d58467dccd7cd1320b1c349 +size 1978808 diff --git a/landmark2face_wy/test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..95c6c071c258bfdeb66770090d8c55592ff390ca --- /dev/null +++ b/landmark2face_wy/test_3dmm_multi_exp_wenet0.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec3590c44f8e988965c904fada8a42edffad31346e33b6e8ad1a3168e020ac0a +size 1932008 diff --git a/landmark2face_wy/util/__init__.py b/landmark2face_wy/util/__init__.py new file mode 100644 
index 0000000000000000000000000000000000000000..ae36f63d8859ec0c60dcbfe67c4ac324e751ddf7 --- /dev/null +++ b/landmark2face_wy/util/__init__.py @@ -0,0 +1 @@ +"""This package includes a miscellaneous collection of useful helper functions.""" diff --git a/landmark2face_wy/util/flow_util.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/util/flow_util.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..2e137f8a3088dd91286049869da222a3576d2477 --- /dev/null +++ b/landmark2face_wy/util/flow_util.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe5f28f3f8e56f11ca64a356cc4e05d3d986156ec9ec6c44e136a2d947deece4 +size 647432 diff --git a/landmark2face_wy/util/get_data.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/util/get_data.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..f4550eeb873075ebf27966cbdb4948f2c95dd7bc --- /dev/null +++ b/landmark2face_wy/util/get_data.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbc71c56031e8980a02c8f3d87a34a049532e5b1150f6ddd79e8ef8cf6d6a0c0 +size 906216 diff --git a/landmark2face_wy/util/html.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/util/html.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..10442f42c15cf124c4fb17c9400fb92d0c63e611 --- /dev/null +++ b/landmark2face_wy/util/html.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b904436c9878cd81d9bf94d7f900a99806dfa6fb2837c5e011fd1531df352e99 +size 1575136 diff --git a/landmark2face_wy/util/image_pool.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/util/image_pool.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..fc9da5f9f85e1db9e79b5ba41d61b08ad91a874d --- /dev/null +++ 
b/landmark2face_wy/util/image_pool.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f05925441e2483e4b8aaa81de93d5ce302875bbf4d11533295994f57b7d467cf +size 459208 diff --git a/landmark2face_wy/util/util.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/util/util.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..6cd522eb51ef320a9b102674e74c58a18b8a2fb7 --- /dev/null +++ b/landmark2face_wy/util/util.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c57e9889e8bcb49b289f7ac8c2167265d044ce74916f3d4c6d09b72c8d1cd8ff +size 757944 diff --git a/landmark2face_wy/util/visualizer.cpython-38-x86_64-linux-gnu.so b/landmark2face_wy/util/visualizer.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..1388dcec9b48678e11418152ca46776443f74752 --- /dev/null +++ b/landmark2face_wy/util/visualizer.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3289991e476fea0132803f7e9489b7c327152f7f67ab247066822da13f1668b2 +size 3234024 diff --git a/license.txt b/license.txt new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/log/dh.log b/log/dh.log new file mode 100644 index 0000000000000000000000000000000000000000..cfa73460aa98f768c833161992954afcb43c5872 --- /dev/null +++ b/log/dh.log @@ -0,0 +1,186 @@ +[2025-03-18 12:50:40,644] [run.py[line:153]] [INFO] [TransDhTask init] +[2025-03-18 12:50:41,729] [run.py[line:158]] [INFO] [任务:1002 -> audio_url:./temp/example/audio.wav video_url:./temp/example/video.mp4] +[2025-03-18 12:50:41,732] [run.py[line:158]] [INFO] [[1002] -> ffmpeg video: ffmpeg -loglevel warning -i ./temp/example/video.mp4 -c:v libx264 -crf 15 -an -y ./temp/1002_format.mp4] +[2025-03-18 12:50:41,790] [run.py[line:158]] [ERROR] [[1002]预处理失败,异常信息:[format video 
error]] +[2025-03-18 12:50:41,790] [run.py[line:158]] [ERROR] [[1002]任务执行失败,异常信息:[[1002]预处理失败,异常信息:[format video error]]] +[2025-03-18 12:50:41,791] [run.py[line:158]] [INFO] [>>> 任务:1002 耗时:0.06167912483215332 ] +[2025-03-18 12:50:57,817] [run.py[line:143]] [INFO] [TransDhTask init] +[2025-03-18 12:50:58,906] [run.py[line:147]] [INFO] [任务:1002 -> audio_url:./temp/example/audio.wav video_url:./temp/example/video.mp4] +[2025-03-18 12:50:58,908] [run.py[line:147]] [INFO] [[1002] -> ffmpeg video: ffmpeg -loglevel warning -i ./temp/example/video.mp4 -c:v libx264 -crf 15 -an -y ./temp/1002_format.mp4] +[2025-03-18 12:50:58,964] [run.py[line:147]] [ERROR] [[1002]预处理失败,异常信息:[format video error]] +[2025-03-18 12:50:58,965] [run.py[line:147]] [ERROR] [[1002]任务执行失败,异常信息:[[1002]预处理失败,异常信息:[format video error]]] +[2025-03-18 12:50:58,966] [run.py[line:147]] [INFO] [>>> 任务:1002 耗时:0.059505462646484375 ] +[2025-03-18 12:52:06,385] [run.py[line:143]] [INFO] [TransDhTask init] +[2025-03-18 12:52:07,560] [run.py[line:147]] [INFO] [任务:1002 -> audio_url:./example/audio.wav video_url:./example/video.mp4] +[2025-03-18 12:52:07,646] [run.py[line:147]] [INFO] [[1002] -> ffmpeg video: ffmpeg -loglevel warning -i ./example/video.mp4 -crf 15 -vcodec copy -an -y ./1002_format.mp4] +[2025-03-18 12:52:07,801] [run.py[line:147]] [INFO] [[1002] -> ffmpeg audio: ffmpeg -loglevel warning -i ./example/audio.wav -ac 1 -ar 16000 -acodec pcm_s16le -y ./1002_format.wav] +[2025-03-18 12:52:07,922] [run.py[line:147]] [INFO] [[1002] -> 预处理耗时:0.35927414894104004s] +[2025-03-18 12:52:10,169] [run.py[line:147]] [INFO] [[1002] -> get_aud_feat1 cost:2.245649576187134s] +[2025-03-18 12:52:11,702] [process.py[line:108]] [INFO] [>>> init_wh_process进程启动] +[2025-03-18 12:52:20,087] [process.py[line:108]] [INFO] [[1002]init_wh result :[0.8809176216714891], cost: 8.382684469223022 s] +[2025-03-18 12:52:20,090] [run.py[line:147]] [INFO] [[1002] -> wh: [0.8809176216714891]] +[2025-03-18 12:52:21,453] 
[process.py[line:108]] [INFO] [>>> 数字人图片处理进程启动] +[2025-03-18 12:52:24,015] [process.py[line:108]] [INFO] [[1002]任务视频驱动队列启动 batch_size:4, len:150] +[2025-03-18 12:52:24,050] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 开始循环] +[2025-03-18 12:52:24,085] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:4] +[2025-03-18 12:52:24,112] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:8] +[2025-03-18 12:52:24,122] [process.py[line:108]] [INFO] [>>> audio_transfer get message:4] +[2025-03-18 12:52:24,139] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:12] +[2025-03-18 12:52:24,148] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:16] +[2025-03-18 12:52:24,161] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:20] +[2025-03-18 12:52:24,173] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:24] +[2025-03-18 12:52:24,185] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:28] +[2025-03-18 12:52:24,197] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:32] +[2025-03-18 12:52:24,208] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:36] +[2025-03-18 12:52:24,222] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:40] +[2025-03-18 12:52:24,232] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:44] +[2025-03-18 12:52:25,722] [process.py[line:108]] [INFO] [[1002] -> frame_id:[4] 模糊置信度:[0.969]] +[2025-03-18 12:52:25,723] [process.py[line:108]] [INFO] [[1002] -> need chaofen .] 
+[2025-03-18 12:52:25,905] [utils.py[line:145]] [INFO] [Note: detected 72 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable.] +[2025-03-18 12:52:25,906] [utils.py[line:148]] [INFO] [Note: NumExpr detected 72 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.] +[2025-03-18 12:52:25,907] [utils.py[line:160]] [INFO] [NumExpr defaulting to 8 threads.] +[2025-03-18 12:52:26,083] [process.py[line:108]] [INFO] [[4] -> chaofen cost:1.9595112800598145s] +[2025-03-18 12:52:31,071] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:4, cost:6.948575258255005s] +[2025-03-18 12:52:31,116] [process.py[line:108]] [INFO] [>>> audio_transfer get message:8] +[2025-03-18 12:52:31,126] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:48] +[2025-03-18 12:52:31,347] [process.py[line:108]] [INFO] [[8] -> chaofen cost:0.2294461727142334s] +[2025-03-18 12:52:31,576] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:8, cost:0.45979762077331543s] +[2025-03-18 12:52:31,605] [process.py[line:108]] [INFO] [>>> audio_transfer get message:12] +[2025-03-18 12:52:31,615] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:52] +[2025-03-18 12:52:31,818] [process.py[line:108]] [INFO] [[12] -> chaofen cost:0.21271824836730957s] +[2025-03-18 12:52:32,036] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:12, cost:0.43187427520751953s] +[2025-03-18 12:52:32,060] [process.py[line:108]] [INFO] [>>> audio_transfer get message:16] +[2025-03-18 12:52:32,072] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:56] +[2025-03-18 12:52:32,279] [process.py[line:108]] [INFO] [[16] -> chaofen cost:0.21899199485778809s] +[2025-03-18 12:52:32,530] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:16, 
cost:0.47049522399902344s] +[2025-03-18 12:52:32,552] [process.py[line:108]] [INFO] [>>> audio_transfer get message:20] +[2025-03-18 12:52:32,567] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:60] +[2025-03-18 12:52:32,766] [process.py[line:108]] [INFO] [[20] -> chaofen cost:0.21334147453308105s] +[2025-03-18 12:52:32,993] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:20, cost:0.4411466121673584s] +[2025-03-18 12:52:33,015] [process.py[line:108]] [INFO] [>>> audio_transfer get message:24] +[2025-03-18 12:52:33,028] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:64] +[2025-03-18 12:52:33,229] [process.py[line:108]] [INFO] [[24] -> chaofen cost:0.21344351768493652s] +[2025-03-18 12:52:33,457] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:24, cost:0.44205546379089355s] +[2025-03-18 12:52:33,479] [process.py[line:108]] [INFO] [>>> audio_transfer get message:28] +[2025-03-18 12:52:33,493] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:68] +[2025-03-18 12:52:33,697] [process.py[line:108]] [INFO] [[28] -> chaofen cost:0.21679949760437012s] +[2025-03-18 12:52:33,924] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:28, cost:0.4448537826538086s] +[2025-03-18 12:52:33,946] [process.py[line:108]] [INFO] [>>> audio_transfer get message:32] +[2025-03-18 12:52:33,960] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:72] +[2025-03-18 12:52:34,159] [process.py[line:108]] [INFO] [[32] -> chaofen cost:0.21156740188598633s] +[2025-03-18 12:52:34,381] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:32, cost:0.43474769592285156s] +[2025-03-18 12:52:34,403] [process.py[line:108]] [INFO] [>>> audio_transfer get message:36] +[2025-03-18 12:52:34,417] [process.py[line:108]] [INFO] [drivered_video 
>>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:76] +[2025-03-18 12:52:34,618] [process.py[line:108]] [INFO] [[36] -> chaofen cost:0.21408891677856445s] +[2025-03-18 12:52:34,844] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:36, cost:0.4406392574310303s] +[2025-03-18 12:52:34,867] [process.py[line:108]] [INFO] [>>> audio_transfer get message:40] +[2025-03-18 12:52:34,881] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:80] +[2025-03-18 12:52:35,099] [process.py[line:108]] [INFO] [[40] -> chaofen cost:0.23105645179748535s] +[2025-03-18 12:52:35,328] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:40, cost:0.46161866188049316s] +[2025-03-18 12:52:35,350] [process.py[line:108]] [INFO] [>>> audio_transfer get message:44] +[2025-03-18 12:52:35,363] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:84] +[2025-03-18 12:52:35,577] [process.py[line:108]] [INFO] [[44] -> chaofen cost:0.22576594352722168s] +[2025-03-18 12:52:35,808] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:44, cost:0.4577639102935791s] +[2025-03-18 12:52:35,832] [process.py[line:108]] [INFO] [>>> audio_transfer get message:48] +[2025-03-18 12:52:35,846] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:88] +[2025-03-18 12:52:36,047] [process.py[line:108]] [INFO] [[48] -> chaofen cost:0.21441864967346191s] +[2025-03-18 12:52:36,278] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:48, cost:0.4459846019744873s] +[2025-03-18 12:52:36,301] [process.py[line:108]] [INFO] [>>> audio_transfer get message:52] +[2025-03-18 12:52:36,315] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:92] +[2025-03-18 12:52:36,521] [process.py[line:108]] [INFO] [[52] -> chaofen cost:0.2181704044342041s] +[2025-03-18 12:52:36,777] 
[process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:52, cost:0.47586750984191895s] +[2025-03-18 12:52:36,798] [process.py[line:108]] [INFO] [>>> audio_transfer get message:56] +[2025-03-18 12:52:36,817] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:96] +[2025-03-18 12:52:37,014] [process.py[line:108]] [INFO] [[56] -> chaofen cost:0.2147221565246582s] +[2025-03-18 12:52:37,247] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:56, cost:0.4486660957336426s] +[2025-03-18 12:52:37,266] [process.py[line:108]] [INFO] [>>> audio_transfer get message:60] +[2025-03-18 12:52:37,281] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:100] +[2025-03-18 12:52:37,483] [process.py[line:108]] [INFO] [[60] -> chaofen cost:0.21598410606384277s] +[2025-03-18 12:52:37,703] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:60, cost:0.43683695793151855s] +[2025-03-18 12:52:37,722] [process.py[line:108]] [INFO] [>>> audio_transfer get message:64] +[2025-03-18 12:52:37,736] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:104] +[2025-03-18 12:52:37,941] [process.py[line:108]] [INFO] [[64] -> chaofen cost:0.2180624008178711s] +[2025-03-18 12:52:38,163] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:64, cost:0.4412345886230469s] +[2025-03-18 12:52:38,183] [process.py[line:108]] [INFO] [>>> audio_transfer get message:68] +[2025-03-18 12:52:38,197] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:108] +[2025-03-18 12:52:38,397] [process.py[line:108]] [INFO] [[68] -> chaofen cost:0.21321654319763184s] +[2025-03-18 12:52:38,637] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:68, cost:0.45404863357543945s] +[2025-03-18 12:52:38,656] [process.py[line:108]] [INFO] [>>> audio_transfer get 
message:72] +[2025-03-18 12:52:38,670] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:112] +[2025-03-18 12:52:38,877] [process.py[line:108]] [INFO] [[72] -> chaofen cost:0.21999263763427734s] +[2025-03-18 12:52:39,100] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:72, cost:0.4440436363220215s] +[2025-03-18 12:52:39,119] [process.py[line:108]] [INFO] [>>> audio_transfer get message:76] +[2025-03-18 12:52:39,133] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:116] +[2025-03-18 12:52:39,347] [process.py[line:108]] [INFO] [[76] -> chaofen cost:0.22693967819213867s] +[2025-03-18 12:52:39,568] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:76, cost:0.4492220878601074s] +[2025-03-18 12:52:39,586] [process.py[line:108]] [INFO] [>>> audio_transfer get message:80] +[2025-03-18 12:52:39,601] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:120] +[2025-03-18 12:52:39,801] [process.py[line:108]] [INFO] [[80] -> chaofen cost:0.21407222747802734s] +[2025-03-18 12:52:40,024] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:80, cost:0.4377562999725342s] +[2025-03-18 12:52:40,052] [process.py[line:108]] [INFO] [>>> audio_transfer get message:84] +[2025-03-18 12:52:40,068] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:124] +[2025-03-18 12:52:40,270] [process.py[line:108]] [INFO] [[84] -> chaofen cost:0.21637320518493652s] +[2025-03-18 12:52:40,494] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:84, cost:0.44118523597717285s] +[2025-03-18 12:52:40,513] [process.py[line:108]] [INFO] [>>> audio_transfer get message:88] +[2025-03-18 12:52:40,527] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:128] +[2025-03-18 12:52:40,731] [process.py[line:108]] 
[INFO] [[88] -> chaofen cost:0.2170412540435791s] +[2025-03-18 12:52:40,951] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:88, cost:0.4383111000061035s] +[2025-03-18 12:52:40,971] [process.py[line:108]] [INFO] [>>> audio_transfer get message:92] +[2025-03-18 12:52:40,984] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:132] +[2025-03-18 12:52:41,187] [process.py[line:108]] [INFO] [[92] -> chaofen cost:0.2148122787475586s] +[2025-03-18 12:52:41,416] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:92, cost:0.4454326629638672s] +[2025-03-18 12:52:41,439] [process.py[line:108]] [INFO] [>>> audio_transfer get message:96] +[2025-03-18 12:52:41,451] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:136] +[2025-03-18 12:52:41,663] [process.py[line:108]] [INFO] [[96] -> chaofen cost:0.222761869430542s] +[2025-03-18 12:52:41,887] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:96, cost:0.4477369785308838s] +[2025-03-18 12:52:41,906] [process.py[line:108]] [INFO] [>>> audio_transfer get message:100] +[2025-03-18 12:52:41,920] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:140] +[2025-03-18 12:52:42,123] [process.py[line:108]] [INFO] [[100] -> chaofen cost:0.21576929092407227s] +[2025-03-18 12:52:42,359] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:100, cost:0.4525878429412842s] +[2025-03-18 12:52:42,379] [process.py[line:108]] [INFO] [>>> audio_transfer get message:104] +[2025-03-18 12:52:42,394] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:144] +[2025-03-18 12:52:42,596] [process.py[line:108]] [INFO] [[104] -> chaofen cost:0.21553897857666016s] +[2025-03-18 12:52:42,836] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:104, cost:0.45633435249328613s] 
+[2025-03-18 12:52:42,855] [process.py[line:108]] [INFO] [>>> audio_transfer get message:108] +[2025-03-18 12:52:42,870] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据大小:[4], current_idx:148] +[2025-03-18 12:52:42,873] [process.py[line:108]] [INFO] [append imgs over] +[2025-03-18 12:52:42,879] [process.py[line:108]] [INFO] [drivered_video >>>>>>>>>>>>>>>>>>>> 发送数据结束] +[2025-03-18 12:52:43,073] [process.py[line:108]] [INFO] [[108] -> chaofen cost:0.21662592887878418s] +[2025-03-18 12:52:43,297] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:108, cost:0.4421381950378418s] +[2025-03-18 12:52:43,318] [process.py[line:108]] [INFO] [>>> audio_transfer get message:112] +[2025-03-18 12:52:43,332] [process.py[line:108]] [INFO] [[1002]任务预处理进程结束] +[2025-03-18 12:52:43,531] [process.py[line:108]] [INFO] [[112] -> chaofen cost:0.21228814125061035s] +[2025-03-18 12:52:43,791] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:112, cost:0.47336626052856445s] +[2025-03-18 12:52:43,811] [process.py[line:108]] [INFO] [>>> audio_transfer get message:116] +[2025-03-18 12:52:44,034] [process.py[line:108]] [INFO] [[116] -> chaofen cost:0.2223985195159912s] +[2025-03-18 12:52:44,262] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:116, cost:0.4509873390197754s] +[2025-03-18 12:52:44,281] [process.py[line:108]] [INFO] [>>> audio_transfer get message:120] +[2025-03-18 12:52:44,499] [process.py[line:108]] [INFO] [[120] -> chaofen cost:0.21637916564941406s] +[2025-03-18 12:52:44,742] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:120, cost:0.46120476722717285s] +[2025-03-18 12:52:44,762] [process.py[line:108]] [INFO] [>>> audio_transfer get message:124] +[2025-03-18 12:52:44,981] [process.py[line:108]] [INFO] [[124] -> chaofen cost:0.21886157989501953s] +[2025-03-18 12:52:45,240] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, 
frameId:124, cost:0.4781684875488281s] +[2025-03-18 12:52:45,258] [process.py[line:108]] [INFO] [>>> audio_transfer get message:128] +[2025-03-18 12:52:45,474] [process.py[line:108]] [INFO] [[128] -> chaofen cost:0.21480226516723633s] +[2025-03-18 12:52:45,708] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:128, cost:0.44920992851257324s] +[2025-03-18 12:52:45,726] [process.py[line:108]] [INFO] [>>> audio_transfer get message:132] +[2025-03-18 12:52:45,943] [process.py[line:108]] [INFO] [[132] -> chaofen cost:0.21567535400390625s] +[2025-03-18 12:52:46,181] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:132, cost:0.45519399642944336s] +[2025-03-18 12:52:46,200] [process.py[line:108]] [INFO] [>>> audio_transfer get message:136] +[2025-03-18 12:52:46,418] [process.py[line:108]] [INFO] [[136] -> chaofen cost:0.21763992309570312s] +[2025-03-18 12:52:46,662] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:136, cost:0.4619452953338623s] +[2025-03-18 12:52:46,681] [process.py[line:108]] [INFO] [>>> audio_transfer get message:140] +[2025-03-18 12:52:46,900] [process.py[line:108]] [INFO] [[140] -> chaofen cost:0.21794748306274414s] +[2025-03-18 12:52:47,146] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:140, cost:0.4646177291870117s] +[2025-03-18 12:52:47,166] [process.py[line:108]] [INFO] [>>> audio_transfer get message:144] +[2025-03-18 12:52:47,382] [process.py[line:108]] [INFO] [[144] -> chaofen cost:0.21491503715515137s] +[2025-03-18 12:52:47,619] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, frameId:144, cost:0.4536001682281494s] +[2025-03-18 12:52:47,639] [process.py[line:108]] [INFO] [>>> audio_transfer get message:148] +[2025-03-18 12:52:47,857] [process.py[line:108]] [INFO] [[148] -> chaofen cost:0.21780657768249512s] +[2025-03-18 12:52:48,098] [process.py[line:108]] [INFO] [audio_transfer >>>>>>>>>>> 发送完成数据大小:4, 
frameId:148, cost:0.459348201751709s] +[2025-03-18 12:52:48,104] [process.py[line:108]] [INFO] [>>> audio_transfer get exception msg:-1] +[2025-03-18 12:52:48,105] [process.py[line:108]] [INFO] [[1002]任务数字人图片处理已完成] +[2025-03-18 12:52:48,146] [run.py[line:43]] [INFO] [Custom VideoWriter [1002]视频帧队列处理已结束] +[2025-03-18 12:52:48,151] [run.py[line:46]] [INFO] [Custom VideoWriter Silence Video saved in /mnt/nfs/bj4-v100-23/data1/yubosun/git_proj/heygem/heygem_ori_so/1002-t.mp4] +[2025-03-18 12:52:48,155] [run.py[line:118]] [INFO] [Custom command:ffmpeg -loglevel warning -y -i ./example/audio.wav -i ./1002-t.mp4 -c:a aac -c:v libx264 -crf 15 -strict -2 ./1002-r.mp4] +[2025-03-18 12:53:06,908] [run.py[line:147]] [INFO] [>>> 任务:1002 耗时:59.3451771736145 ] diff --git a/model_lib/__init__.py b/model_lib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..80148b1d9239cce56a1735f751086b34ef7e5550 --- /dev/null +++ b/model_lib/__init__.py @@ -0,0 +1,4 @@ +from .base_wrapper import ONNXModel +from .model_base import ModelBase + + diff --git a/model_lib/base_wrapper/__init__.py b/model_lib/base_wrapper/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..02a0264c7d88762e2b94c51df87f7c0d497b66ef --- /dev/null +++ b/model_lib/base_wrapper/__init__.py @@ -0,0 +1,6 @@ +# -- coding: utf-8 -- +# @Time : 2022/8/26 + + +from .onnx_model import ONNXModel + diff --git a/model_lib/base_wrapper/onnx_model.cpython-38-x86_64-linux-gnu.so b/model_lib/base_wrapper/onnx_model.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..64c758f3607a44c47e876ed3d1565ec981944a30 --- /dev/null +++ b/model_lib/base_wrapper/onnx_model.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70f6dfb7c88ef61746c89bd35a818447cd58df393d321e5ca0147a2f17cd68ba +size 1143792 diff --git a/model_lib/model_base.py b/model_lib/model_base.py new file mode 100644 index 
0000000000000000000000000000000000000000..3056664d290d3fa0c292fc717e1f125b08d90766 --- /dev/null +++ b/model_lib/model_base.py @@ -0,0 +1,51 @@ +# -- coding: utf-8 -- +# @Time : 2022/7/29 + + + +from .base_wrapper import ONNXModel +from pathlib import Path + + +try: + from .base_wrapper import TRTWrapper, TRTWrapperSelf +except: + pass + + +# from cv2box.utils import try_import + +class ModelBase: + def __init__(self, model_info, provider): + self.model_path = model_info['model_path'] + + if 'input_dynamic_shape' in model_info.keys(): + self.input_dynamic_shape = model_info['input_dynamic_shape'] + else: + self.input_dynamic_shape = None + + if 'picklable' in model_info.keys(): + picklable = model_info['picklable'] + else: + picklable = False + + if 'trt_wrapper_self' in model_info.keys(): + TRTWrapper = TRTWrapperSelf + + # init model + if Path(self.model_path).suffix == '.engine': + self.model_type = 'trt' + self.model = TRTWrapper(self.model_path) + elif Path(self.model_path).suffix == '.tjm': + self.model_type = 'tjm' + self.model = TJMWrapper(self.model_path, provider=provider) + elif Path(self.model_path).suffix in ['.onnx', '.bin']: + self.model_type = 'onnx' + if not picklable: + if 'encrypt' in model_info.keys(): + self.model_path = load_encrypt_model(self.model_path, key=model_info['encrypt']) + self.model = ONNXModel(self.model_path, provider=provider, input_dynamic_shape=self.input_dynamic_shape) + else: + self.model = OnnxModelPickable(self.model_path, provider=provider, ) + else: + raise 'check model suffix , support engine/tjm/onnx now.' 
diff --git a/pack/packaging_script.py b/pack/packaging_script.py new file mode 100644 index 0000000000000000000000000000000000000000..45dce18a9618d5ce413b98712a4b7d1e626c09e7 --- /dev/null +++ b/pack/packaging_script.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +from distutils.core import setup +from Cython.Build import cythonize +from distutils.extension import Extension +import shutil +import os +from multiprocessing import Pool + + + + +def packaged_search(path, directory_file=None): + ''' + 遍历当前目录下文件及文件夹 + :param path: + :param directory_file: + :return: + ''' + if directory_file: + for i in os.listdir(path): + if i == directory_file: + path = os.path.join(path, directory_file) + pack_so(path) + elif os.path.isdir(os.path.join(path, i)): + packaged_search(os.path.join(path, i), directory_file) + else: + pack_so(path) + + +def pack_so(path): + ''' + 递归遍历所有文件夹,并创建进程池,将任务放入进程 + :param path: + :return: + ''' + all_file_path = [] + for i in os.listdir(path): + all_file_path.append(os.path.join(path, i)) + # 创建进程池 + p = Pool(8) + for j in all_file_path: + p.apply_async(pack_to_so_and_del_src, args=(j, )) + p.close() + p.join() + for g in all_file_path: + # 是文件夹递归 + if os.path.isdir(os.path.join(g)): + pack_so(g) + + +def pack_to_so_and_del_src(path): + ''' + 将需要打包的.py脚本进行打包 + :param path: + :return: + ''' + if '.py' in path and '.pyc' not in path and '__init__.py' not in path: + setup( + ext_modules=cythonize(Extension(path.rsplit('/', 1)[1].rsplit('.', 1)[0], [path])), + compiler_directives={'language_level': 3} + ) + # path_os = os.getcwd().rsplit('/', 1)[0] + '/pack/build/lib.linux-x86_64-3.6' # TODO + path_os = os.getcwd().rsplit('/', 1)[0] + '/pack/build/lib.linux-x86_64-3.8' + for j in os.listdir(path_os): + # 将打好的包放入原文件夹下 + shutil.move(os.path.join(path_os, j), os.path.join(path.rsplit('/', 1)[0], j)) + # 删除.py文件 + # if path.rsplit('/', 1)[1] not in ['packaging_script.py', 'manage.py', 'client.py']: + if path.rsplit('/', 1)[1] not in 
['packaging_script.py', 'app.py', 'app_local.py', 'tts_config.py']: + os.remove(path) + # shutil.rmtree('./build') + # 删除.c文件 + elif len(path.rsplit('.', 1)) == 2: + if path.rsplit('.', 1)[1] == 'c': + os.remove(path) + + +def view_log(): + ''' + 删除log日志文件 + :return: + ''' + pass + + +if __name__ == '__main__': + path = os.getcwd().rsplit('/', 1)[0] + packaged_search(path) + # 查看版本号并创建外文件写入 + # edition = os.popen('git show') + # with open('./edition.txt', 'w') as e: + # e.write(edition.readline()) + +""" + +usage: + python3 packaging_script.py build_ext +打包说明: + +""" \ No newline at end of file diff --git a/preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so b/preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..b3ee50a7a7fb83e99cf0f03ebcbdfd3d1c28101c --- /dev/null +++ b/preprocess_audio_and_3dmm.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7be130473effd974a04755c90cbc1149944144c7425321d92fa5bf699edb5b7 +size 2746048 diff --git a/pretrain_models/face_lib/face_parsing/79999_iter.onnx b/pretrain_models/face_lib/face_parsing/79999_iter.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d34346fbb5f9f2e80122952fcaa2d36159a2438b --- /dev/null +++ b/pretrain_models/face_lib/face_parsing/79999_iter.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:506b4c6e514a348980a49fd3f19d53ce3807dac57fedac127e917165fac0e928 +size 52588603 diff --git a/pretrain_models/face_lib/face_restore/gfpgan/GFPGANv1.4.onnx b/pretrain_models/face_lib/face_restore/gfpgan/GFPGANv1.4.onnx new file mode 100644 index 0000000000000000000000000000000000000000..c385c7a13abdc4403376e2c623b8d65532987b1e --- /dev/null +++ b/pretrain_models/face_lib/face_restore/gfpgan/GFPGANv1.4.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f061a99cb6c8025dd5d29bf04e4d0349d613bb8dc1484555ea79403cee2cf448 +size 
340255593 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..77e00d188001fa488431387fcb4d77abf7ed2b02 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,65 @@ +cv2box==0.5.9 +apstone==0.0.8 +appdirs==1.4.4 +audioread==2.1.9 +typeguard==2.13.3 +cffi==1.15.0 +charset-normalizer==2.0.12 +click==8.1.3 +colorama==0.4.4 +cycler==0.11.0 +decorator==5.1.1 +filelock==3.7.1 +flatbuffers==2.0 +fonttools==4.36.0 +freetype-py==2.3.0 +huggingface-hub==0.0.8 +idna==3.3 +imageio==2.19.3 +importlib-metadata==4.11.4 +joblib==1.1.0 +kiwisolver==1.4.4 +kornia==0.6.6 +librosa==0.8.1 +matplotlib==3.5.3 +networkx==2.6.3 +numba==0.55.2 +numexpr==2.8.6 +numpy==1.21.6 +onnxruntime-gpu==1.9.0 +opencv-python==4.7.0.72 +packaging==21.3 +pillow==9.1.1 +pooch==1.6.0 +protobuf==4.21.5 +psutil==5.9.1 +pycparser==2.21 +pyglet==1.5.26 +pyopengl==3.1.0 +pyparsing==3.0.9 +pyrender==0.1.45 +python-dateutil==2.8.2 +pywavelets==1.3.0 +pyyaml==6.0 +regex==2022.6.2 +requests==2.27.1 +resampy==0.2.2 +sacremoses==0.0.53 +scikit-image==0.19.3 +scikit-learn==1.0.2 +scipy==1.7.1 +six==1.16.0 +soundfile==0.10.3.post1 +threadpoolctl==3.1.0 +tifffile==2021.11.2 +tokenizers==0.10.3 +torch==1.11.0+cu113 +torchaudio==0.11.0+cu113 +torchvision==0.12.0+cu113 +tqdm==4.64.0 +transformers==4.6.1 +trimesh==3.12.7 +typeguard==2.13.3 +typing-extensions==4.2.0 +urllib3==1.26.9 +zipp==3.8.0 diff --git a/requirements_0.txt b/requirements_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..5f4d175967c2695f2d1e2e783996d83f518acdf5 --- /dev/null +++ b/requirements_0.txt @@ -0,0 +1,104 @@ +aiofiles==23.2.1 +annotated-types==0.7.0 +anyio==4.5.2 +apstone==0.0.8 +audioread==3.0.1 +blinker==1.8.2 +certifi==2025.1.31 +cffi==1.17.1 +charset-normalizer==3.4.1 +click==8.1.8 +coloredlogs==15.0.1 +contourpy==1.1.1 +cv2box==0.5.9 +cycler==0.12.1 +decorator==5.2.1 +einops==0.8.1 +exceptiongroup==1.2.2 +fastapi==0.115.11 +ffmpy==0.5.0 +filelock==3.16.1 
+Flask==3.0.3 +flatbuffers==25.2.10 +fonttools==4.56.0 +fsspec==2025.3.0 +gradio==4.44.1 +gradio_client==1.3.0 +h11==0.14.0 +httpcore==1.0.7 +httpx==0.28.1 +huggingface-hub==0.29.3 +humanfriendly==10.0 +idna==3.10 +imageio==2.35.1 +importlib_metadata==8.5.0 +importlib_resources==6.4.5 +itsdangerous==2.2.0 +Jinja2==3.1.6 +joblib==1.4.2 +kiwisolver==1.4.7 +lazy_loader==0.4 +librosa==0.11.0 +llvmlite==0.41.1 +markdown-it-py==3.0.0 +MarkupSafe==2.1.5 +matplotlib==3.7.5 +mdurl==0.1.2 +mpmath==1.3.0 +msgpack==1.1.0 +networkx==3.1 +numba==0.58.1 +numexpr==2.8.6 +numpy==1.24.4 +onnxruntime-gpu==1.16.0 +opencv-python==4.11.0.86 +orjson==3.10.15 +packaging==24.2 +pandas==2.0.3 +pillow==10.4.0 +platformdirs==4.3.6 +pooch==1.8.2 +protobuf==5.29.4 +pycparser==2.22 +pydantic==2.10.6 +pydantic_core==2.27.2 +pydub==0.25.1 +Pygments==2.19.1 +pyparsing==3.1.4 +python-dateutil==2.9.0.post0 +python-multipart==0.0.20 +pytz==2025.1 +PyWavelets==1.4.1 +PyYAML==6.0.2 +requests==2.32.3 +rich==13.9.4 +ruff==0.11.1 +scikit-image==0.21.0 +scikit-learn==1.3.2 +scipy==1.10.1 +semantic-version==2.10.0 +shellingham==1.5.4 +six==1.17.0 +sniffio==1.3.1 +soundfile==0.13.1 +soxr==0.3.7 +spark-parser==1.8.9 +starlette==0.44.0 +sympy==1.13.3 +threadpoolctl==3.5.0 +tifffile==2023.7.10 +tomlkit==0.12.0 +torch==1.11.0+cu113 +torchaudio==0.11.0+cu113 +torchvision==0.12.0+cu113 +tqdm==4.67.1 +typeguard==2.13.3 +typer==0.15.2 +typing_extensions==4.12.2 +tzdata==2025.1 +urllib3==2.2.3 +uvicorn==0.33.0 +websockets==12.0 +Werkzeug==3.0.6 +xdis==6.1.3 +zipp==3.20.2 diff --git a/run.py b/run.py new file mode 100644 index 0000000000000000000000000000000000000000..046776a676869d38e1c967145883f65b035cb291 --- /dev/null +++ b/run.py @@ -0,0 +1,191 @@ +import argparse +import gc +import json +import os +import subprocess +import sys +import threading +import time +import traceback +import uuid +from enum import Enum + +import queue +import cv2 +from flask import Flask, request + +import service.trans_dh_service + 
+from h_utils.custom import CustomError +from y_utils.config import GlobalConfig +from y_utils.logger import logger + + +def get_args(): + parser = argparse.ArgumentParser( + formatter_class=(argparse.ArgumentDefaultsHelpFormatter) + ) + + parser.add_argument( + "--audio_path", + type=str, + default="example/audio.wav", + help="path to local audio file", + ) + parser.add_argument( + "--video_path", + type=str, + default="example/video.mp4", + help="path to local video file", + ) + opt = parser.parse_args() + return opt + + +def write_video( + output_imgs_queue, + temp_dir, + result_dir, + work_id, + audio_path, + result_queue, + width, + height, + fps, + watermark_switch=0, + digital_auth=0, +): + output_mp4 = os.path.join(temp_dir, "{}-t.mp4".format(work_id)) + fourcc = cv2.VideoWriter_fourcc(*"mp4v") + result_path = os.path.join(result_dir, "{}-r.mp4".format(work_id)) + video_write = cv2.VideoWriter(output_mp4, fourcc, fps, (width, height)) + print("Custom VideoWriter init done") + try: + while True: + state, reason, value_ = output_imgs_queue.get() + if type(state) == bool and state == True: + logger.info( + "Custom VideoWriter [{}]视频帧队列处理已结束".format(work_id) + ) + logger.info( + "Custom VideoWriter Silence Video saved in {}".format( + os.path.realpath(output_mp4) + ) + ) + video_write.release() + break + else: + if type(state) == bool and state == False: + logger.error( + "Custom VideoWriter [{}]任务视频帧队列 -> 异常原因:[{}]".format( + work_id, reason + ) + ) + raise CustomError(reason) + for result_img in value_: + video_write.write(result_img) + if video_write is not None: + video_write.release() + if watermark_switch == 1 and digital_auth == 1: + logger.info( + "Custom VideoWriter [{}]任务需要水印和数字人标识".format(work_id) + ) + if width > height: + command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format( + audio_path, + output_mp4, + 
GlobalConfig.instance().watermark_path, + GlobalConfig.instance().digital_auth_path, + result_path, + ) + logger.info("command:{}".format(command)) + else: + command = 'ffmpeg -y -i {} -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10,overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format( + audio_path, + output_mp4, + GlobalConfig.instance().watermark_path, + GlobalConfig.instance().digital_auth_path, + result_path, + ) + logger.info("command:{}".format(command)) + elif watermark_switch == 1 and digital_auth == 0: + logger.info("Custom VideoWriter [{}]任务需要水印".format(work_id)) + command = 'ffmpeg -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:(main_h-overlay_h)-10" -c:a aac -crf 15 -strict -2 {}'.format( + audio_path, + output_mp4, + GlobalConfig.instance().watermark_path, + result_path, + ) + logger.info("command:{}".format(command)) + elif watermark_switch == 0 and digital_auth == 1: + logger.info("Custom VideoWriter [{}]任务需要数字人标识".format(work_id)) + if width > height: + command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format( + audio_path, + output_mp4, + GlobalConfig.instance().digital_auth_path, + result_path, + ) + logger.info("command:{}".format(command)) + else: + command = 'ffmpeg -loglevel warning -y -i {} -i {} -i {} -filter_complex "overlay=(main_w-overlay_w)-10:10" -c:a aac -crf 15 -strict -2 {}'.format( + audio_path, + output_mp4, + GlobalConfig.instance().digital_auth_path, + result_path, + ) + logger.info("command:{}".format(command)) + else: + command = "ffmpeg -loglevel warning -y -i {} -i {} -c:a aac -c:v libx264 -crf 15 -strict -2 {}".format( + audio_path, output_mp4, result_path + ) + logger.info("Custom command:{}".format(command)) + subprocess.call(command, shell=True) + print("###### Custom Video Writer write over") + print(f"###### Video result saved in 
{os.path.realpath(result_path)}") + exit(0) + result_queue.put([True, result_path]) + except Exception as e: + logger.error( + "Custom VideoWriter [{}]视频帧队列处理异常结束,异常原因:[{}]".format( + work_id, e.__str__() + ) + ) + result_queue.put( + [ + False, + "[{}]视频帧队列处理异常结束,异常原因:[{}]".format( + work_id, e.__str__() + ), + ] + ) + logger.info("Custom VideoWriter 后处理进程结束") + + +service.trans_dh_service.write_video = write_video + + +def main(): + opt = get_args() + if not os.path.exists(opt.audio_path): + audio_url = "example/audio.wav" + else: + audio_url = opt.audio_path + + if not os.path.exists(opt.video_path): + video_url = "example/video.mp4" + else: + video_url = opt.video_path + sys.argv = [sys.argv[0]] + task = service.trans_dh_service.TransDhTask() + time.sleep(10) # somehow, this works... + + code = "1004" + task.work(audio_url, video_url, code, 0, 0, 0, 0) + + +if __name__ == "__main__": + main() + +# python run.py +# python run.py --audio_path example/audio.wav --video_path example/video.mp4 diff --git a/service/__init__.py b/service/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b21ce203e632922945642d636d583b9a4f05915 --- /dev/null +++ b/service/__init__.py @@ -0,0 +1,9 @@ +#!/user/bin/env python +# coding=utf-8 +""" +@project : face2face_train +@author : huyi +@file : __init__.py.py +@ide : PyCharm +@time : 2023-12-06 14:46:40 +""" diff --git a/service/server.cpython-38-x86_64-linux-gnu.so b/service/server.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..c38b0e84ab2002f340c4b497a8fccdb429bb989b --- /dev/null +++ b/service/server.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d46fb64f31e5eb10ee09a487715577fa81a89a96ae131f6c52401fad7f53998d +size 594112 diff --git a/service/trans_dh_service.cpython-38-x86_64-linux-gnu.so b/service/trans_dh_service.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 
0000000000000000000000000000000000000000..cdd62076606f22255257b7a8b8fe544ed038bb7e --- /dev/null +++ b/service/trans_dh_service.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b889283ed5cb58960f599388418e0acf4d00eddb6b62e0ebf0bf40c23b6b8e4 +size 16362968 diff --git a/sources.list b/sources.list new file mode 100644 index 0000000000000000000000000000000000000000..a247bbfa612a824e1abf8f91e169865ae3adb572 --- /dev/null +++ b/sources.list @@ -0,0 +1,13 @@ +# 默认注释了源码镜像以提高 apt update 速度,如有需要可自行取消注释 +deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse +# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal main restricted universe multiverse +deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse +# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-updates main restricted universe multiverse +deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse +# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-backports main restricted universe multiverse +deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse +# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-security main restricted universe multiverse + +# 预发布软件源,不建议启用 +# deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse +# deb-src https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ focal-proposed main restricted universe multiverse diff --git a/wenet/compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so b/wenet/compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..d4d148b37f716c653238111b135bf067d19d972d --- /dev/null +++ b/wenet/compute_ctc_att_bnf.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:7c09cd4d98e4b6e35361a5c2822c6b38cdc776c00f46f80ffce1d22b75e7bdf8 +size 2096032 diff --git a/wenet/examples/aishell/aidata/conf/train_conformer_multi_cn.yaml b/wenet/examples/aishell/aidata/conf/train_conformer_multi_cn.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd2bd417acfd107cf335619bb81a918df913af8e --- /dev/null +++ b/wenet/examples/aishell/aidata/conf/train_conformer_multi_cn.yaml @@ -0,0 +1,85 @@ +# network architecture +# encoder related +encoder: conformer +encoder_conf: + output_size: 256 # dimension of attention + attention_heads: 4 + linear_units: 2048 # the number of units of position-wise feed forward + num_blocks: 12 # the number of encoder blocks + dropout_rate: 0.1 + positional_dropout_rate: 0.1 + attention_dropout_rate: 0.0 + input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 + normalize_before: true + cnn_module_kernel: 15 + use_cnn_module: True + activation_type: 'swish' + pos_enc_layer_type: 'rel_pos' + selfattention_layer_type: 'rel_selfattn' + +# decoder related +decoder: transformer +decoder_conf: + attention_heads: 4 + linear_units: 2048 + num_blocks: 6 + dropout_rate: 0.1 + positional_dropout_rate: 0.1 + self_attention_dropout_rate: 0.0 + src_attention_dropout_rate: 0.0 + +# hybrid CTC/attention +model_conf: + ctc_weight: 0.3 + lsm_weight: 0.1 # label smoothing option + length_normalized_loss: false + +# use raw_wav or kaldi feature +raw_wav: false + +# feature extraction +collate_conf: + # waveform level config + wav_distortion_conf: + wav_dither: 0.1 + wav_distortion_rate: 0.0 + distortion_methods: [] + speed_perturb: true + feature_extraction_conf: + feature_type: 'fbank' + mel_bins: 80 + frame_shift: 10 + frame_length: 25 + using_pitch: false + # spec level config + # spec_swap: false + feature_dither: 0.0 # add dither [-feature_dither,feature_dither] on fbank feature + spec_aug: true + spec_aug_conf: + warp_for_time: False + num_t_mask: 2 + num_f_mask: 2 + max_t: 50 
+ max_f: 10 + max_w: 80 + + +# dataset related +dataset_conf: + max_length: 1300 #40960 + min_length: 0 + batch_type: 'static' # static or dynamic + batch_size: 40 + sort: true + +grad_clip: 5 +accum_grad: 4 +max_epoch: 240 +log_interval: 100 + +optim: adam +optim_conf: + lr: 0.0025 #0.0025 +scheduler: warmuplr # pytorch v1.1.0+ required +scheduler_conf: + warmup_steps: 100000 diff --git a/wenet/examples/aishell/aidata/conf/train_conformer_multi_cn_linear.yaml b/wenet/examples/aishell/aidata/conf/train_conformer_multi_cn_linear.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eeee99201ae15bab94f6cfb1effcb7319c293657 --- /dev/null +++ b/wenet/examples/aishell/aidata/conf/train_conformer_multi_cn_linear.yaml @@ -0,0 +1,85 @@ +# network architecture +# encoder related +encoder: conformer +encoder_conf: + output_size: 256 # dimension of attention + attention_heads: 4 + linear_units: 1024 # the number of units of position-wise feed forward + num_blocks: 6 # the number of encoder blocks + dropout_rate: 0.1 + positional_dropout_rate: 0.1 + attention_dropout_rate: 0.0 + input_layer: linear # encoder input type, you can chose linear,conv2d, conv2d6 and conv2d8 + normalize_before: true + cnn_module_kernel: 15 + use_cnn_module: True + activation_type: 'swish' + pos_enc_layer_type: 'rel_pos' + selfattention_layer_type: 'rel_selfattn' + +# decoder related +decoder: transformer +decoder_conf: + attention_heads: 4 + linear_units: 1024 + num_blocks: 3 + dropout_rate: 0.1 + positional_dropout_rate: 0.1 + self_attention_dropout_rate: 0.0 + src_attention_dropout_rate: 0.0 + +# hybrid CTC/attention +model_conf: + ctc_weight: 0.3 + lsm_weight: 0.1 # label smoothing option + length_normalized_loss: false + +# use raw_wav or kaldi feature +raw_wav: false + +# feature extraction +collate_conf: + # waveform level config + wav_distortion_conf: + wav_dither: 0.1 + wav_distortion_rate: 0.0 + distortion_methods: [] + speed_perturb: true + feature_extraction_conf: + 
feature_type: 'fbank' + mel_bins: 80 + frame_shift: 10 + frame_length: 25 + using_pitch: false + # spec level config + # spec_swap: false + feature_dither: 0.0 # add dither [-feature_dither,feature_dither] on fbank feature + spec_aug: true + spec_aug_conf: + warp_for_time: False + num_t_mask: 2 + num_f_mask: 2 + max_t: 50 + max_f: 10 + max_w: 80 + + +# dataset related +dataset_conf: + max_length: 1300 #40960 + min_length: 0 + batch_type: 'static' # static or dynamic + batch_size: 40 + sort: true + +grad_clip: 5 +accum_grad: 4 +max_epoch: 240 +log_interval: 100 + +optim: adam +optim_conf: + lr: 0.002 +scheduler: warmuplr # pytorch v1.1.0+ required +scheduler_conf: + warmup_steps: 50000 diff --git a/wenet/examples/aishell/aidata/exp/conformer/wenetmodel.pt b/wenet/examples/aishell/aidata/exp/conformer/wenetmodel.pt new file mode 100644 index 0000000000000000000000000000000000000000..60e81c19f221bfdb59560129788ab5e05048ac1f --- /dev/null +++ b/wenet/examples/aishell/aidata/exp/conformer/wenetmodel.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f59bf2ebb02c7fa560b44370a656392e8cc8d8214a0d0a99e8fd8edde39c792 +size 191093698 diff --git a/wenet/tools/_extract_feats.py b/wenet/tools/_extract_feats.py new file mode 100644 index 0000000000000000000000000000000000000000..c0dc9ad2f8af0ad7521faa55e6a89bcf86914658 --- /dev/null +++ b/wenet/tools/_extract_feats.py @@ -0,0 +1,452 @@ +import librosa +# import tensorflow as tf +import numpy as np +from scipy.io import wavfile +from scipy import signal + +import torchaudio.compliance.kaldi as kaldi +import torchaudio +# torchaudio.set_audio_backend("sox_io") + + +def _extract_feature(wav_path): + """ Extract acoustic fbank feature from origin waveform. + + Speed perturbation and wave amplitude distortion is optional. + + Args: + batch: a list of tuple (wav id , wave path). + speed_perturb: bool, whether or not to use speed pertubation. 
+ wav_distortion_conf: a dict , the config of wave amplitude distortion. + feature_extraction_conf:a dict , the config of fbank extraction. + + Returns: + (keys, feats, labels) + """ + waveform, sample_rate = torchaudio.load_wav(wav_path) + + mat = kaldi.fbank( + waveform, + num_mel_bins=80, + frame_length=25, + frame_shift=10, + dither=0.1, + energy_floor=0.0, + sample_frequency=sample_rate) + mat = mat.detach().numpy() + + return mat + +def _extract_feature_norm(wav_path): + """ Extract acoustic fbank feature from origin waveform. + + Speed perturbation and wave amplitude distortion is optional. + + Args: + batch: a list of tuple (wav id , wave path). + speed_perturb: bool, whether or not to use speed pertubation. + wav_distortion_conf: a dict , the config of wave amplitude distortion. + feature_extraction_conf:a dict , the config of fbank extraction. + + Returns: + (keys, feats, labels) + """ + + waveform, sample_rate = torchaudio.load_wav(wav_path) + + mat = kaldi.fbank( + waveform, + num_mel_bins=80, + frame_length=25, + frame_shift=10, + dither=0.1, + energy_floor=0.0, + sample_frequency=sample_rate) + mat = mat.detach().numpy() + + return mat + + +hparams = { + 'sample_rate': 16000,#一秒16000个采样点 + 'preemphasis': 0.97, + 'n_fft': 1024, + 'hop_length': 200,#80个采样点为帧移动步长 5ms + 'win_length': 800,#400个采样点为帧宽度,25ms + 'num_mels': 80, + 'n_mfcc': 13, + 'window': 'hann', + 'fmin': 0., + 'fmax': 8000., + 'ref_db': 20, # + 'min_db': -80.0, # restrict the dynamic range of log power + 'iterations': 100, # griffin_lim #iterations + 'silence_db': -28.0, + 'center': True,#是否将MFCC作为当前帧中间向量的结果。(数个向量作为一帧生成一个mfcc) +} + +_mel_basis = None + + +def load_wav(wav_f, sr=None): + # wav_arr, _ = librosa.load(wav_f, sr=sr) + # return wav_arr + if type(wav_f)==str: + wav_arr, _ = librosa.load(wav_f, sr=sr) + else: + wav_arr = wav_f + return wav_arr + +def write_wav(write_path, wav_arr, sr): + wav_arr *= 32767 / max(0.01, np.max(np.abs(wav_arr))) + wavfile.write(write_path, sr, 
wav_arr.astype(np.int16)) + return + +def preempahsis(wav_arr, pre_param=hparams['preemphasis']): + return signal.lfilter([1, -pre_param], [1], wav_arr) + +def deemphasis(wav_arr, pre_param=hparams['preemphasis']): + return signal.lfilter([1], [1, -pre_param], wav_arr) + +def split_wav(wav_arr, top_db=-hparams['silence_db']): + intervals = librosa.effects.split(wav_arr, top_db=top_db) + return intervals + +def mulaw_encode(wav_arr, quantization_channels): + mu = float(quantization_channels - 1) + safe_wav_abs = np.minimum(np.abs(wav_arr), 1.0) + encoded = np.sign(wav_arr) * np.log1p(mu * safe_wav_abs) / np.log1p(mu) + return encoded + +def mulaw_encode_quantize(wav_arr, quantization_channels): + mu = float(quantization_channels - 1) + safe_wav_abs = np.minimum(np.abs(wav_arr), 1.0) + encoded = np.sign(wav_arr) * np.log1p(mu * safe_wav_abs) / np.log1p(mu) + return ((encoded + 1.) / 2 * mu + 0.5).astype(np.int32) + +def mulaw_decode(encoded, quantization_channels): + mu = float(quantization_channels - 1) + magnitude = (1 / mu) * ((1 + mu) ** abs(encoded) - 1.) + return np.sign(encoded) * magnitude + +def mulaw_decode_quantize(encoded, quantization_channels): + mu = float(quantization_channels - 1) + signal = 2 * (encoded.astype(np.float32) / mu) - 1. + magnitude = (1 / mu) * ((1 + mu) ** abs(signal) - 1.) + return np.sign(signal) * magnitude + +def mulaw_encode_quantize_tf(wav_batch, quantization_channels): + with tf.variable_scope('mulaw_encode'): + mu = tf.cast(quantization_channels - 1, tf.float32) + safe_wav_abs = tf.minimum(tf.abs(wav_batch), 1.0) + encoded = tf.sign(wav_batch) * tf.log1p(mu * safe_wav_abs) / tf.log1p(mu) + return tf.cast((encoded + 1.) 
/ 2 * mu + 0.5, tf.int32) + +# def mulaw_encode_tf(wav_batch, quantization_channels): +# with tf.variable_scope('mulaw_encode'): +# mu = tf.cast(quantization_channels - 1, tf.float32) +# safe_wav_abs = tf.minimum(tf.abs(wav_batch), 1.0) +# encoded = tf.sign(wav_batch) * tf.log1p(mu * safe_wav_abs) / tf.log1p(mu) +# return encoded + +# def mulaw_decode_quantize_tf(encoded, quantization_channels): + with tf.variable_scope('mulaw_decode'): + mu = tf.cast(quantization_channels - 1, tf.float32) + signal = 2 * (tf.cast(encoded, tf.float32) / mu) - 1. + magnitude = (1 / mu) * ((1 + mu) ** abs(signal) - 1.) + return tf.sign(signal) * magnitude + +# def mulaw_decode_tf(encoded, quantization_channels): + with tf.variable_scope('mulaw_decode'): + mu = tf.cast(quantization_channels - 1, tf.float32) + magnitude = (1 / mu) * ((1 + mu) ** abs(encoded) - 1.) + return tf.sign(encoded) * magnitude + +def stft(wav_arr, n_fft=hparams['n_fft'],#短时傅里叶变化 + hop_len=hparams['hop_length'], + win_len=hparams['win_length'], + window=hparams['window'], + center=hparams['center']): + # return shape: [n_freqs, time] + return librosa.core.stft(wav_arr, n_fft=n_fft, hop_length=hop_len, + win_length=win_len, window=window, center=center) + +# def stft_tf(wav_arr, n_fft=hparams['n_fft'], +# hop_len=hparams['hop_length'], +# win_len=hparams['win_length'], +# window=hparams['window']): +# window_f = {'hann': tf.contrib.signal.hann_window, +# 'hamming': tf.contrib.signal.hamming_window}[window] +# # returned value is of shape [..., frames, fft_bins] and complex64 value +# return tf.contrib.signal.stft(signals=wav_arr, frame_length=win_len, +# frame_step=hop_len, fft_length=n_fft, +# window_fn=window_f) + +def istft(stft_matrix, hop_len=hparams['hop_length'], + win_len=hparams['win_length'], window=hparams['window']): + # stft_matrix should be complex stft results instead of magnitude spectrogram + # or power spectrogram, and of shape [n_freqs, time] + return librosa.core.istft(stft_matrix, 
hop_length=hop_len, + win_length=win_len, window=window) + +# def istft_tf(stft_matrix, hop_len=hparams['hop_length'], n_fft=hparams['n_fft'], +# win_len=hparams['win_length'], window=hparams['window']): +# window_f = {'hann': tf.contrib.signal.hann_window, +# 'hamming': tf.contrib.signal.hamming_window}[window] +# # stft_matrix should be of shape [..., frames, fft_bins] +# return tf.contrib.signal.inverse_stft(stft_matrix, frame_length=win_len, +# frame_step=hop_len, fft_length=n_fft, +# window_fn=window_f) + +def spectrogram(wav_arr, n_fft=hparams['n_fft'], + hop_len=hparams['hop_length'], + win_len=hparams['win_length'], + window=hparams['window'], + center=hparams['center']): + # return shape: [time, n_freqs] + s = stft(wav_arr, n_fft=n_fft, hop_len=hop_len, + win_len=win_len, window=window, center=center).T + magnitude = np.abs(s) # magnitude spectrum + power = magnitude ** 2 # power spectrum: the squared STFT magnitude (original note asked why an STFT is used here rather than a plain FFT) + return {'magnitude': magnitude, + 'power': power, + 'stft':s.T} # 'stft' is the untransposed complex STFT as returned by stft() + +def power_spec2mel(power_spec, sr=hparams['sample_rate'], n_fft=hparams['n_fft'], + num_mels=hparams['num_mels'], fmin=hparams['fmin'], fmax=hparams['fmax']): + # power_spec should be of shape [time, 1+n_fft/2] + power_spec_t = power_spec.T + global _mel_basis + _mel_basis = (librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax) + if _mel_basis is None else _mel_basis) # [n_mels, 1+n_fft/2]; NOTE: built on the first call only, later calls reuse it and ignore sr/n_fft/num_mels/fmin/fmax + mel_spec = np.dot(_mel_basis, power_spec_t) # [n_mels, time] + return mel_spec.T # mel spectrum, [time, n_mels] + +def wav2melspec(wav_arr, sr=hparams['sample_rate'], n_fft=hparams['n_fft'], + hop_len=hparams['hop_length'], win_len=hparams['win_length'], + window=hparams['window'], num_mels=hparams['num_mels'], + fmin=hparams['fmin'], fmax=hparams['fmax']): + power_spec = spectrogram(wav_arr, n_fft, hop_len, win_len, window)['power'] + melspec = power_spec2mel(power_spec, sr, n_fft, num_mels, fmin, fmax) # pass [time, n_freqs] as-is: power_spec2mel transposes internally, a second .T here broke the filterbank dot product + return melspec # [time, num_mels] + +def wav2mfcc(wav_arr, sr=hparams['sample_rate'], 
n_mfcc=hparams['n_mfcc'], + n_fft=hparams['n_fft'], hop_len=hparams['hop_length'], + win_len=hparams['win_length'], window=hparams['window'], + num_mels=hparams['num_mels'], fmin=0.0, + fmax=None, ref_db=hparams['ref_db']): + from scipy.fftpack import dct + print("wav_arr1:",wav_arr.shape) + wav_arr = preempahsis(wav_arr) + print("wav_arr2:",wav_arr.shape) + + mag_spec = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len, + win_len=win_len, window=window)['magnitude'] + mel_spec = power_spec2mel(mag_spec, sr=sr, n_fft=n_fft, num_mels=num_mels, + fmin=fmin, fmax=fmax) + # log_melspec = power2db(mel_spec, ref_db=ref_db) + log_melspec = librosa.amplitude_to_db(mel_spec) + mfcc = dct(x=log_melspec.T, axis=0, type=2, norm='ortho')[:n_mfcc] + # mfcc = np.dot(librosa.filters.dct(n_mfcc, log_melspec.shape[1]), log_melspec.T) + deltas = librosa.feature.delta(mfcc) + delta_deltas = librosa.feature.delta(mfcc, order=2) + mfcc_feature = np.concatenate((mfcc, deltas, delta_deltas), axis=0) + + return mfcc_feature.T + +def wav2mfcc_v2(wav_arr, sr=hparams['sample_rate'], n_mfcc=hparams['n_mfcc'],#使用这个 + n_fft=hparams['n_fft'], hop_len=hparams['hop_length'], + win_len=hparams['win_length'], window=hparams['window'], + num_mels=hparams['num_mels'], fmin=0.0, + fmax=None, ref_db=hparams['ref_db'], + center=hparams['center']): + from scipy.fftpack import dct + wav_arr = preempahsis(wav_arr) + #经过一次滤波 + power_spec = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len, + win_len=win_len, window=window, center=center)['power'] + mel_spec = power_spec2mel(power_spec, sr=sr, n_fft=n_fft, num_mels=num_mels, + fmin=fmin, fmax=fmax) # mel谱 + log_melspec = power2db(mel_spec, ref_db=ref_db) #对数mel谱 + + + """下面是MFCC""" + # mfcc = dct(x=log_melspec.T, axis=0, type=2, norm='ortho')[:n_mfcc] + # deltas = librosa.feature.delta(mfcc) + # delta_deltas = librosa.feature.delta(mfcc, order=2) + # mfcc_feature = np.concatenate((mfcc, deltas, delta_deltas), axis=0) + # return mfcc_feature.T + x_stft = 
spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len, + win_len=win_len, window=window, center=center)['stft'] + # print("log_melspec:", x_stft.shape) + return log_melspec,x_stft + + +def wav2linear_v2(wav_arr, sr=hparams['sample_rate'], n_mfcc=hparams['n_mfcc'], # use this one + n_fft=hparams['n_fft'], hop_len=hparams['hop_length'], + win_len=hparams['win_length'], window=hparams['window'], + num_mels=hparams['num_mels'], fmin=0.0, + fmax=None, ref_db=hparams['ref_db'], + center=hparams['center']): + from scipy.fftpack import dct + wav_arr = preempahsis(wav_arr) + # the signal has been passed through the pre-emphasis filter once + power_spec = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len, + win_len=win_len, window=window, center=center)['power'] + linear = _amp_to_db(power_spec, ref_db=ref_db) # log-scaled (dB) linear spectrogram + normalized_linear = _db_normalize(linear, min_db=hparams['min_db']) + x_stft = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len, + win_len=win_len, window=window, center=center)['stft'] + + + return normalized_linear,x_stft + +def _amp_to_db(x,ref_db=20): + return 20 * np.log10(np.maximum(1e-5, x)) + ref_db + + +def mel2log_mel(mel_spec, ref_db=hparams['ref_db'], min_db=hparams['min_db']): + log_mel = power2db(mel_spec, ref_db) + normalized = log_power_normalize(log_mel, min_db) + return normalized + +def power2db(power_spec, ref_db=hparams['ref_db'], tol=1e-5): + # power spectrogram is stft ** 2 + # returned value: (10. * log10(power_spec) - ref_db) + return 10. * np.log10(power_spec + tol) - ref_db + +def db2power(power_db, ref_db=hparams['ref_db']): + return np.power(10.0, 0.1 * (power_db + ref_db)) +# +# def db2power_tf(power_db, ref_db=hparams['ref_db']): +# return tf.pow(10.0, 0.1 * (power_db + ref_db)) + +def log_power_normalize(log_power, min_db=hparams['min_db']): + """ + :param log_power: in db, computed by power2db(spectrogram(wav_arr)['power']) + :param min_db: minimum value of log_power in db + :return: log_power normalized to [0., 1.] + """ + assert min_db < 0.,
"min_db should be a negative value like -80.0 or -100.0" + return np.clip((log_power - min_db) / -min_db, 0., 1.) + +def log_power_denormalize(normalized_logpower, min_db=hparams['min_db']): + return np.clip(normalized_logpower, 0., 1.) * -min_db + min_db + +# def log_power_denormalize_tf(normalized_logpower, min_db=hparams['min_db']): +# return tf.clip_by_value(normalized_logpower, 0., 1.) * -min_db + min_db + +def griffin_lim(magnitude_spec, iterations=hparams['iterations']): + """ + :param magnitude_spec: magnitude spectrogram of shape [time, n_freqs] + obtained from spectrogram(wav_arr)['magnitude'] + :param iterations: number of iterations to estimate phase + :return: waveform array + """ + mag = magnitude_spec.T # transpose to [n_freqs, time] + angles = np.exp(2j * np.pi * np.random.rand(*mag.shape)) + complex_mag = np.abs(mag).astype(complex) # builtin complex: the np.complex alias was removed in NumPy 1.24 + stft_0 = complex_mag * angles + y = istft(stft_0) + for i in range(iterations): + angles = np.exp(1j * np.angle(stft(y))) + y = istft(complex_mag * angles) + return y + +# def grinffin_lim_tf(magnitude_spec, iterations=hparams['iterations']): +# # magnitude_spec: [frames, fft_bins], of type tf.float32 +# angles = tf.cast( +# tf.exp(2j * np.pi * tf.cast( +# tf.random_uniform( +# tf.shape(magnitude_spec)), +# dtype=tf.complex64)), +# dtype=tf.complex64) +# complex_mag = tf.cast(tf.abs(magnitude_spec), tf.complex64) +# stft_0 = complex_mag * angles +# y = istft_tf(stft_0) +# for i in range(iterations): +# angles = tf.exp(1j * tf.cast(tf.angle(stft_tf(y)), tf.complex64)) +# y = istft_tf(complex_mag * angles) +# return y + +def griffin_lim_test(wav_f, n_fft=hparams['n_fft'], + hop_len=hparams['hop_length'], + win_len=hparams['win_length'], + window=hparams['window']): + wav_arr = load_wav(wav_f) + spec_dict = spectrogram(wav_arr, n_fft=n_fft, hop_len=hop_len, + win_len=win_len, window=window) + mag_spec = spec_dict['magnitude'] + y = griffin_lim(mag_spec) + write_wav('reconstructed1.wav', y, sr=16000) + +def 
stft2wav_test(stft_f, mean_f, std_f): + spec = np.load(stft_f) + mean = np.load(mean_f) + std = np.load(std_f) + spec = spec * std + mean + spec = log_power_denormalize(spec) + power_spec = db2power(spec) + mag_spec = power_spec ** 0.5 + y = griffin_lim(mag_spec) + y = deemphasis(y) + write_wav('reconstructed2.wav', y, sr=16000) + return y +# +# def stft2wav_tf_test(stft_f, mean_f, std_f): +# # get inputs +# spec = np.load(stft_f) +# mean = np.load(mean_f) +# std = np.load(std_f) +# spec = spec * std + mean +# # build graph +# spec_pl = tf.placeholder(tf.float32, [None, None, 513]) +# denormalized = log_power_denormalize_tf(spec_pl) +# mag_spec = tf.pow(db2power_tf(denormalized), 0.5) +# wav = grinffin_lim_tf(mag_spec) +# # set session and run +# config = tf.ConfigProto() +# config.gpu_options.allow_growth = True +# sess = tf.Session(config=config) +# wav_arr = sess.run(wav, feed_dict={spec_pl: np.expand_dims(spec, axis=0)}) +# sess.close() +# y = deemphasis(np.squeeze(wav_arr)) +# write_wav('reconstructed_tf.wav', y, sr=16000) +# return y + +# 超参数个数:1 +# return: db normalized to [0., 1.] +def _db_normalize(db, min_db): + return np.clip((db - min_db) / -min_db, 0., 1.) 
+ + + + +def mfcc_test(): + wav_f = './test.wav' + wav_arr = load_wav(wav_f) + + + mfcc, _ = wav2mfcc_v2(wav_arr) # wav2mfcc_v2 returns (log_melspec, x_stft); only the log-mel array is compared below + mfcc1 = np.load('test.npy') + print(mfcc.min(), mfcc1.min()) + print(mfcc.max(), mfcc1.max()) + print(mfcc.mean(), mfcc1.mean()) + print(np.abs(mfcc - mfcc1)) + print(np.mean(np.abs(mfcc - mfcc1))) + import matplotlib.pyplot as plt + plt.figure() + plt.subplot(211) + plt.imshow(mfcc.T, origin='lower') + # plt.colorbar() + plt.subplot(212) + plt.imshow(mfcc1.T, origin='lower') + # plt.colorbar() + plt.tight_layout() + plt.show() + return + + + +if __name__ == '__main__': + mfcc_test() diff --git a/wenet/transformer/__init__.py b/wenet/transformer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/wenet/transformer/asr_model.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/asr_model.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..a184d5ed71f15923f40068f9df666666a7f3c149 --- /dev/null +++ b/wenet/transformer/asr_model.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec56eda7711fe563fe8c2f933ddb72fa1369d45ed0b730db235fd27cf1aebcf9 +size 5865400 diff --git a/wenet/transformer/attention.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/attention.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..a55e32b0714f2ee9788b06f4fa499e5757db7bda --- /dev/null +++ b/wenet/transformer/attention.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acd8fabea0da032a4c4d01ffdff4f19a56ec2597155b4630afcb94fc41443c3e +size 1671488 diff --git a/wenet/transformer/cmvn.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/cmvn.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..281c1d50deb1700399317cd9618341e8fade9956 --- /dev/null +++ 
b/wenet/transformer/cmvn.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f2e40cca572e36742df40c4631154ae95ec7ba28f219271b313b08ad7f05aa6 +size 368416 diff --git a/wenet/transformer/convolution.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/convolution.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..fc7bba8a92b1db7267d052aa050e70ae94a271f1 --- /dev/null +++ b/wenet/transformer/convolution.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70d2836540994e363691eec27b7c1a2c906d641f467d61b6cbd9503d5dd663ad +size 851632 diff --git a/wenet/transformer/ctc.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/ctc.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..0ee76e98fd5d5e8d74e68bbd9a71fb14cc6b3527 --- /dev/null +++ b/wenet/transformer/ctc.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4124cee25e02edea8a864be87486eb2c1ca20d01ec7f312a9172d997b636c56d +size 617048 diff --git a/wenet/transformer/decoder.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/decoder.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..2c33b37ca509e166cf4c6a1c3d85d342636b41a5 --- /dev/null +++ b/wenet/transformer/decoder.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3aeae40976ddbe78c9557995afd69b27d812f2c3c1a413ec4c1e0791eaffb3e +size 1541184 diff --git a/wenet/transformer/decoder_layer.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/decoder_layer.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..11c50e16438f5419d9f9e4fbadc88cd98fd40872 --- /dev/null +++ b/wenet/transformer/decoder_layer.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:fe5675e1cb453ecbd5e605b6aa58ce3142e1470b8c949c9af0c0f76efb5f68a4 +size 846672 diff --git a/wenet/transformer/embedding.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/embedding.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..2059f8e084a8d1eff5d6e30fc5fc7216574324de --- /dev/null +++ b/wenet/transformer/embedding.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e861789f9d9509dcf2e2512d5df62404dc144a7a6c0a4b7fca2e1f005aa5872 +size 1261192 diff --git a/wenet/transformer/encoder.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/encoder.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..b8d76ceafe2d23930e911c49b51155047e9eae6e --- /dev/null +++ b/wenet/transformer/encoder.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f65f8f9169a4e2b67634517636245ba5d2ba434f88f3fd8075bcb52e1c57559a +size 2290992 diff --git a/wenet/transformer/encoder_layer.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/encoder_layer.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..5122561b360b07955b44daaad8346f8c9fa3f060 --- /dev/null +++ b/wenet/transformer/encoder_layer.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed85cbb0f3511cfb4f536a78b1f5c37f563d54691df1dcd774f1f29b5fe3cc39 +size 1481440 diff --git a/wenet/transformer/label_smoothing_loss.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/label_smoothing_loss.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..963999d7b873d6359c11015c90de6e767d811114 --- /dev/null +++ b/wenet/transformer/label_smoothing_loss.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:977392bd2e4169ca8ae3c6fb6e9328b92c2f1ae22d9021587259fab1bd64cc04 +size 667904 diff --git a/wenet/transformer/positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..79e5ab273762b759b403c1c0829c4b4cb6370237 --- /dev/null +++ b/wenet/transformer/positionwise_feed_forward.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0416be1596a6b89764998a3b3005179865c654fa98272b2fa0ca95c06d346a69 +size 440744 diff --git a/wenet/transformer/subsampling.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/subsampling.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..84b4158ee39d43cbda2aceec29b2f8243df22669 --- /dev/null +++ b/wenet/transformer/subsampling.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69cd56f257c7d0af1e34faf23f457791f96325ae8b69fba67176a2a216485a84 +size 2521760 diff --git a/wenet/transformer/swish.cpython-38-x86_64-linux-gnu.so b/wenet/transformer/swish.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..d7cf2ceefc72399a4f3f91f9a8235da0216a90e5 --- /dev/null +++ b/wenet/transformer/swish.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccbb4b3e109f824530cf5a0d7c82e98f43ca12ee020e3a67ba992a32e83211bc +size 287656 diff --git a/wenet/utils/checkpoint.cpython-38-x86_64-linux-gnu.so b/wenet/utils/checkpoint.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..1337f679c3b077f571b180e08f0bd467ea239b8c --- /dev/null +++ b/wenet/utils/checkpoint.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dd41ea6d72902483379b808266bf9e49334ebc15be37a9219a6a6959acdace7 +size 
695712 diff --git a/wenet/utils/cmvn.py b/wenet/utils/cmvn.py new file mode 100644 index 0000000000000000000000000000000000000000..d262143210dde2c73b7dabd67eba87ecdbc2a7b4 --- /dev/null +++ b/wenet/utils/cmvn.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 +# Copyright (c) 2020 Mobvoi Inc (Binbin Zhang) +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import logging +import math +import sys + +import numpy as np + + +def _load_json_cmvn(json_cmvn_file): + """ Load the json format cmvn stats file and calculate cmvn + + Args: + json_cmvn_file: cmvn stats file in json format + + Returns: + a numpy array of [means, inverse standard deviations] + """ + with open(json_cmvn_file) as f: + cmvn_stats = json.load(f) + + means = cmvn_stats['mean_stat'] + variance = cmvn_stats['var_stat'] + count = cmvn_stats['frame_num'] + for i in range(len(means)): + means[i] /= count + variance[i] = variance[i] / count - means[i] * means[i] + if variance[i] < 1.0e-20: + variance[i] = 1.0e-20 + variance[i] = 1.0 / math.sqrt(variance[i]) + cmvn = np.array([means, variance]) + return cmvn + + +def _load_kaldi_cmvn(kaldi_cmvn_file): + """ Load the kaldi format cmvn stats file and calculate cmvn + + Args: + kaldi_cmvn_file: kaldi text style global cmvn file, which + is generated by: + compute-cmvn-stats --binary=false scp:feats.scp global_cmvn + + Returns: + a numpy array of [means, inverse standard deviations] + """ + means = [] + variance = [] + with open(kaldi_cmvn_file, 'r') as fid: + # kaldi binary file start with '\0B' + if fid.read(2) == '\0B': + 
logging.error('kaldi cmvn binary file is not supported, please ' + 'recompute it by: compute-cmvn-stats --binary=false ' + ' scp:feats.scp global_cmvn') + sys.exit(1) + fid.seek(0) + arr = fid.read().split() + assert (arr[0] == '[') + assert (arr[-2] == '0') + assert (arr[-1] == ']') + feat_dim = int((len(arr) - 2 - 2) / 2) + for i in range(1, feat_dim + 1): + means.append(float(arr[i])) + count = float(arr[feat_dim + 1]) + for i in range(feat_dim + 2, 2 * feat_dim + 2): + variance.append(float(arr[i])) + + for i in range(len(means)): + means[i] /= count + variance[i] = variance[i] / count - means[i] * means[i] + if variance[i] < 1.0e-20: + variance[i] = 1.0e-20 + variance[i] = 1.0 / math.sqrt(variance[i]) + cmvn = np.array([means, variance]) + return cmvn + + +def load_cmvn(cmvn_file, is_json): + """Return (means, inverse_stddevs) from a JSON stats file if is_json, else from a Kaldi text stats file.""" + if is_json: + cmvn = _load_json_cmvn(cmvn_file) + else: + cmvn = _load_kaldi_cmvn(cmvn_file) + return cmvn[0], cmvn[1] diff --git a/wenet/utils/common.cpython-38-x86_64-linux-gnu.so b/wenet/utils/common.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..c79034e27e4ea53fa3494e8bdfdb0fb5ae2c6fc6 --- /dev/null +++ b/wenet/utils/common.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f65e0e494a4b6606d030618584725497d2128b93f249006e28fcd8869deaf92 +size 1319320 diff --git a/wenet/utils/ctc_util.cpython-38-x86_64-linux-gnu.so b/wenet/utils/ctc_util.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..f44df0873561c4d13eb973374374552ba1cd5c19 --- /dev/null +++ b/wenet/utils/ctc_util.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:541fcb132b6cf217e53fffd15614c974622f3e28b665a851fb43d928387b3f92 +size 1057752 diff --git a/wenet/utils/executor.cpython-38-x86_64-linux-gnu.so b/wenet/utils/executor.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 
0000000000000000000000000000000000000000..fda548ad5231f9c9b89c20cb0a00a54e3c969316 --- /dev/null +++ b/wenet/utils/executor.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:417593caf90505d54a90eea11c45278d8193e896895e44a209b0a4741d854c92 +size 1957912 diff --git a/wenet/utils/mask.cpython-38-x86_64-linux-gnu.so b/wenet/utils/mask.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..2f59e9d8d7613f6c660b37de988e75e1cf761c7a --- /dev/null +++ b/wenet/utils/mask.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c24d803544574ac73daf71519b29926d6dbce47fba5c7d01068dcaf882bc3bf6 +size 983672 diff --git a/wenet/utils/scheduler.cpython-38-x86_64-linux-gnu.so b/wenet/utils/scheduler.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..f08f71daf83e31635f02f2eb1ea892e0fb9b4862 --- /dev/null +++ b/wenet/utils/scheduler.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f76fd1d22cd5dd82d2e6e60168aaa947691151b33670e51b819f2d863d4ffb3 +size 459744 diff --git a/xseg/dfl_xseg_api.cpython-38-x86_64-linux-gnu.so b/xseg/dfl_xseg_api.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..45b03ecf300f7377a08186693aee3d6e3d649e5f --- /dev/null +++ b/xseg/dfl_xseg_api.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20b6ca257385d27be2079242895b5c3dba9c54ff5a9ae60c717693f7dc45982 +size 505632 diff --git a/xseg/xseg_211104_4790000.onnx b/xseg/xseg_211104_4790000.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a2951f8345ad6b7c055d5a79362bbeedac9794cd --- /dev/null +++ b/xseg/xseg_211104_4790000.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8867760aeb83176e392d58c9ca123945cc36732c16763915b1d97f3acb0f1d64 +size 70324585 diff --git a/y_utils/__init__.py b/y_utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bfd8b1d1c5317d10ab6a65a58e02cad14bae6bd6 --- /dev/null +++ b/y_utils/__init__.py @@ -0,0 +1,9 @@ +#!/user/bin/env python +# coding=utf-8 +""" +@project : dhp-service +@author : huyi +@file : __init__.py.py +@ide : PyCharm +@time : 2021-08-18 16:29:13 +""" \ No newline at end of file diff --git a/y_utils/config.cpython-38-x86_64-linux-gnu.so b/y_utils/config.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..389cce5cd178dbe4525c213c8d74aeee6566f231 --- /dev/null +++ b/y_utils/config.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4ab5543f166c2957ead83d6e3dc08227196ae23e26456ee2d93e20bf223e2ed +size 938840 diff --git a/y_utils/lcr.cpython-38-x86_64-linux-gnu.so b/y_utils/lcr.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..29883a0e2c429d844b8a592b24b8db02dec61c9e --- /dev/null +++ b/y_utils/lcr.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc0b56827ff7c8a085e6b9d9b6f5ed14154eba06d0ce826869e4f4d55354acc1 +size 443008 diff --git a/y_utils/liblcr.so b/y_utils/liblcr.so new file mode 100644 index 0000000000000000000000000000000000000000..da6d206415beddcd7158001aeb2a5662471c1120 --- /dev/null +++ b/y_utils/liblcr.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:083c5efea750de193af50008cb1e7ca9959ff61224524c1d490e80963e72a169 +size 25712808 diff --git a/y_utils/logger.cpython-38-x86_64-linux-gnu.so b/y_utils/logger.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..91d819d017f890ee72220ddc2da73ea2de4f2e8d --- /dev/null +++ b/y_utils/logger.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da30ffc850398063969cb4ffecb77c1e6258c98cf1f512dbc9aebf2318c7f093 +size 444096 diff --git a/y_utils/md5.cpython-38-x86_64-linux-gnu.so b/y_utils/md5.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..19d0e9a600de1f31982ada30ef7be1d653870748 --- /dev/null +++ b/y_utils/md5.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c881874fb1de10b45dee2bb42a6ba62b3ee502575d78312535c6deb7931d527a +size 567408 diff --git a/y_utils/time_utils.cpython-38-x86_64-linux-gnu.so b/y_utils/time_utils.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..107b9bb90c61111eb6c19d476cef0bb990d03fde --- /dev/null +++ b/y_utils/time_utils.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f5e75839b5b9b75fdd5c91b08da8e45862e0fd5ceb78ce7f0ce99252039644b +size 528776 diff --git a/y_utils/tools.cpython-38-x86_64-linux-gnu.so b/y_utils/tools.cpython-38-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..0325b92e24c83de2b09c3e1f9a1780ab1710b3a8 --- /dev/null +++ b/y_utils/tools.cpython-38-x86_64-linux-gnu.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddd26efd5171f0e0a6b35e67dd70750306bb2bf419847ce110062fc9ca7b05aa +size 375008