huaichang committed on
Commit ba25f75 (verified) · Parent: c2b2486

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. .DS_Store +0 -0
  2. .gitattributes +6 -0
  3. README.md +0 -3
  4. configs/.DS_Store +0 -0
  5. configs/inference/inference_stage3.yaml +47 -0
  6. configs/prompts/personalive_offline.yaml +13 -0
  7. configs/prompts/personalive_online.yaml +28 -0
  8. demo/driving_video.mp4 +3 -0
  9. demo/ref_image.png +3 -0
  10. pose2vid_offline.py +254 -0
  11. pose2vid_online.py +323 -0
  12. pretrained_weights/.DS_Store +0 -0
  13. pretrained_weights/onnx/.DS_Store +0 -0
  14. pretrained_weights/onnx/unet_opt/unet_opt.onnx +3 -0
  15. pretrained_weights/onnx/unet_opt/unet_opt.onnx.data +3 -0
  16. pretrained_weights/personalive/denoising_unet.pth +3 -0
  17. pretrained_weights/personalive/motion_encoder.pth +3 -0
  18. pretrained_weights/personalive/motion_extractor.pth +3 -0
  19. pretrained_weights/personalive/pose_guider.pth +3 -0
  20. pretrained_weights/personalive/reference_unet.pth +3 -0
  21. pretrained_weights/personalive/temporal_module.pth +3 -0
  22. pretrained_weights/tensorrt/.DS_Store +0 -0
  23. pretrained_weights/tensorrt/unet_work(H100).engine +3 -0
  24. results/20251209--personalive_offline/concat_vid/ref_image_driving_video.mp4 +3 -0
  25. results/20251209--personalive_offline/split_vid/ref_image_driving_video.mp4 +3 -0
  26. src/.DS_Store +0 -0
  27. src/__pycache__/wrapper.cpython-310.pyc +0 -0
  28. src/__pycache__/wrapper_trt.cpython-310.pyc +0 -0
  29. src/liveportrait/__pycache__/camera.cpython-310.pyc +0 -0
  30. src/liveportrait/__pycache__/camera.cpython-39.pyc +0 -0
  31. src/liveportrait/__pycache__/convnextv2.cpython-310.pyc +0 -0
  32. src/liveportrait/__pycache__/convnextv2.cpython-39.pyc +0 -0
  33. src/liveportrait/__pycache__/motion_extractor.cpython-310.pyc +0 -0
  34. src/liveportrait/__pycache__/motion_extractor.cpython-39.pyc +0 -0
  35. src/liveportrait/__pycache__/util.cpython-310.pyc +0 -0
  36. src/liveportrait/__pycache__/util.cpython-39.pyc +0 -0
  37. src/liveportrait/camera.py +73 -0
  38. src/liveportrait/convnextv2.py +216 -0
  39. src/liveportrait/motion_extractor.py +212 -0
  40. src/liveportrait/util.py +492 -0
  41. src/modeling/__pycache__/engine_model.cpython-310.pyc +0 -0
  42. src/modeling/__pycache__/framed_models.cpython-310.pyc +0 -0
  43. src/modeling/__pycache__/onnx_export.cpython-310.pyc +0 -0
  44. src/modeling/engine_model.py +308 -0
  45. src/modeling/framed_models.py +177 -0
  46. src/modeling/onnx_export.py +102 -0
  47. src/models/__pycache__/attention.cpython-310.pyc +0 -0
  48. src/models/__pycache__/attention.cpython-39.pyc +0 -0
  49. src/models/__pycache__/motion_module.cpython-310.pyc +0 -0
  50. src/models/__pycache__/motion_module.cpython-39.pyc +0 -0
.DS_Store ADDED
Binary file (8.2 kB).
 
.gitattributes CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ demo/driving_video.mp4 filter=lfs diff=lfs merge=lfs -text
+ demo/ref_image.png filter=lfs diff=lfs merge=lfs -text
+ pretrained_weights/onnx/unet_opt/unet_opt.onnx.data filter=lfs diff=lfs merge=lfs -text
+ pretrained_weights/tensorrt/unet_work(H100).engine filter=lfs diff=lfs merge=lfs -text
+ results/20251209--personalive_offline/concat_vid/ref_image_driving_video.mp4 filter=lfs diff=lfs merge=lfs -text
+ results/20251209--personalive_offline/split_vid/ref_image_driving_video.mp4 filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +0,0 @@
- ---
- license: apache-2.0
- ---
configs/.DS_Store ADDED
Binary file (6.15 kB).
 
configs/inference/inference_stage3.yaml ADDED
@@ -0,0 +1,47 @@
+ unet_additional_kwargs:
+   use_inflated_groupnorm: true
+   unet_use_cross_frame_attention: false
+   unet_use_temporal_attention: false
+   use_motion_module: true
+   motion_module_resolutions:
+     - 1
+     - 2
+     - 4
+     - 8
+   motion_module_mid_block: true
+   motion_module_decoder_only: false
+   motion_module_type: Vanilla
+   motion_module_kwargs:
+     num_attention_heads: 8
+     num_transformer_block: 1
+     cross_attention_dim: 16
+     attention_block_types:
+       - Spatial_Cross
+       - Spatial_Cross
+     temporal_position_encoding: false
+     temporal_position_encoding_max_len: 32
+     temporal_attention_dim_div: 1
+
+   use_temporal_module: true
+   temporal_module_type: Vanilla
+   temporal_module_kwargs:
+     num_attention_heads: 8
+     num_transformer_block: 1
+     attention_block_types:
+       - Temporal_Self
+       - Temporal_Self
+     temporal_position_encoding: true
+     temporal_position_encoding_max_len: 32
+     temporal_attention_dim_div: 1
+
+
+ noise_scheduler_kwargs:
+   beta_start: 0.00085
+   beta_end: 0.02
+   beta_schedule: "scaled_linear"
+   clip_sample: false
+   steps_offset: 1
+   prediction_type: "epsilon"
+   timestep_spacing: "trailing"
+
+ sampler: DDIM
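For orientation, this file is consumed by the inference scripts roughly as sketched below; the sketch mirrors the loading code in pose2vid_offline.py rather than adding anything to the commit, and the base-model path is a placeholder.

# Illustrative sketch: how inference_stage3.yaml is loaded (see pose2vid_offline.py).
from omegaconf import OmegaConf
from src.models.unet_3d import UNet3DConditionModel
from src.scheduler.scheduler_ddim import DDIMScheduler

infer_config = OmegaConf.load("configs/inference/inference_stage3.yaml")

# unet_additional_kwargs configures the motion/temporal modules when the 2D UNet is inflated to 3D.
denoising_unet = UNet3DConditionModel.from_pretrained_2d(
    "pretrained_weights/sd-image-variations-diffusers",  # placeholder base-model path
    "",
    subfolder="unet",
    unet_additional_kwargs=infer_config.unet_additional_kwargs,
)

# noise_scheduler_kwargs maps directly onto the DDIM scheduler constructor; "sampler: DDIM" selects it.
scheduler = DDIMScheduler(**OmegaConf.to_container(infer_config.noise_scheduler_kwargs))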
configs/prompts/personalive_offline.yaml ADDED
@@ -0,0 +1,13 @@
+ pretrained_base_model_path: '/public_hw/home/cit_xdcun/zyli/x-nemo-inference/pretrained_weights/sd-image-variations-diffusers'
+ image_encoder_path: '/public_hw/home/cit_xdcun/zyli/x-nemo-inference/pretrained_weights/sd-image-variations-diffusers/image_encoder'
+ vae_path: '/public_hw/home/cit_xdcun/zyli/x-nemo-inference/pretrained_weights/sd-vae-ft-mse'
+ vae_tiny_path: '/public_hw/home/cit_xdcun/zyli/x-nemo-inference/pretrained_weights/taesd'
+
+ denoising_unet_path: "./pretrained_weights/personalive/denoising_unet.pth"
+
+ inference_config: "configs/inference/inference_stage3.yaml"
+ weight_dtype: 'fp16'
+
+ test_cases:
+   'demo/ref_image.png':
+     - 'demo/driving_video.mp4'
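test_cases maps each reference image to a list of driving videos, and every pair produces one output clip. A minimal sketch of how pose2vid_offline.py walks this mapping (illustrative, not a new file in the commit):

# Illustrative sketch: iterating test_cases from personalive_offline.yaml.
from omegaconf import OmegaConf

test_cases = OmegaConf.load("configs/prompts/personalive_offline.yaml")["test_cases"]
for ref_image_path, driving_videos in test_cases.items():
    for pose_video_path in driving_videos:
        print(ref_image_path, "->", pose_video_path)  # one generated video per pair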
configs/prompts/personalive_online.yaml ADDED
@@ -0,0 +1,28 @@
+ batch_size: 1
+ height: 512
+ width: 512
+ reference_image_height: 512
+ reference_image_width: 512
+ temporal_adaptive_step: 4
+ temporal_window_size: 4
+ num_inference_steps: 4
+ dtype: "fp16"
+ fps: 16
+
+ vae_model_path: '/public_hw/home/cit_xdcun/zyli/x-nemo-inference/pretrained_weights/sd-vae-ft-mse'
+ image_encoder_path: '/public_hw/home/cit_xdcun/zyli/x-nemo-inference/pretrained_weights/sd-image-variations-diffusers/image_encoder'
+ pretrained_base_model_path: '/public_hw/home/cit_xdcun/zyli/x-nemo-inference/pretrained_weights/sd-image-variations-diffusers'
+
+ reference_unet_weight_path: "./pretrained_weights/personalive/reference_unet.pth"
+ denoising_unet_path: "./pretrained_weights/personalive/denoising_unet.pth"
+ pose_guider_path: "./pretrained_weights/personalive/pose_guider.pth"
+ motion_encoder_path: './pretrained_weights/personalive/motion_encoder.pth'
+ temporal_module_path: "./pretrained_weights/personalive/temporal_module.pth"
+ pose_encoder_path: './pretrained_weights/personalive/motion_extractor.pth'
+
+ onnx_path: './pretrained_weights/onnx/unet/unet.onnx'
+ onnx_opt_path: './pretrained_weights/onnx/unet_opt/unet_opt.onnx'
+ tensorrt_target_model: './pretrained_weights/tensorrt/unet_work.engine'
+
+ inference_config: "./configs/inference/inference_stage3.yaml"
+ seed: 42
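The online config adds streaming parameters (a 4-frame temporal window and adaptive step, 4 denoising steps, fp16, 16 fps) plus paths for the exported ONNX and TensorRT UNet. A hedged sketch of how a caller might pick the accelerated backend, following the module-level use_trt flag in pose2vid_online.py (the selection logic here is illustrative only):

# Illustrative sketch: selecting the accelerated UNet artifact from personalive_online.yaml.
from omegaconf import OmegaConf

cfg = OmegaConf.load("configs/prompts/personalive_online.yaml")
use_trt = True  # pose2vid_online.py hard-codes this switch at module level

unet_artifact = cfg.tensorrt_target_model if use_trt else cfg.onnx_opt_path
print(f"streaming at {cfg.fps} fps, {cfg.num_inference_steps} steps, backend: {unet_artifact}")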
demo/driving_video.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a67895a319bf48323ba63d15050299908e2cd6d99f79f766033423eb53662e07
+ size 2923884
demo/ref_image.png ADDED

Git LFS Details

  • SHA256: a0b1e353e33cda46135494c5625e689d9ffa42d65bfd83690dd0cd4449a74e3f
  • Pointer size: 131 Bytes
  • Size of remote file: 451 kB
pose2vid_offline.py ADDED
@@ -0,0 +1,254 @@
+ import argparse
+ import os
+ import sys
+ from datetime import datetime
+ import mediapipe as mp
+ import numpy as np
+ import cv2
+ import torch
+ from skimage.transform import resize
+ from diffusers import AutoencoderKLTemporalDecoder, AutoencoderKL, AutoencoderTiny
+ from src.scheduler.scheduler_ddim import DDIMScheduler
+ import random
+ from omegaconf import OmegaConf
+ from PIL import Image
+ from torchvision import transforms
+ from transformers import CLIPVisionModelWithProjection
+ from src.models.unet_2d_condition import UNet2DConditionModel
+ from src.models.unet_3d import UNet3DConditionModel
+ from src.pipelines.pipeline_pose2vid import Pose2VideoPipeline
+ from src.utils.util import save_videos_grid, crop_face
+ from decord import VideoReader
+ from diffusers.utils.import_utils import is_xformers_available
+
+ from src.models.motion_encoder.encoder import MotEncoder
+ from src.liveportrait.motion_extractor import MotionExtractor
+ from src.models.pose_guider import PoseGuider
+ from tqdm import tqdm
+
+ def parse_args():
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--config", type=str, default='configs/prompts/personalive_offline.yaml')
+     parser.add_argument("--name", type=str, default='personalive_offline')
+     parser.add_argument("-W", type=int, default=512)
+     parser.add_argument("-H", type=int, default=512)
+     parser.add_argument("-L", type=int, default=1500)
+     parser.add_argument("--seed", type=int, default=42)
+     parser.add_argument("--device", type=str, default="cuda")
+     args = parser.parse_args()
+
+     return args
+
+ def main(args):
+     device = args.device
+     print('device', device)
+     config = OmegaConf.load(args.config)
+
+     if config.weight_dtype == "fp16":
+         weight_dtype = torch.float16
+     else:
+         weight_dtype = torch.float32
+
+     vae = AutoencoderKL.from_pretrained(config.vae_path).to(device, dtype=weight_dtype)
+     # if use tiny VAE
+     # vae_tiny = AutoencoderTiny.from_pretrained(config.vae_tiny_path).to(device, dtype=weight_dtype)
+
+     infer_config = OmegaConf.load(config.inference_config)
+     reference_unet = UNet2DConditionModel.from_pretrained(
+         config.pretrained_base_model_path,
+         subfolder="unet",
+     ).to(device=device, dtype=weight_dtype)
+     denoising_unet = UNet3DConditionModel.from_pretrained_2d(
+         config.pretrained_base_model_path,
+         "",
+         subfolder="unet",
+         unet_additional_kwargs=infer_config.unet_additional_kwargs,
+     ).to(dtype=weight_dtype, device=device)
+
+     motion_encoder = MotEncoder().to(dtype=weight_dtype, device=device).eval()
+     pose_guider = PoseGuider().to(device=device, dtype=weight_dtype)
+     pose_encoder = MotionExtractor(num_kp=21).to(device=device, dtype=weight_dtype).eval()
+
+     image_enc = CLIPVisionModelWithProjection.from_pretrained(
+         config.image_encoder_path
+     ).to(dtype=weight_dtype, device=device)
+
+     sched_kwargs = OmegaConf.to_container(
+         OmegaConf.load(config.inference_config).noise_scheduler_kwargs
+     )
+     scheduler = DDIMScheduler(**sched_kwargs)
+
+     generator = torch.manual_seed(args.seed)
+     width, height = args.W, args.H
+
+     # load pretrained weights
+     denoising_unet.load_state_dict(
+         torch.load(config.denoising_unet_path, map_location="cpu"), strict=False
+     )
+     reference_unet.load_state_dict(
+         torch.load(
+             config.denoising_unet_path.replace('denoising_unet', 'reference_unet'),
+             map_location="cpu",
+         ),
+         strict=True,
+     )
+     motion_encoder.load_state_dict(
+         torch.load(
+             config.denoising_unet_path.replace('denoising_unet', 'motion_encoder'),
+             map_location="cpu",
+         ),
+         strict=True,
+     )
+     pose_guider.load_state_dict(
+         torch.load(
+             config.denoising_unet_path.replace('denoising_unet', 'pose_guider'),
+             map_location="cpu",
+         ),
+         strict=True,
+     )
+     denoising_unet.load_state_dict(
+         torch.load(
+             config.denoising_unet_path.replace('denoising_unet', 'temporal_module'),
+             map_location="cpu",
+         ),
+         strict=False,
+     )
+     pose_encoder.load_state_dict(
+         torch.load(
+             config.denoising_unet_path.replace('denoising_unet', 'motion_extractor'),
+             map_location="cpu",
+         ),
+         strict=False,
+     )
+
+     if is_xformers_available():
+         reference_unet.enable_xformers_memory_efficient_attention()
+         denoising_unet.enable_xformers_memory_efficient_attention()
+     else:
+         raise ValueError(
+             "xformers is not available. Make sure it is installed correctly"
+         )
+
+     mp_face_mesh = mp.solutions.face_mesh
+     face_mesh = mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1)
+
+     pipe = Pose2VideoPipeline(
+         vae=vae,
+         # vae_tiny=vae_tiny,
+         image_encoder=image_enc,
+         reference_unet=reference_unet,
+         denoising_unet=denoising_unet,
+         motion_encoder=motion_encoder,
+         pose_encoder=pose_encoder,
+         pose_guider=pose_guider,
+         scheduler=scheduler,
+     )
+     pipe = pipe.to(device)
+
+     date_str = datetime.now().strftime("%Y%m%d")
+     if args.name is None:
+         time_str = datetime.now().strftime("%H%M")
+         save_dir_name = f"{date_str}--{time_str}"
+     else:
+         save_dir_name = f"{date_str}--{args.name}"
+     save_vid_dir = os.path.join('results', save_dir_name, 'concat_vid')
+     os.makedirs(save_vid_dir, exist_ok=True)
+     save_split_vid_dir = os.path.join('results', save_dir_name, 'split_vid')
+     os.makedirs(save_split_vid_dir, exist_ok=True)
+
+     pose_transform = transforms.Compose(
+         [transforms.Resize((height, width)), transforms.ToTensor()]
+     )
+
+     args.test_cases = OmegaConf.load(args.config)["test_cases"]
+
+     for ref_image_path in list(args.test_cases.keys()):
+         for pose_video_path in args.test_cases[ref_image_path]:
+             video_name = os.path.basename(pose_video_path).split(".")[0]
+             source_name = os.path.basename(ref_image_path).split(".")[0]
+
+             vid_name = f"{source_name}_{video_name}.mp4"
+             save_vid_path = os.path.join(save_vid_dir, vid_name)
+             print(save_vid_path)
+             if os.path.exists(save_vid_path):
+                 continue
+
+             if ref_image_path.endswith('.mp4'):
+                 src_vid = VideoReader(ref_image_path)
+                 ref_img = src_vid[0].asnumpy()
+                 ref_img = Image.fromarray(ref_img).convert("RGB")
+             else:
+                 ref_img = Image.open(ref_image_path).convert("RGB")
+
+             control = VideoReader(pose_video_path)
+             video_length = min(len(control) // 4 * 4, args.L)
+             sel_idx = range(len(control))[:video_length]
+             control = control.get_batch([sel_idx]).asnumpy()  # N, H, W, C
+
+             ref_image_pil = ref_img.copy()
+             ref_patch = crop_face(ref_image_pil, face_mesh)
+             ref_face_pil = Image.fromarray(ref_patch).convert("RGB")
+
+             size = args.H
+             generator = torch.Generator(device=device)
+             generator.manual_seed(42)
+
+             dri_faces = []
+             ori_pose_images = []
+             for idx_control, pose_image_pil in tqdm(enumerate(control[:video_length]), total=video_length, desc='cropping faces'):
+                 pose_image_pil = Image.fromarray(pose_image_pil).convert("RGB")
+                 ori_pose_images.append(pose_image_pil)
+                 dri_face = crop_face(pose_image_pil, face_mesh)
+                 dri_face_pil = Image.fromarray(dri_face).convert("RGB")
+                 dri_faces.append(dri_face_pil)
+
+             face_tensor_list = []
+             ori_pose_tensor_list = []
+             ref_tensor_list = []
+
+             for idx, pose_image_pil in enumerate(ori_pose_images):
+                 face_tensor_list.append(pose_transform(dri_faces[idx]))
+                 ori_pose_tensor_list.append(pose_transform(pose_image_pil))
+                 ref_tensor_list.append(pose_transform(ref_image_pil))
+
+             ref_tensor = torch.stack(ref_tensor_list, dim=0)  # (f, c, h, w)
+             ref_tensor = ref_tensor.transpose(0, 1).unsqueeze(0)  # (c, f, h, w)
+
+             face_tensor = torch.stack(face_tensor_list, dim=0)  # (f, c, h, w)
+             face_tensor = face_tensor.transpose(0, 1).unsqueeze(0)
+
+             ori_pose_tensor = torch.stack(ori_pose_tensor_list, dim=0)  # (f, c, h, w)
+             ori_pose_tensor = ori_pose_tensor.transpose(0, 1).unsqueeze(0)
+
+             gen_video = pipe(
+                 ori_pose_images,
+                 ref_image_pil,
+                 dri_faces,
+                 ref_face_pil,
+                 width,
+                 height,
+                 len(dri_faces),
+                 num_inference_steps=4,
+                 guidance_scale=1.0,
+                 generator=generator,
+                 temporal_window_size=4,
+                 temporal_adaptive_step=4,
+             ).videos
+
+             # Concat it with the pose tensor
+             video = torch.cat([ref_tensor, face_tensor, ori_pose_tensor, gen_video], dim=0)
+
+             save_videos_grid(
+                 video,
+                 save_vid_path,
+                 n_rows=4,
+                 fps=25,
+             )
+
+             if True:
+                 save_vid_path = save_vid_path.replace(save_vid_dir, save_split_vid_dir)
+                 save_videos_grid(gen_video, save_vid_path, n_rows=1, fps=25, crf=18)
+
+ if __name__ == "__main__":
+     args = parse_args()
+     main(args)
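With the demo assets and pretrained weights in place, the offline pipeline is launched as python pose2vid_offline.py --config configs/prompts/personalive_offline.yaml; -W/-H default to 512, -L caps the driving clip at 1500 frames, and outputs are written under results/<date>--personalive_offline/concat_vid and split_vid, which matches the result videos included in this commit.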
pose2vid_online.py ADDED
@@ -0,0 +1,323 @@
1
+ import os
2
+ import signal
3
+ import sys
4
+ import json
5
+
6
+ from fastapi import FastAPI, WebSocket, HTTPException, WebSocketDisconnect, UploadFile, File
7
+ from fastapi.responses import JSONResponse
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ from fastapi.staticfiles import StaticFiles
10
+ from fastapi import Request
11
+
12
+ import markdown2
13
+ import threading
14
+ import logging
15
+ import uuid
16
+ import time
17
+ from types import SimpleNamespace
18
+ import asyncio
19
+ import mimetypes
20
+ import torch
21
+
22
+ from webcam.config import config, Args
23
+ from webcam.util import pil_to_frame, bytes_to_pil, is_firefox, bytes_to_tensor
24
+ from webcam.connection_manager import ConnectionManager, ServerFullException
25
+ import multiprocessing as mp
26
+
27
+ use_trt = True
28
+
29
+ if use_trt:
30
+ from webcam.vid2vid_trt import Pipeline
31
+ else:
32
+ from webcam.vid2vid import Pipeline
33
+
34
+ mimetypes.add_type("application/javascript", ".js")
35
+
36
+ THROTTLE = 0.001
37
+
38
+
39
+ class App:
40
+ def __init__(self, config: Args, pipeline: Pipeline):
41
+ self.args = config
42
+ self.pipeline = pipeline
43
+ self.app = FastAPI()
44
+ self.conn_manager = ConnectionManager()
45
+
46
+ self.produce_predictions_stop_event = None
47
+ self.produce_predictions_task = None
48
+ self.shutdown_event = asyncio.Event()
49
+
50
+ self.init_app()
51
+
52
+ def init_app(self):
53
+ self.app.add_middleware(
54
+ CORSMiddleware,
55
+ allow_origins=["*"],
56
+ allow_credentials=True,
57
+ allow_methods=["*"],
58
+ allow_headers=["*"],
59
+ )
60
+
61
+ @self.app.websocket("/api/ws/{user_id}")
62
+ async def websocket_endpoint(user_id: uuid.UUID, websocket: WebSocket):
63
+ try:
64
+ await self.conn_manager.connect(
65
+ user_id, websocket, self.args.max_queue_size
66
+ )
67
+
68
+ sender_task = asyncio.create_task(push_results_to_client(user_id, websocket))
69
+
70
+ if self.produce_predictions_task is None or self.produce_predictions_task.done():
71
+ start_prediction_thread(user_id)
72
+
73
+ await handle_websocket_input(user_id, websocket)
74
+
75
+ except ServerFullException as e:
76
+ logging.error(f"Server Full: {e}")
77
+ except WebSocketDisconnect:
78
+ logging.info(f"User disconnected: {user_id}")
79
+ except Exception as e:
80
+ logging.error(f"WS Error: {e}")
81
+ finally:
82
+ if 'sender_task' in locals():
83
+ sender_task.cancel()
84
+
85
+ await self.conn_manager.disconnect(user_id, self.pipeline)
86
+
87
+ if self.produce_predictions_stop_event is not None:
88
+ self.produce_predictions_stop_event.set()
89
+ logging.info(f"Cleaned up user: {user_id}")
90
+
91
+ async def handle_websocket_input(user_id: uuid.UUID, websocket: WebSocket):
92
+ if not self.conn_manager.check_user(user_id):
93
+ raise HTTPException(status_code=404, detail="User not found")
94
+
95
+ try:
96
+ while True:
97
+ message = await websocket.receive()
98
+
99
+ if "text" in message:
100
+ try:
101
+ text_data = message["text"]
102
+ data = json.loads(text_data)
103
+ status = data.get("status")
104
+
105
+ if status == "pause":
106
+ params = SimpleNamespace(**{"restart": True})
107
+ await self.conn_manager.update_data(user_id, params)
108
+ elif status == "resume":
109
+ await self.conn_manager.send_json(user_id, {"status": "send_frame"})
110
+ except Exception as e:
111
+ logging.error(f"JSON Parse Error: {e}")
112
+
113
+ elif "bytes" in message:
114
+ image_data = message["bytes"]
115
+ if len(image_data) > 0:
116
+ input_tensor = bytes_to_tensor(image_data)
117
+ params = SimpleNamespace()
118
+ params.image = input_tensor
119
+ self.pipeline.accept_new_params(params)
120
+
121
+ except WebSocketDisconnect:
122
+ raise
123
+ except Exception as e:
124
+ logging.error(f"Input Loop Error: {e}")
125
+ raise
126
+
127
+ async def push_results_to_client(user_id: uuid.UUID, websocket: WebSocket):
128
+ MIN_FPS = 10
129
+ MAX_FPS = 30
130
+ SMOOTHING = 0.8 # EMA smoothing factor
131
+
132
+ last_burst_time = time.time()
133
+ last_queue_size = 0
134
+ sleep_time = 1 / 40 # Initial guess
135
+
136
+ last_frame_time = None
137
+ frame_time_list = []
138
+
139
+ ema_frame_interval = sleep_time
140
+
141
+ try:
142
+ while True:
143
+ queue_size = await self.conn_manager.get_output_queue_size(user_id)
144
+ if queue_size > last_queue_size:
145
+ current_burst_time = time.time()
146
+ elapsed = current_burst_time - last_burst_time
147
+
148
+ if queue_size > 0 and elapsed > 0:
149
+ raw_interval = elapsed / queue_size
150
+ ema_frame_interval = SMOOTHING * ema_frame_interval + (1 - SMOOTHING) * raw_interval
151
+ sleep_time = min(max(ema_frame_interval, 1 / MAX_FPS), 1 / MIN_FPS)
152
+
153
+ last_burst_time = current_burst_time
154
+
155
+ last_queue_size = queue_size
156
+
157
+ frame = await self.conn_manager.get_frame(user_id)
158
+ if frame is None:
159
+ await asyncio.sleep(0.001)
160
+ continue
161
+
162
+ await websocket.send_bytes(frame)
163
+
164
+ if last_frame_time is None:
165
+ last_frame_time = time.time()
166
+ else:
167
+ frame_time_list.append(time.time() - last_frame_time)
168
+ if len(frame_time_list) > 100:
169
+ frame_time_list.pop(0)
170
+ last_frame_time = time.time()
171
+
172
+ await asyncio.sleep(sleep_time)
173
+
174
+ except asyncio.CancelledError:
175
+ pass
176
+ except Exception as e:
177
+ logging.error(f"Push Result Error: {e}")
178
+
179
+ def start_prediction_thread(user_id):
180
+ self.produce_predictions_stop_event = threading.Event()
181
+
182
+ def prediction_loop(uid, loop, stop_event):
183
+ while not stop_event.is_set():
184
+ images = self.pipeline.produce_outputs()
185
+ if len(images) == 0:
186
+ time.sleep(THROTTLE)
187
+ continue
188
+
189
+ frames = list(map(pil_to_frame, images))
190
+ asyncio.run_coroutine_threadsafe(
191
+ self.conn_manager.put_frames_to_output_queue(uid, frames),
192
+ loop
193
+ )
194
+
195
+ self.produce_predictions_task = asyncio.create_task(asyncio.to_thread(
196
+ prediction_loop, user_id, asyncio.get_running_loop(), self.produce_predictions_stop_event
197
+ ))
198
+
199
+ @self.app.get("/api/queue")
200
+ async def get_queue_size():
201
+ queue_size = self.conn_manager.get_user_count()
202
+ return JSONResponse({"queue_size": queue_size})
203
+
204
+ @self.app.get("/api/settings")
205
+ async def settings():
206
+ info_schema = pipeline.Info.schema()
207
+ info = pipeline.Info()
208
+ if info.page_content:
209
+ page_content = markdown2.markdown(info.page_content)
210
+
211
+ input_params = pipeline.InputParams.schema()
212
+ return JSONResponse(
213
+ {
214
+ "info": info_schema,
215
+ "input_params": input_params,
216
+ "max_queue_size": self.args.max_queue_size,
217
+ "page_content": page_content if info.page_content else "",
218
+ }
219
+ )
220
+
221
+ @self.app.post("/api/upload_reference_image")
222
+ async def upload_reference_image(ref_image: UploadFile = File(...)):
223
+ try:
224
+ data = await ref_image.read()
225
+ img = bytes_to_pil(data)
226
+ self.pipeline.fuse_reference(img)
227
+ return {"status": "ok"}
228
+ except Exception as e:
229
+ logging.error(f"Reference image error: {e}")
230
+ raise HTTPException(status_code=500, detail="Failed to process reference image")
231
+
232
+ if not os.path.exists("./demo_w_camera/frontend/public"):
233
+ os.makedirs("./demo_w_camera/frontend/public")
234
+
235
+ self.app.mount(
236
+ "/", StaticFiles(directory="./demo_w_camera/frontend/public", html=True), name="public"
237
+ )
238
+
239
+ @self.app.on_event("shutdown")
240
+ async def shutdown_event():
241
+ await self.cleanup()
242
+
243
+ async def cleanup(self):
244
+ print("[App] Starting cleanup process...")
245
+ self.shutdown_event.set()
246
+
247
+ if self.produce_predictions_stop_event is not None:
248
+ self.produce_predictions_stop_event.set()
249
+
250
+ if self.produce_predictions_task is not None:
251
+ self.produce_predictions_task.cancel()
252
+ try:
253
+ await self.produce_predictions_task
254
+ except asyncio.CancelledError:
255
+ pass
256
+
257
+ try:
258
+ await self.conn_manager.disconnect_all(self.pipeline)
259
+ except Exception as e:
260
+ print(f"[App] Error during disconnect_all: {e}")
261
+
262
+ print("[App] Cleanup completed")
263
+
264
+ app_instance = None
265
+
266
+ def signal_handler(signum, frame):
267
+ print(f"\n[Main] Received signal {signum}, shutting down gracefully...")
268
+ if app_instance:
269
+ import threading
270
+ def trigger_cleanup():
271
+ try:
272
+ loop = asyncio.new_event_loop()
273
+ asyncio.set_event_loop(loop)
274
+ loop.run_until_complete(app_instance.cleanup())
275
+ loop.close()
276
+ except Exception as e:
277
+ print(f"[Main] Error during cleanup: {e}")
278
+
279
+ cleanup_thread = threading.Thread(target=trigger_cleanup)
280
+ cleanup_thread.daemon = True
281
+ cleanup_thread.start()
282
+ cleanup_thread.join(timeout=5)
283
+
284
+ sys.exit(0)
285
+
286
+
287
+ if __name__ == "__main__":
288
+ import uvicorn
289
+ signal.signal(signal.SIGINT, signal_handler)
290
+ signal.signal(signal.SIGTERM, signal_handler)
291
+ mp.set_start_method("spawn", force=True)
292
+
293
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
294
+ pipeline = Pipeline(config, device)
295
+
296
+ app_obj = App(config, pipeline)
297
+ app = app_obj.app
298
+ app_instance = app_obj
299
+
300
+ print('init done')
301
+
302
+ try:
303
+ uvicorn.run(
304
+ app,
305
+ host=config.host,
306
+ port=config.port,
307
+ reload=config.reload,
308
+ ssl_certfile=config.ssl_certfile,
309
+ ssl_keyfile=config.ssl_keyfile,
310
+ )
311
+ except KeyboardInterrupt:
312
+ try:
313
+ import asyncio
314
+ loop = asyncio.new_event_loop()
315
+ asyncio.set_event_loop(loop)
316
+ loop.run_until_complete(app_obj.cleanup())
317
+ loop.close()
318
+ except Exception as e:
319
+ print(f"[Main] Error during cleanup: {e}")
320
+ sys.exit(0)
321
+ except Exception as e:
322
+ print(f"[Main] Error: {e}")
323
+ sys.exit(1)
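The script above exposes a WebSocket at /api/ws/{user_id} that accepts binary webcam frames and streams generated frames back. A rough client sketch follows; it assumes the third-party websockets package, a placeholder host and port (the real values come from webcam.config), and that frame bytes are encoded however webcam.util.bytes_to_tensor expects, so treat it as illustrative only.

# Illustrative sketch: a minimal client for the /api/ws/{user_id} endpoint.
import asyncio
import uuid
import websockets  # third-party package, assumed available

async def stream(frames):
    url = f"ws://localhost:8000/api/ws/{uuid.uuid4()}"  # placeholder host/port
    async with websockets.connect(url) as ws:
        for frame_bytes in frames:      # raw bytes of each captured frame
            await ws.send(frame_bytes)  # binary messages feed pipeline.accept_new_params
            out = await ws.recv()       # the server pushes generated frames back as bytes
            print("received", len(out), "bytes")

# asyncio.run(stream(my_frames))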
pretrained_weights/.DS_Store ADDED
Binary file (8.2 kB).
 
pretrained_weights/onnx/.DS_Store ADDED
Binary file (6.15 kB).
 
pretrained_weights/onnx/unet_opt/unet_opt.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:484aee7e8c45cddaac227b6ad331a88a77121dee0886f2152cc4bd0e9974b6fa
+ size 96224343
pretrained_weights/onnx/unet_opt/unet_opt.onnx.data ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:aa08ee8770f202be841e00f2bb94809c2ca6ca95ad8663c2917c4c6fa35d963e
+ size 3593537864
pretrained_weights/personalive/denoising_unet.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d0446c4d2387f259d5f3c1ac54a5aefa93400f4672f942856bff2538df046162
+ size 4927015578
pretrained_weights/personalive/motion_encoder.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ff7c6b0a84cd750046e7687f7a6f6bbc21317055bfcacef950ed347debae4d2c
+ size 246719031
pretrained_weights/personalive/motion_extractor.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:251e6a94ad667a1d0c69526d292677165110ef7f0cf0f6d199f0e414e8aa0ca5
+ size 112545506
pretrained_weights/personalive/pose_guider.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8b997db63343a6a5d489778172d9544bcccaf27e6756505dc6353d84e877269d
+ size 4351790
pretrained_weights/personalive/reference_unet.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:85eb03e6c34fab69f9246ff14b3016789232e56dc4892d0581fea21a3a8480f6
+ size 3438324340
pretrained_weights/personalive/temporal_module.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:295e8942a453adb48756432d99de103ecba9b840b5b8f6635a0687311cdff30e
+ size 1817903018
pretrained_weights/tensorrt/.DS_Store ADDED
Binary file (6.15 kB).
 
pretrained_weights/tensorrt/unet_work(H100).engine ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:34bd6f7693300be8cf72a099f1160bfaedab7a677bcaf66f18ee33a5b871de50
+ size 3697605036
results/20251209--personalive_offline/concat_vid/ref_image_driving_video.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9bf93d55acd386d689cda4588e636545219acf9910f1d6292eb6db0bed82c64b
+ size 7700854
results/20251209--personalive_offline/split_vid/ref_image_driving_video.mp4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a064eb1a2effcb3514450e157ec6903973bca4c1d50a888e9c94c0f40a397213
+ size 7605688
src/.DS_Store ADDED
Binary file (8.2 kB).
 
src/__pycache__/wrapper.cpython-310.pyc ADDED
Binary file (11 kB).
 
src/__pycache__/wrapper_trt.cpython-310.pyc ADDED
Binary file (10.4 kB).
 
src/liveportrait/__pycache__/camera.cpython-310.pyc ADDED
Binary file (1.78 kB).
 
src/liveportrait/__pycache__/camera.cpython-39.pyc ADDED
Binary file (1.77 kB).
 
src/liveportrait/__pycache__/convnextv2.cpython-310.pyc ADDED
Binary file (6.19 kB).
 
src/liveportrait/__pycache__/convnextv2.cpython-39.pyc ADDED
Binary file (6.45 kB).
 
src/liveportrait/__pycache__/motion_extractor.cpython-310.pyc ADDED
Binary file (6.61 kB).
 
src/liveportrait/__pycache__/motion_extractor.cpython-39.pyc ADDED
Binary file (6.61 kB).
 
src/liveportrait/__pycache__/util.cpython-310.pyc ADDED
Binary file (15.7 kB).
 
src/liveportrait/__pycache__/util.cpython-39.pyc ADDED
Binary file (16.1 kB).
 
src/liveportrait/camera.py ADDED
@@ -0,0 +1,73 @@
+ # coding: utf-8
+
+ """
+ functions for processing and transforming 3D facial keypoints
+ """
+
+ import numpy as np
+ import torch
+ import torch.nn.functional as F
+
+ PI = np.pi
+
+
+ def headpose_pred_to_degree(pred):
+     """
+     pred: (bs, 66) or (bs, 1) or others
+     """
+     if pred.ndim > 1 and pred.shape[1] == 66:
+         # NOTE: note that the average is modified to 97.5
+         device = pred.device
+         idx_tensor = [idx for idx in range(0, 66)]
+         idx_tensor = torch.FloatTensor(idx_tensor).to(device)
+         pred = F.softmax(pred, dim=1)
+         degree = torch.sum(pred * idx_tensor, axis=1) * 3 - 97.5
+
+         return degree
+
+     return pred
+
+
+ def get_rotation_matrix(pitch_, yaw_, roll_):
+     """ the input is in degree
+     """
+     # transform to radian
+     pitch = pitch_ / 180 * PI
+     yaw = yaw_ / 180 * PI
+     roll = roll_ / 180 * PI
+
+     device = pitch.device
+
+     if pitch.ndim == 1:
+         pitch = pitch.unsqueeze(1)
+     if yaw.ndim == 1:
+         yaw = yaw.unsqueeze(1)
+     if roll.ndim == 1:
+         roll = roll.unsqueeze(1)
+
+     # calculate the euler matrix
+     bs = pitch.shape[0]
+     ones = torch.ones([bs, 1]).to(device)
+     zeros = torch.zeros([bs, 1]).to(device)
+     x, y, z = pitch, yaw, roll
+
+     rot_x = torch.cat([
+         ones, zeros, zeros,
+         zeros, torch.cos(x), -torch.sin(x),
+         zeros, torch.sin(x), torch.cos(x)
+     ], dim=1).reshape([bs, 3, 3])
+
+     rot_y = torch.cat([
+         torch.cos(y), zeros, torch.sin(y),
+         zeros, ones, zeros,
+         -torch.sin(y), zeros, torch.cos(y)
+     ], dim=1).reshape([bs, 3, 3])
+
+     rot_z = torch.cat([
+         torch.cos(z), -torch.sin(z), zeros,
+         torch.sin(z), torch.cos(z), zeros,
+         zeros, zeros, ones
+     ], dim=1).reshape([bs, 3, 3])
+
+     rot = rot_z @ rot_y @ rot_x
+     return rot.permute(0, 2, 1)  # transpose
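A quick usage sketch for the two helpers above, with random logits standing in for real head-pose predictions (illustrative, not part of the commit):

# Illustrative sketch: 66-bin head-pose logits -> degrees -> batched rotation matrices.
import torch
from src.liveportrait.camera import headpose_pred_to_degree, get_rotation_matrix

pitch = headpose_pred_to_degree(torch.randn(2, 66))  # (2,) angles in degrees
yaw = headpose_pred_to_degree(torch.randn(2, 66))
roll = headpose_pred_to_degree(torch.randn(2, 66))
rot = get_rotation_matrix(pitch, yaw, roll)          # (2, 3, 3), returned pre-transposed
print(rot.shape)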
src/liveportrait/convnextv2.py ADDED
@@ -0,0 +1,216 @@
1
+ # coding: utf-8
2
+
3
+ """
4
+ This moudle is adapted to the ConvNeXtV2 version for the extraction of implicit keypoints, poses, and expression deformation.
5
+ """
6
+
7
+ import torch
8
+ import torch.nn as nn
9
+ # from timm.models.layers import trunc_normal_, DropPath
10
+ from src.liveportrait.util import LayerNorm, DropPath, trunc_normal_, GRN
11
+ from einops import rearrange
12
+
13
+ __all__ = ['convnextv2_tiny']
14
+
15
+
16
+ class Block(nn.Module):
17
+ """ ConvNeXtV2 Block.
18
+
19
+ Args:
20
+ dim (int): Number of input channels.
21
+ drop_path (float): Stochastic depth rate. Default: 0.0
22
+ """
23
+
24
+ def __init__(self, dim, drop_path=0.):
25
+ super().__init__()
26
+ self.dwconv = nn.Conv2d(dim, dim, kernel_size=7, padding=3, groups=dim) # depthwise conv
27
+ self.norm = LayerNorm(dim, eps=1e-6)
28
+ self.pwconv1 = nn.Linear(dim, 4 * dim) # pointwise/1x1 convs, implemented with linear layers
29
+ self.act = nn.GELU()
30
+ self.grn = GRN(4 * dim)
31
+ self.pwconv2 = nn.Linear(4 * dim, dim)
32
+ self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
33
+
34
+ def forward(self, x):
35
+ input = x
36
+ x = self.dwconv(x)
37
+ x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C)
38
+ x = self.norm(x)
39
+ x = self.pwconv1(x)
40
+ x = self.act(x)
41
+ x = self.grn(x)
42
+ x = self.pwconv2(x)
43
+ x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W)
44
+ x = input + self.drop_path(x)
45
+ return x
46
+
47
+
48
+ class ConvNeXtV2(nn.Module):
49
+ """ ConvNeXt V2
50
+
51
+ Args:
52
+ in_chans (int): Number of input image channels. Default: 3
53
+ num_classes (int): Number of classes for classification head. Default: 1000
54
+ depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
55
+ dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]
56
+ drop_path_rate (float): Stochastic depth rate. Default: 0.
57
+ head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
58
+ """
59
+
60
+ def __init__(
61
+ self,
62
+ in_chans=3,
63
+ depths=[3, 3, 9, 3],
64
+ dims=[96, 192, 384, 768],
65
+ drop_path_rate=0.,
66
+ **kwargs
67
+ ):
68
+ super().__init__()
69
+ self.depths = depths
70
+ self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers
71
+ stem = nn.Sequential(
72
+ nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),
73
+ LayerNorm(dims[0], eps=1e-6, data_format="channels_first")
74
+ )
75
+ self.downsample_layers.append(stem)
76
+ for i in range(3):
77
+ downsample_layer = nn.Sequential(
78
+ LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
79
+ nn.Conv2d(dims[i], dims[i+1], kernel_size=2, stride=2),
80
+ )
81
+ self.downsample_layers.append(downsample_layer)
82
+
83
+ self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks
84
+ dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
85
+ cur = 0
86
+ for i in range(4):
87
+ stage = nn.Sequential(
88
+ *[Block(dim=dims[i], drop_path=dp_rates[cur + j]) for j in range(depths[i])]
89
+ )
90
+ self.stages.append(stage)
91
+ cur += depths[i]
92
+
93
+ self.norm = nn.LayerNorm(dims[-1], eps=1e-6) # final norm layer
94
+
95
+ # NOTE: the output semantic items
96
+ num_bins = kwargs.get('num_bins', 66)
97
+ num_kp = kwargs.get('num_kp', 24) # the number of implicit keypoints
98
+ self.fc_kp = nn.Linear(dims[-1], 3 * num_kp) # implicit keypoints
99
+
100
+ # print('dims[-1]: ', dims[-1])
101
+ self.fc_scale = nn.Linear(dims[-1], 1) # scale
102
+ self.fc_pitch = nn.Linear(dims[-1], num_bins) # pitch bins
103
+ self.fc_yaw = nn.Linear(dims[-1], num_bins) # yaw bins
104
+ self.fc_roll = nn.Linear(dims[-1], num_bins) # roll bins
105
+ self.fc_t = nn.Linear(dims[-1], 3) # translation
106
+ self.fc_exp = nn.Linear(dims[-1], 3 * num_kp) # expression / delta
107
+
108
+ def _init_weights(self, m):
109
+ if isinstance(m, (nn.Conv2d, nn.Linear)):
110
+ trunc_normal_(m.weight, std=.02)
111
+ nn.init.constant_(m.bias, 0)
112
+
113
+ def forward_features(self, x):
114
+ for i in range(4):
115
+ x = self.downsample_layers[i](x)
116
+ x = self.stages[i](x)
117
+ return self.norm(x.mean([-2, -1])) # global average pooling, (N, C, H, W) -> (N, C)
118
+
119
+ def forward(self, x):
120
+ x = self.forward_features(x)
121
+
122
+ # implicit keypoints
123
+ kp = self.fc_kp(x)
124
+
125
+ # pose and expression deformation
126
+ pitch = self.fc_pitch(x)
127
+ yaw = self.fc_yaw(x)
128
+ roll = self.fc_roll(x)
129
+ t = self.fc_t(x)
130
+ # exp = self.fc_exp(x)
131
+ scale = self.fc_scale(x)
132
+
133
+ ret_dct = {
134
+ 'pitch': pitch,
135
+ 'yaw': yaw,
136
+ 'roll': roll,
137
+ 't': t,
138
+ # 'exp': exp,
139
+ 'scale': scale,
140
+
141
+ 'kp': kp, # canonical keypoint
142
+ }
143
+
144
+ return ret_dct
145
+
146
+
147
+ def convnextv2_tiny(**kwargs):
148
+ model = ConvNeXtV2(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs)
149
+ return model
150
+
151
+ class ConvNeXt(nn.Module):
152
+ """ ConvNeXt V2
153
+
154
+ Args:
155
+ in_chans (int): Number of input image channels. Default: 3
156
+ num_classes (int): Number of classes for classification head. Default: 1000
157
+ depths (tuple(int)): Number of blocks at each stage. Default: [3, 3, 9, 3]
158
+ dims (int): Feature dimension at each stage. Default: [96, 192, 384, 768]
159
+ drop_path_rate (float): Stochastic depth rate. Default: 0.
160
+ head_init_scale (float): Init scaling value for classifier weights and biases. Default: 1.
161
+ """
162
+
163
+ def __init__(
164
+ self,
165
+ in_chans=3,
166
+ depths=[3, 3, 9, 3],
167
+ dims=[96, 192, 384, 768],
168
+ drop_path_rate=0.,
169
+ **kwargs
170
+ ):
171
+ super().__init__()
172
+ self.depths = depths
173
+ self.downsample_layers = nn.ModuleList() # stem and 3 intermediate downsampling conv layers
174
+ stem = nn.Sequential(
175
+ nn.Conv2d(in_chans, dims[0], kernel_size=4, stride=4),
176
+ LayerNorm(dims[0], eps=1e-6, data_format="channels_first")
177
+ )
178
+ self.downsample_layers.append(stem)
179
+ for i in range(3):
180
+ downsample_layer = nn.Sequential(
181
+ LayerNorm(dims[i], eps=1e-6, data_format="channels_first"),
182
+ nn.Conv2d(dims[i], dims[i+1], kernel_size=2, stride=2),
183
+ )
184
+ self.downsample_layers.append(downsample_layer)
185
+
186
+ self.stages = nn.ModuleList() # 4 feature resolution stages, each consisting of multiple residual blocks
187
+ dp_rates = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
188
+ cur = 0
189
+ for i in range(4):
190
+ stage = nn.Sequential(
191
+ *[Block(dim=dims[i], drop_path=dp_rates[cur + j]) for j in range(depths[i])]
192
+ )
193
+ self.stages.append(stage)
194
+ cur += depths[i]
195
+
196
+ self.norm = nn.LayerNorm(dims[-1], eps=1e-6) # final norm layer
197
+
198
+ def _init_weights(self, m):
199
+ if isinstance(m, (nn.Conv2d, nn.Linear)):
200
+ trunc_normal_(m.weight, std=.02)
201
+ nn.init.constant_(m.bias, 0)
202
+
203
+ def forward_features(self, x):
204
+ for i in range(4):
205
+ x = self.downsample_layers[i](x)
206
+ x = self.stages[i](x)
207
+ return self.norm(x.mean([-2, -1])) # global average pooling, (N, C, H, W) -> (N, C)
208
+
209
+ def forward(self, x):
210
+ x = self.forward_features(x)
211
+ return x
212
+
213
+
214
+ def convnextv2(**kwargs):
215
+ model = ConvNeXt(depths=[3, 3, 9, 3], dims=[96, 192, 384, 768], **kwargs)
216
+ return model
src/liveportrait/motion_extractor.py ADDED
@@ -0,0 +1,212 @@
1
+ # coding: utf-8
2
+
3
+ """
4
+ Motion extractor(M), which directly predicts the canonical keypoints, head pose and expression deformation of the input image
5
+ """
6
+
7
+ from torch import nn
8
+ import torch
9
+ from diffusers.models.modeling_utils import ModelMixin
10
+ from src.liveportrait.convnextv2 import convnextv2_tiny
11
+ from src.liveportrait.util import filter_state_dict
12
+ from src.liveportrait.camera import headpose_pred_to_degree, get_rotation_matrix
13
+
14
+ model_dict = {
15
+ 'convnextv2_tiny': convnextv2_tiny,
16
+ }
17
+
18
+
19
+ class MotionExtractor(ModelMixin):
20
+ def __init__(self, **kwargs):
21
+ super(MotionExtractor, self).__init__()
22
+
23
+ # default is convnextv2_base
24
+ backbone = kwargs.get('backbone', 'convnextv2_tiny')
25
+ self.detector = model_dict.get(backbone)(**kwargs)
26
+ self.register_buffer('idx_tensor', torch.arange(66, dtype=torch.float32))
27
+
28
+ def headpose_pred_to_degree(self, pred):
29
+ """
30
+ pred: (bs, 66) or (bs, 1) or others
31
+ """
32
+ if pred.ndim > 1 and pred.shape[1] == 66:
33
+ # NOTE: note that the average is modified to 97.5
34
+ prob = torch.nn.functional.softmax(pred, dim=1)
35
+ degree = torch.matmul(prob, self.idx_tensor)
36
+ degree = degree * 3 - 97.5
37
+
38
+ return degree
39
+
40
+ return pred
41
+
42
+ def load_pretrained(self, init_path: str):
43
+ if init_path not in (None, ''):
44
+ state_dict = torch.load(init_path, map_location=lambda storage, loc: storage)['model']
45
+ state_dict = filter_state_dict(state_dict, remove_name='head')
46
+ ret = self.detector.load_state_dict(state_dict, strict=False)
47
+ print(f'Load pretrained model from {init_path}, ret: {ret}')
48
+
49
+ def forward(self, x):
50
+ kp_info = self.detector(x)
51
+ return self.get_kp(kp_info)
52
+
53
+ def get_kp(self, kp_info):
54
+ bs = kp_info['kp'].shape[0]
55
+
56
+ angles_raw = torch.cat([kp_info['pitch'], kp_info['yaw'], kp_info['roll']], dim=0) # (3, 66)
57
+ angles_deg = self.headpose_pred_to_degree(angles_raw)[:, None] # (B, 3)
58
+ pitch, yaw, roll = torch.chunk(angles_deg, chunks=3, dim=0)
59
+
60
+
61
+ kp = kp_info['kp'].reshape(bs, -1, 3) # BxNx3
62
+ t, scale = kp_info['t'], kp_info['scale']
63
+
64
+ rot_mat = get_rotation_matrix(pitch, yaw, roll).to(self.dtype) # (bs, 3, 3)
65
+
66
+ if kp.ndim == 2:
67
+ num_kp = kp.shape[1] // 3 # Bx(num_kpx3)
68
+ else:
69
+ num_kp = kp.shape[1] # Bxnum_kpx3
70
+
71
+ # Eqn.2: s * (R * x_c,s) + t
72
+ kp_transformed = kp.view(bs, num_kp, 3) @ rot_mat# + exp.view(bs, num_kp, 3)
73
+ kp_transformed *= scale[..., None] # (bs, k, 3) * (bs, 1, 1) = (bs, k, 3)
74
+ kp_transformed[:, :, 0:2] += t[:, None, 0:2] # remove z, only apply tx ty
75
+
76
+ return kp_transformed
77
+
78
+ def interpolate_tensors(self, a: torch.Tensor, b: torch.Tensor, num: int = 10) -> torch.Tensor:
79
+ if a.shape != b.shape:
80
+ raise ValueError(f"Shape mismatch: a.shape={a.shape}, b.shape={b.shape}")
81
+
82
+ B, *rest = a.shape
83
+ alphas = torch.linspace(0, 1, num, device=a.device, dtype=a.dtype)
84
+ view_shape = (num,) + (1,) * len(rest)
85
+ alphas = alphas.view(view_shape) # (1, num, 1, 1, ...)
86
+
87
+ result = (1 - alphas) * a + alphas * b
88
+ return result[:-1]
89
+
90
+ def interpolate_kps(self, ref, motion, num_interp, t_scale=0.5, s_scale=0):
91
+ kp1 = self.detector(ref.to(self.dtype))
92
+ kp2_list = []
93
+ for i in range(0, motion.shape[0], 256):
94
+ motion_chunk = motion[i:i+256]
95
+ kp2_chunk = self.detector(motion_chunk.to(self.dtype))
96
+ kp2_list.append(kp2_chunk)
97
+ kp2 = {}
98
+ for key in kp2_list[0].keys():
99
+ kp2[key] = torch.cat([kp2_chunk[key] for kp2_chunk in kp2_list], dim=0)
100
+
101
+ angles_raw = torch.cat([kp1['pitch'], kp1['yaw'], kp1['roll']], dim=0) # (3, 66)
102
+ angles_deg = self.headpose_pred_to_degree(angles_raw) # (B, 3)
103
+ pitch_1, yaw_1, roll_1 = torch.chunk(angles_deg, chunks=3, dim=0)
104
+
105
+ angles_raw = torch.cat([kp2['pitch'], kp2['yaw'], kp2['roll']], dim=0) # (3, 66)
106
+ angles_deg = self.headpose_pred_to_degree(angles_raw) # (B, 3)
107
+ pitch_2, yaw_2, roll_2 = torch.chunk(angles_deg, chunks=3, dim=0)
108
+
109
+ pitch_interp = self.interpolate_tensors(pitch_1, pitch_2[:1], num_interp) # Bx(num_interp)x1
110
+ yaw_interp = self.interpolate_tensors(yaw_1, yaw_2[:1], num_interp) # Bx(num_interp)x1
111
+ roll_interp = self.interpolate_tensors(roll_1, roll_2[:1], num_interp) # Bx(num_interp)x1
112
+
113
+ t_1 = kp1['t']
114
+ t_2 = kp2['t']
115
+ t_2 = (t_2 - t_2[0]) * t_scale + t_1
116
+ t_interp = self.interpolate_tensors(t_1, t_2[:1], num_interp)
117
+
118
+ s_1 = kp1['scale']
119
+ s_2 = kp2['scale']
120
+ s_2 = s_2 * s_scale + s_1
121
+ s_interp = self.interpolate_tensors(s_1, s_2[:1], num_interp)
122
+
123
+ kp = kp1['kp'].repeat(num_interp+motion.shape[0]-1, 1)
124
+
125
+ kps_interp = {
126
+ 'pitch': torch.cat([pitch_interp, pitch_2], dim=0),
127
+ 'yaw': torch.cat([yaw_interp, yaw_2], dim=0),
128
+ 'roll': torch.cat([roll_interp, roll_2], dim=0),
129
+ 't': torch.cat([t_interp, t_2], dim=0),
130
+ 'scale': torch.cat([s_interp, s_2], dim=0),
131
+ 'kp': kp
132
+ }
133
+
134
+ kp_intrep = self.get_kp(kps_interp)
135
+
136
+ return kp_intrep
137
+
138
+
139
+ def interpolate_kps_online(self, ref, motion, num_interp, t_scale=0.5, s_scale=0):
140
+ kp1 = self.detector(ref.to(self.dtype))
141
+ kp_frame1 = self.detector(motion[:1].to(self.dtype))
142
+ kp2 = self.detector(motion.to(self.dtype))
143
+
144
+ angles_raw = torch.cat([kp1['pitch'], kp1['yaw'], kp1['roll']], dim=0) # (3, 66)
145
+ angles_deg = self.headpose_pred_to_degree(angles_raw) # (B, 3)
146
+ pitch_1, yaw_1, roll_1 = torch.chunk(angles_deg, chunks=3, dim=0)
147
+
148
+ angles_raw = torch.cat([kp2['pitch'], kp2['yaw'], kp2['roll']], dim=0) # (3, 66)
149
+ angles_deg = self.headpose_pred_to_degree(angles_raw) # (B, 3)
150
+ pitch_2, yaw_2, roll_2 = torch.chunk(angles_deg, chunks=3, dim=0)
151
+
152
+ pitch_interp = self.interpolate_tensors(pitch_1, pitch_2[:1], num_interp) # Bx(num_interp)x1
153
+ yaw_interp = self.interpolate_tensors(yaw_1, yaw_2[:1], num_interp) # Bx(num_interp)x1
154
+ roll_interp = self.interpolate_tensors(roll_1, roll_2[:1], num_interp) # Bx(num_interp)x1
155
+
156
+ t_1 = kp1['t']
157
+ t_2 = kp2['t']
158
+ t_2 = (t_2 - t_2[0]) * t_scale + t_1
159
+ t_interp = self.interpolate_tensors(t_1, t_2[:1], num_interp)
160
+
161
+ s_1 = kp1['scale']
162
+ s_2 = kp2['scale']
163
+ s_2 = s_2 * s_scale + s_1
164
+ s_interp = self.interpolate_tensors(s_1, s_2[:1], num_interp)
165
+
166
+ kp = kp1['kp'].repeat(num_interp+motion.shape[0]-1, 1)
167
+
168
+ kps_interp = {
169
+ 'pitch': torch.cat([pitch_interp, pitch_2], dim=0),
170
+ 'yaw': torch.cat([yaw_interp, yaw_2], dim=0),
171
+ 'roll': torch.cat([roll_interp, roll_2], dim=0),
172
+ 't': torch.cat([t_interp, t_2], dim=0),
173
+ 'scale': torch.cat([s_interp, s_2], dim=0),
174
+ 'kp': kp
175
+ }
176
+
177
+ kp_intrep = self.get_kp(kps_interp)
178
+
179
+ kp_dri = self.get_kp(kp2)
180
+
181
+ return kp_intrep, kp1, kp_frame1, kp_dri
182
+
183
+ def get_kps(self, kp_ref, kp_frame1, motion, t_scale=0.5, s_scale=0):
184
+ kps_motion = self.detector(motion.to(self.dtype))
185
+
186
+ kps_dri = self.get_kp(kps_motion)
187
+
188
+ t_ref = kp_ref['t']
189
+ t_frame1 = kp_frame1['t']
190
+ t_motion = kps_motion['t']
191
+ kps_motion['t'] = (t_motion - t_frame1) * t_scale + t_ref
192
+
193
+ s_ref = kp_ref['scale']
194
+ s_motion = kps_motion['scale']
195
+ kps_motion['scale'] = s_motion * s_scale + s_ref
196
+
197
+
198
+ kps_motion['kp'] = kp_ref['kp'].repeat(motion.shape[0], 1)
199
+
200
+ kps_motion = self.get_kp(kps_motion)
201
+
202
+ return kps_motion, kps_dri
203
+
204
+ def inference(self, ref, motion):
205
+ kps_ref = self.detector(ref.to(self.dtype))
206
+ kps_motion = self.detector(motion.to(self.dtype))
207
+ kps_motion['kp'] = kps_ref['kp']
208
+
209
+ kp_s = self.get_kp(kps_ref)
210
+ kp_d = self.get_kp(kps_motion)
211
+
212
+ return kp_s, kp_d
src/liveportrait/util.py ADDED
@@ -0,0 +1,492 @@
1
+ # coding: utf-8
2
+
3
+ """
4
+ This file defines various neural network modules and utility functions, including convolutional and residual blocks,
5
+ normalizations, and functions for spatial transformation and tensor manipulation.
6
+ """
7
+
8
+ from torch import nn
9
+ import torch.nn.functional as F
10
+ import torch
11
+ import torch.nn.utils.spectral_norm as spectral_norm
12
+ import math
13
+ import warnings
14
+ import collections.abc
15
+ from itertools import repeat
16
+
17
+ def kp2gaussian(kp, spatial_size, kp_variance):
18
+ """
19
+ Transform a keypoint into gaussian like representation
20
+ """
21
+ mean = kp
22
+
23
+ coordinate_grid = make_coordinate_grid(spatial_size, mean)
24
+ number_of_leading_dimensions = len(mean.shape) - 1
25
+ shape = (1,) * number_of_leading_dimensions + coordinate_grid.shape
26
+ coordinate_grid = coordinate_grid.view(*shape)
27
+ repeats = mean.shape[:number_of_leading_dimensions] + (1, 1, 1, 1)
28
+ coordinate_grid = coordinate_grid.repeat(*repeats)
29
+
30
+ # Preprocess kp shape
31
+ shape = mean.shape[:number_of_leading_dimensions] + (1, 1, 1, 3)
32
+ mean = mean.view(*shape)
33
+
34
+ mean_sub = (coordinate_grid - mean)
35
+
36
+ out = torch.exp(-0.5 * (mean_sub ** 2).sum(-1) / kp_variance)
37
+
38
+ return out
39
+
40
+
41
+ def make_coordinate_grid(spatial_size, ref, **kwargs):
42
+ d, h, w = spatial_size
43
+ x = torch.arange(w).type(ref.dtype).to(ref.device)
44
+ y = torch.arange(h).type(ref.dtype).to(ref.device)
45
+ z = torch.arange(d).type(ref.dtype).to(ref.device)
46
+
47
+ # NOTE: must be right-down-in
48
+ x = (2 * (x / (w - 1)) - 1) # the x axis faces to the right
49
+ y = (2 * (y / (h - 1)) - 1) # the y axis faces to the bottom
50
+ z = (2 * (z / (d - 1)) - 1) # the z axis faces to the inner
51
+
52
+ yy = y.view(1, -1, 1).repeat(d, 1, w)
53
+ xx = x.view(1, 1, -1).repeat(d, h, 1)
54
+ zz = z.view(-1, 1, 1).repeat(1, h, w)
55
+
56
+ meshed = torch.cat([xx.unsqueeze_(3), yy.unsqueeze_(3), zz.unsqueeze_(3)], 3)
57
+
58
+ return meshed
59
+
60
+
61
+ class ConvT2d(nn.Module):
62
+ """
63
+ Upsampling block for use in decoder.
64
+ """
65
+
66
+ def __init__(self, in_features, out_features, kernel_size=3, stride=2, padding=1, output_padding=1):
67
+ super(ConvT2d, self).__init__()
68
+
69
+ self.convT = nn.ConvTranspose2d(in_features, out_features, kernel_size=kernel_size, stride=stride,
70
+ padding=padding, output_padding=output_padding)
71
+ self.norm = nn.InstanceNorm2d(out_features)
72
+
73
+ def forward(self, x):
74
+ out = self.convT(x)
75
+ out = self.norm(out)
76
+ out = F.leaky_relu(out)
77
+ return out
78
+
79
+
80
+ class ResBlock3d(nn.Module):
81
+ """
82
+ Res block, preserve spatial resolution.
83
+ """
84
+
85
+ def __init__(self, in_features, kernel_size, padding):
86
+ super(ResBlock3d, self).__init__()
87
+ self.conv1 = nn.Conv3d(in_channels=in_features, out_channels=in_features, kernel_size=kernel_size, padding=padding)
88
+ self.conv2 = nn.Conv3d(in_channels=in_features, out_channels=in_features, kernel_size=kernel_size, padding=padding)
89
+ self.norm1 = nn.BatchNorm3d(in_features, affine=True)
90
+ self.norm2 = nn.BatchNorm3d(in_features, affine=True)
91
+
92
+ def forward(self, x):
93
+ out = self.norm1(x)
94
+ out = F.relu(out)
95
+ out = self.conv1(out)
96
+ out = self.norm2(out)
97
+ out = F.relu(out)
98
+ out = self.conv2(out)
99
+ out += x
100
+ return out
101
+
102
+
103
+ class UpBlock3d(nn.Module):
104
+ """
105
+ Upsampling block for use in decoder.
106
+ """
107
+
108
+ def __init__(self, in_features, out_features, kernel_size=3, padding=1, groups=1):
109
+ super(UpBlock3d, self).__init__()
110
+
111
+ self.conv = nn.Conv3d(in_channels=in_features, out_channels=out_features, kernel_size=kernel_size,
112
+ padding=padding, groups=groups)
113
+ self.norm = nn.BatchNorm3d(out_features, affine=True)
114
+
115
+ def forward(self, x):
116
+ out = F.interpolate(x, scale_factor=(1, 2, 2))
117
+ out = self.conv(out)
118
+ out = self.norm(out)
119
+ out = F.relu(out)
120
+ return out
121
+
122
+
123
+ class DownBlock2d(nn.Module):
124
+ """
125
+ Downsampling block for use in encoder.
126
+ """
127
+
128
+ def __init__(self, in_features, out_features, kernel_size=3, padding=1, groups=1):
129
+ super(DownBlock2d, self).__init__()
130
+ self.conv = nn.Conv2d(in_channels=in_features, out_channels=out_features, kernel_size=kernel_size, padding=padding, groups=groups)
131
+ self.norm = nn.BatchNorm2d(out_features, affine=True)
132
+ self.pool = nn.AvgPool2d(kernel_size=(2, 2))
133
+
134
+ def forward(self, x):
135
+ out = self.conv(x)
136
+ out = self.norm(out)
137
+ out = F.relu(out)
138
+ out = self.pool(out)
139
+ return out
140
+
141
+
142
+ class DownBlock3d(nn.Module):
143
+ """
144
+ Downsampling block for use in encoder.
145
+ """
146
+
147
+ def __init__(self, in_features, out_features, kernel_size=3, padding=1, groups=1):
148
+ super(DownBlock3d, self).__init__()
149
+ '''
150
+ self.conv = nn.Conv3d(in_channels=in_features, out_channels=out_features, kernel_size=kernel_size,
151
+ padding=padding, groups=groups, stride=(1, 2, 2))
152
+ '''
153
+ self.conv = nn.Conv3d(in_channels=in_features, out_channels=out_features, kernel_size=kernel_size,
154
+ padding=padding, groups=groups)
155
+ self.norm = nn.BatchNorm3d(out_features, affine=True)
156
+ self.pool = nn.AvgPool3d(kernel_size=(1, 2, 2))
157
+
158
+ def forward(self, x):
159
+ out = self.conv(x)
160
+ out = self.norm(out)
161
+ out = F.relu(out)
162
+ out = self.pool(out)
163
+ return out
164
+
165
+
166
+ class SameBlock2d(nn.Module):
167
+ """
168
+ Simple block, preserve spatial resolution.
169
+ """
170
+
171
+ def __init__(self, in_features, out_features, groups=1, kernel_size=3, padding=1, lrelu=False):
172
+ super(SameBlock2d, self).__init__()
173
+ self.conv = nn.Conv2d(in_channels=in_features, out_channels=out_features, kernel_size=kernel_size, padding=padding, groups=groups)
174
+ self.norm = nn.BatchNorm2d(out_features, affine=True)
175
+ if lrelu:
176
+ self.ac = nn.LeakyReLU()
177
+ else:
178
+ self.ac = nn.ReLU()
179
+
180
+ def forward(self, x):
181
+ out = self.conv(x)
182
+ out = self.norm(out)
183
+ out = self.ac(out)
184
+ return out
185
+
186
+
187
+ class Encoder(nn.Module):
188
+ """
189
+ Hourglass Encoder
190
+ """
191
+
192
+ def __init__(self, block_expansion, in_features, num_blocks=3, max_features=256):
193
+ super(Encoder, self).__init__()
194
+
195
+ down_blocks = []
196
+ for i in range(num_blocks):
197
+ down_blocks.append(DownBlock3d(in_features if i == 0 else min(max_features, block_expansion * (2 ** i)), min(max_features, block_expansion * (2 ** (i + 1))), kernel_size=3, padding=1))
198
+ self.down_blocks = nn.ModuleList(down_blocks)
199
+
200
+ def forward(self, x):
201
+ outs = [x]
202
+ for down_block in self.down_blocks:
203
+ outs.append(down_block(outs[-1]))
204
+ return outs
205
+
206
+
207
+ class Decoder(nn.Module):
208
+ """
209
+ Hourglass Decoder
210
+ """
211
+
212
+ def __init__(self, block_expansion, in_features, num_blocks=3, max_features=256):
213
+ super(Decoder, self).__init__()
214
+
215
+ up_blocks = []
216
+
217
+ for i in range(num_blocks)[::-1]:
218
+ in_filters = (1 if i == num_blocks - 1 else 2) * min(max_features, block_expansion * (2 ** (i + 1)))
219
+ out_filters = min(max_features, block_expansion * (2 ** i))
220
+ up_blocks.append(UpBlock3d(in_filters, out_filters, kernel_size=3, padding=1))
221
+
222
+ self.up_blocks = nn.ModuleList(up_blocks)
223
+ self.out_filters = block_expansion + in_features
224
+
225
+ self.conv = nn.Conv3d(in_channels=self.out_filters, out_channels=self.out_filters, kernel_size=3, padding=1)
226
+ self.norm = nn.BatchNorm3d(self.out_filters, affine=True)
227
+
228
+ def forward(self, x):
229
+ out = x.pop()
230
+ for up_block in self.up_blocks:
231
+ out = up_block(out)
232
+ skip = x.pop()
233
+ out = torch.cat([out, skip], dim=1)
234
+ out = self.conv(out)
235
+ out = self.norm(out)
236
+ out = F.relu(out)
237
+ return out
238
+
239
+
240
+ class Hourglass(nn.Module):
241
+ """
242
+ Hourglass architecture.
243
+ """
244
+
245
+ def __init__(self, block_expansion, in_features, num_blocks=3, max_features=256):
246
+ super(Hourglass, self).__init__()
247
+ self.encoder = Encoder(block_expansion, in_features, num_blocks, max_features)
248
+ self.decoder = Decoder(block_expansion, in_features, num_blocks, max_features)
249
+ self.out_filters = self.decoder.out_filters
250
+
251
+ def forward(self, x):
252
+ return self.decoder(self.encoder(x))
253
+
254
+
255
+ class SPADE(nn.Module):
256
+ def __init__(self, norm_nc, label_nc):
257
+ super().__init__()
258
+
259
+ self.param_free_norm = nn.InstanceNorm2d(norm_nc, affine=False)
260
+ nhidden = 128
261
+
262
+ self.mlp_shared = nn.Sequential(
263
+ nn.Conv2d(label_nc, nhidden, kernel_size=3, padding=1),
264
+ nn.ReLU())
265
+ self.mlp_gamma = nn.Conv2d(nhidden, norm_nc, kernel_size=3, padding=1)
266
+ self.mlp_beta = nn.Conv2d(nhidden, norm_nc, kernel_size=3, padding=1)
267
+
268
+ def forward(self, x, segmap):
269
+ normalized = self.param_free_norm(x)
270
+ segmap = F.interpolate(segmap, size=x.size()[2:], mode='nearest')
271
+ actv = self.mlp_shared(segmap)
272
+ gamma = self.mlp_gamma(actv)
273
+ beta = self.mlp_beta(actv)
274
+ out = normalized * (1 + gamma) + beta
275
+ return out
276
+
277
+
278
+ class SPADEResnetBlock(nn.Module):
279
+ def __init__(self, fin, fout, norm_G, label_nc, use_se=False, dilation=1):
280
+ super().__init__()
281
+ # Attributes
282
+ self.learned_shortcut = (fin != fout)
283
+ fmiddle = min(fin, fout)
284
+ self.use_se = use_se
285
+ # create conv layers
286
+ self.conv_0 = nn.Conv2d(fin, fmiddle, kernel_size=3, padding=dilation, dilation=dilation)
287
+ self.conv_1 = nn.Conv2d(fmiddle, fout, kernel_size=3, padding=dilation, dilation=dilation)
288
+ if self.learned_shortcut:
289
+ self.conv_s = nn.Conv2d(fin, fout, kernel_size=1, bias=False)
290
+ # apply spectral norm if specified
291
+ if 'spectral' in norm_G:
292
+ self.conv_0 = spectral_norm(self.conv_0)
293
+ self.conv_1 = spectral_norm(self.conv_1)
294
+ if self.learned_shortcut:
295
+ self.conv_s = spectral_norm(self.conv_s)
296
+ # define normalization layers
297
+ self.norm_0 = SPADE(fin, label_nc)
298
+ self.norm_1 = SPADE(fmiddle, label_nc)
299
+ if self.learned_shortcut:
300
+ self.norm_s = SPADE(fin, label_nc)
301
+
302
+ def forward(self, x, seg1):
303
+ x_s = self.shortcut(x, seg1)
304
+ dx = self.conv_0(self.actvn(self.norm_0(x, seg1)))
305
+ dx = self.conv_1(self.actvn(self.norm_1(dx, seg1)))
306
+ out = x_s + dx
307
+ return out
308
+
309
+ def shortcut(self, x, seg1):
310
+ if self.learned_shortcut:
311
+ x_s = self.conv_s(self.norm_s(x, seg1))
312
+ else:
313
+ x_s = x
314
+ return x_s
315
+
316
+ def actvn(self, x):
317
+ return F.leaky_relu(x, 2e-1)
318
+
319
+
320
+ def filter_state_dict(state_dict, remove_name='fc'):
321
+ new_state_dict = {}
322
+ for key in state_dict:
323
+ if remove_name in key:
324
+ continue
325
+ new_state_dict[key] = state_dict[key]
326
+ return new_state_dict
327
+
328
+
329
+ class GRN(nn.Module):
330
+ """ GRN (Global Response Normalization) layer
331
+ """
332
+
333
+ def __init__(self, dim):
334
+ super().__init__()
335
+ self.gamma = nn.Parameter(torch.zeros(1, 1, 1, dim))
336
+ self.beta = nn.Parameter(torch.zeros(1, 1, 1, dim))
337
+
338
+ def forward(self, x):
339
+ Gx = torch.norm(x, p=2, dim=(1, 2), keepdim=True)
340
+ Nx = Gx / (Gx.mean(dim=-1, keepdim=True) + 1e-6)
341
+ return self.gamma * (x * Nx) + self.beta + x
342
+
343
+
344
+ class LayerNorm(nn.Module):
345
+ r""" LayerNorm that supports two data formats: channels_last (default) or channels_first.
346
+ The ordering of the dimensions in the inputs. channels_last corresponds to inputs with
347
+ shape (batch_size, height, width, channels) while channels_first corresponds to inputs
348
+ with shape (batch_size, channels, height, width).
349
+ """
350
+
351
+ def __init__(self, normalized_shape, eps=1e-6, data_format="channels_last"):
352
+ super().__init__()
353
+ self.weight = nn.Parameter(torch.ones(normalized_shape, dtype=torch.float32))
354
+ self.bias = nn.Parameter(torch.zeros(normalized_shape, dtype=torch.float32))
355
+ self.eps = float(eps)
356
+ self.data_format = data_format
357
+ if self.data_format not in ["channels_last", "channels_first"]:
358
+ raise NotImplementedError
359
+ self.normalized_shape = (normalized_shape, )
360
+
361
+ def _apply(self, fn):
362
+ """
363
+ Override _apply to take full control of parameter conversion.
364
+ Intercepts all .cuda(), .cpu(), .half(), and .to() calls.
365
+ """
366
+
367
+ for name, param in self._parameters.items():
368
+ if param is not None:
369
+ dummy_probe = param.data.view(-1)[:1]
370
+
371
+ try:
372
+ target_tensor = fn(dummy_probe)
373
+
374
+ target_device = target_tensor.device
375
+ target_dtype = target_tensor.dtype
376
+ except:
377
+ target_device = param.device
378
+ target_dtype = param.dtype
379
+
380
+ if name in ['weight', 'bias']:
381
+ # Core logic: if the parameter is weight/bias and the target is half precision, force it to stay in FP32
382
+ if target_dtype in [torch.float16, torch.bfloat16]:
383
+ new_data = param.data.to(device=target_device, dtype=torch.float32)
384
+ else:
385
+ new_data = fn(param.data)
386
+ else:
387
+ new_data = fn(param.data)
388
+
389
+ param.data = new_data
390
+
391
+ if param.grad is not None:
392
+ param.grad.data = param.grad.data.to(device=new_data.device, dtype=new_data.dtype)
393
+
394
+ for name, buf in self._buffers.items():
395
+ if buf is not None:
396
+ self._buffers[name] = fn(buf)
397
+
398
+ return self
399
+
400
+ def forward(self, x):
401
+ dtype = x.dtype
402
+ x = x.float()
403
+ if self.data_format == "channels_last":
404
+ x = F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
405
+ elif self.data_format == "channels_first":
406
+ x = x.permute(0, 2, 3, 1) # BCHW → BHWC
407
+ x = F.layer_norm(x, self.normalized_shape, self.weight, self.bias, self.eps)
408
+ x = x.permute(0, 3, 1, 2) # BHWC → BCHW
409
+ return x.to(dtype)
410
+
411
+
412
+ def _no_grad_trunc_normal_(tensor, mean, std, a, b):
413
+ # Cut & paste from PyTorch official master until it's in a few official releases - RW
414
+ # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
415
+ def norm_cdf(x):
416
+ # Computes standard normal cumulative distribution function
417
+ return (1. + math.erf(x / math.sqrt(2.))) / 2.
418
+
419
+ if (mean < a - 2 * std) or (mean > b + 2 * std):
420
+ warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
421
+ "The distribution of values may be incorrect.",
422
+ stacklevel=2)
423
+
424
+ with torch.no_grad():
425
+ # Values are generated by using a truncated uniform distribution and
426
+ # then using the inverse CDF for the normal distribution.
427
+ # Get upper and lower cdf values
428
+ l = norm_cdf((a - mean) / std)
429
+ u = norm_cdf((b - mean) / std)
430
+
431
+ # Uniformly fill tensor with values from [l, u], then translate to
432
+ # [2l-1, 2u-1].
433
+ tensor.uniform_(2 * l - 1, 2 * u - 1)
434
+
435
+ # Use inverse cdf transform for normal distribution to get truncated
436
+ # standard normal
437
+ tensor.erfinv_()
438
+
439
+ # Transform to proper mean, std
440
+ tensor.mul_(std * math.sqrt(2.))
441
+ tensor.add_(mean)
442
+
443
+ # Clamp to ensure it's in the proper range
444
+ tensor.clamp_(min=a, max=b)
445
+ return tensor
446
+
447
+
448
+ def drop_path(x, drop_prob=0., training=False, scale_by_keep=True):
449
+ """ Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
450
+
451
+ This is the same as the DropConnect impl I created for EfficientNet, etc networks, however,
452
+ the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
453
+ See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for
454
+ changing the layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use
455
+ 'survival rate' as the argument.
456
+
457
+ """
458
+ if drop_prob == 0. or not training:
459
+ return x
460
+ keep_prob = 1 - drop_prob
461
+ shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets
462
+ random_tensor = x.new_empty(shape).bernoulli_(keep_prob)
463
+ if keep_prob > 0.0 and scale_by_keep:
464
+ random_tensor.div_(keep_prob)
465
+ return x * random_tensor
466
+
467
+
468
+ class DropPath(nn.Module):
469
+ """ Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
470
+ """
471
+
472
+ def __init__(self, drop_prob=None, scale_by_keep=True):
473
+ super(DropPath, self).__init__()
474
+ self.drop_prob = drop_prob
475
+ self.scale_by_keep = scale_by_keep
476
+
477
+ def forward(self, x):
478
+ return drop_path(x, self.drop_prob, self.training, self.scale_by_keep)
479
+
480
+
481
+ def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
482
+ return _no_grad_trunc_normal_(tensor, mean, std, a, b)
483
+
484
+ # From PyTorch internals
485
+ def _ntuple(n):
486
+ def parse(x):
487
+ if isinstance(x, collections.abc.Iterable) and not isinstance(x, str):
488
+ return tuple(x)
489
+ return tuple(repeat(x, n))
490
+ return parse
491
+
492
+ to_2tuple = _ntuple(2)
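Note on the custom LayerNorm above: the overridden _apply keeps weight and bias in FP32 even when the module is cast to half precision, while forward computes in FP32 and casts back to the input dtype. A minimal sketch of the intended behavior (not part of the repository; shapes are illustrative):

import torch

ln = LayerNorm(320, data_format="channels_first")   # the class defined above
ln = ln.cuda().half()             # .half() is intercepted by the custom _apply
print(ln.weight.dtype)            # torch.float32 -- parameters stay in FP32 for numerical stability

x = torch.randn(1, 320, 64, 64, device="cuda", dtype=torch.float16)
y = ln(x)                         # normalization runs in FP32 internally
print(y.dtype)                    # torch.float16 -- output is cast back to the input dtype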
src/modeling/__pycache__/engine_model.cpython-310.pyc ADDED
Binary file (9.13 kB). View file
 
src/modeling/__pycache__/framed_models.cpython-310.pyc ADDED
Binary file (5.97 kB). View file
 
src/modeling/__pycache__/onnx_export.cpython-310.pyc ADDED
Binary file (2.52 kB). View file
 
src/modeling/engine_model.py ADDED
@@ -0,0 +1,308 @@
1
+ import tensorrt as trt
2
+ import pycuda.driver as cuda
3
+ import pycuda.autoinit
4
+ import numpy as np
5
+ import torch
6
+ import traceback
7
+ import os
8
+ from PIL import Image
9
+
10
+ TRT_LOGGER = trt.Logger()
11
+ SKIP_ENGINE_MODEL_CHECK = True
12
+
13
+ def get_engine(engine_file_path):
14
+ if os.path.exists(engine_file_path):
15
+ print(f"Loading engine from file {engine_file_path}...")
16
+ with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
17
+ return runtime.deserialize_cuda_engine(f.read())
18
+ else:
19
+ print(f"No file named {engine_file_path}! Please check the input.")
20
+ return None
21
+
22
+
23
+ def numpy_to_torch_dtype(np_dtype):
24
+ mapping = {
25
+ np.float32: torch.float,
26
+ np.float64: torch.double,
27
+ np.float16: torch.half,
28
+ np.int32: torch.int32,
29
+ np.int64: torch.int64,
30
+ np.int16: torch.int16,
31
+ np.int8: torch.int8,
32
+ np.uint8: torch.uint8,
33
+ np.bool_: torch.bool
34
+ }
35
+ return mapping.get(np_dtype, None)
36
+
37
+ def match_shape(a, b):
38
+ if(len(a) == len(b)):
39
+ return tuple(a) == tuple(b)
40
+ elif len(a) > len(b):
41
+ if(a[0] == 1):
42
+ return match_shape(a[1:], b)
43
+ else:
44
+ if(b[0] == 1):
45
+ return match_shape(a, b[1:])
46
+ return False
47
+
48
+ def match_dtype(a, b):
49
+ if(a.__class__ == torch.dtype):
50
+ a = torch.tensor(0,dtype=a).numpy().dtype
51
+ return a == b
52
+
53
+
54
+ class EngineModel:
55
+ def __init__(self, engine_file_path, stream = None, device_int = 0, extra_lock = None):
56
+ self.device_int = device_int
57
+ self.extra_lock = extra_lock
58
+ if not(self.extra_lock is None):
59
+ self.extra_lock.acquire()
60
+ assert os.path.exists(engine_file_path), "Engine model path does not exist!"
61
+ self.ctx = cuda.Device(self.device_int).make_context()
62
+ try:
63
+ self.engine = get_engine(engine_file_path) # load the TensorRT engine
64
+ input_nvars = 0
65
+ output_nvars = 0
66
+ self.input_names = []
67
+ self.output_names = []
68
+
69
+ # Helper function: get a safe shape (resolve any -1 dynamic dims)
70
+ def get_safe_shape(engine, name):
71
+ shape = engine.get_tensor_shape(name)
72
+ # If the shape contains -1 (a dynamic dimension)
73
+ if -1 in shape:
74
+ # Get (min, opt, max) from optimization profile 0
75
+ # Take index [2], the max shape, so enough device memory is allocated
76
+ profile = engine.get_tensor_profile_shape(name, 0)
77
+ if profile:
78
+ print(f"[EngineModel] Detected dynamic shape for {name}: {shape} -> Using Max Profile: {profile[2]}")
79
+ return profile[2]
80
+ else:
81
+ # If no profile is available (this usually happens for outputs), it is a risk point
82
+ # Print a warning here to avoid raising an error
83
+ print(f"[EngineModel] Warning: Dynamic output {name} has no profile. Mem alloc might fail.")
84
+ return shape
85
+
86
+ for binding in self.engine: # iterate over all tensors and split them into inputs/outputs
87
+ mode = self.engine.get_tensor_mode(binding)
88
+ if(mode== trt.TensorIOMode.INPUT):
89
+ input_nvars += 1
90
+ self.input_names.append(binding)
91
+ elif(mode == trt.TensorIOMode.OUTPUT):
92
+ output_nvars += 1
93
+ self.output_names.append(binding)
94
+
95
+ self.input_nvars = input_nvars # number of inputs
96
+ self.output_nvars = output_nvars # number of outputs
97
+
98
+ self.input_shapes = {name : get_safe_shape(self.engine, name) for name in self.input_names} # record the shape and dtype of every I/O tensor
99
+ self.input_dtypes = {name : self.engine.get_tensor_dtype(name) for name in self.input_names}
100
+ self.input_nbytes = {
101
+ name : trt.volume(self.input_shapes[name]) * trt.nptype(self.input_dtypes[name])().itemsize
102
+ for name in self.input_names
103
+ } # nbytes = how many bytes of CUDA memory the tensor occupies
104
+ self.output_shapes = {name : get_safe_shape(self.engine, name) for name in self.output_names}
105
+ self.output_dtypes = {name : self.engine.get_tensor_dtype(name) for name in self.output_names}
106
+ self.output_nbytes = {
107
+ name : trt.volume(self.output_shapes[name]) * trt.nptype(self.output_dtypes[name])().itemsize
108
+ for name in self.output_names
109
+ }
110
+ self.dinputs = {name : cuda.mem_alloc(self.input_nbytes[name]) for name in self.input_names} # allocate CUDA device memory for every input/output
111
+ self.doutputs = {name :cuda.mem_alloc(self.output_nbytes[name]) for name in self.output_names}
112
+ self.context = self.engine.create_execution_context() # create the ExecutionContext
113
+ if stream is None:
114
+ self.stream = cuda.Stream()
115
+ else:
116
+ self.stream = stream
117
+ for name in self.input_names: # bind tensor addresses to the context
118
+ self.context.set_tensor_address(name, int(self.dinputs[name]))
119
+ for name in self.output_names:
120
+ self.context.set_tensor_address(name, int(self.doutputs[name]))
121
+ self.houtputs = {
122
+ name :
123
+ cuda.pagelocked_empty(
124
+ trt.volume(self.output_shapes[name]), dtype=trt.nptype(self.output_dtypes[name])
125
+ ) for name in self.output_names
126
+ } # allocate page-locked host memory to hold the outputs
127
+ except:
128
+ self.ctx.pop()
129
+ raise Exception("CUDA Initialization Failed!")
130
+ self.ctx.pop()
131
+ if not(self.extra_lock is None):
132
+ self.extra_lock.release()
133
+
134
+ def __call__(self, skip_check=SKIP_ENGINE_MODEL_CHECK, output_list=[], return_tensor=False, **inputs):
135
+ if not skip_check:
136
+ for name in inputs:
137
+ assert name in self.input_names
138
+ assert match_shape(inputs[name].shape, self.input_shapes[name])
139
+ assert match_dtype(inputs[name].dtype, trt.nptype(self.input_dtypes[name]))
140
+ if not(self.extra_lock is None):
141
+ self.extra_lock.acquire()
142
+ self.ctx.push()
143
+ r = {}
144
+ try:
145
+
146
+ for name in inputs:
147
+ hinput = inputs[name]
148
+ if (isinstance(hinput,torch.Tensor) and hinput.device.type=="cuda" and hinput.device.index==self.device_int):
149
+ hinput_con = hinput.contiguous()
150
+ ptr = hinput_con.data_ptr()
151
+ cuda.memcpy_dtod_async(self.dinputs[name], ptr, self.input_nbytes[name], self.stream)
152
+ else:
153
+ hinput_con = np.ascontiguousarray(hinput)
154
+ cuda.memcpy_htod_async(self.dinputs[name], hinput_con, self.stream)
155
+ for name in self.input_names:
156
+ if name not in inputs:
157
+ self.context.set_input_shape(name, self.input_shapes[name])
158
+ self.context.execute_async_v3(self.stream.handle)
159
+ if(return_tensor):
160
+ for name in output_list:
161
+ t = torch.zeros(trt.volume(self.output_shapes[name]), device=f"cuda:{self.device_int}", dtype=numpy_to_torch_dtype(trt.nptype(self.output_dtypes[name])))
162
+ ptr = t.data_ptr()
163
+ cuda.memcpy_dtod_async(ptr, self.doutputs[name], self.output_nbytes[name], self.stream)
164
+ t = t.reshape(tuple(self.output_shapes[name]))
165
+ r[name] = t
166
+ else:
167
+ for name in output_list:
168
+ cuda.memcpy_dtoh_async(self.houtputs[name], self.doutputs[name], self.stream)
169
+ r[name] = self.houtputs[name]
170
+ self.stream.synchronize()
171
+ except Exception as e:
172
+ print("TensorRT Execution Failed!")
173
+ traceback.print_exc()
174
+ self.ctx.pop()
175
+ if not(self.extra_lock is None):
176
+ self.extra_lock.release()
177
+ return None
178
+ self.ctx.pop()
179
+ if not(self.extra_lock is None):
180
+ self.extra_lock.release()
181
+ return r
182
+
183
+
184
+ def prefill(self, skip_check=SKIP_ENGINE_MODEL_CHECK, **inputs):
185
+ if not (skip_check):
186
+ for name in inputs:
187
+ in_input = (name in self.input_names)
188
+ assert in_input or (name in self.output_names)
189
+ assert match_shape(inputs[name].shape, self.input_shapes[name] if in_input else self.output_shapes[name])
190
+ assert match_dtype(inputs[name].dtype, trt.nptype(self.input_dtypes[name] if in_input else self.output_dtypes[name]))
191
+ if not(self.extra_lock is None):
192
+ self.extra_lock.acquire()
193
+ self.ctx.push()
194
+ try:
195
+ for name in inputs:
196
+ in_input = (name in self.input_names)
197
+ hinput = inputs[name]
198
+
199
+ dst_ptr = self.dinputs[name] if in_input else self.doutputs[name]
200
+ real_nbytes = 0
201
+ if isinstance(hinput, torch.Tensor):
202
+ real_nbytes = hinput.numel() * hinput.element_size()
203
+ else:
204
+ # assume it is a numpy array
205
+ real_nbytes = hinput.nbytes
206
+
207
+ if (isinstance(hinput,torch.Tensor) and hinput.device.type=="cuda" and hinput.device.index==self.device_int):
208
+ hinput_con = hinput.contiguous()
209
+ ptr = hinput_con.data_ptr()
210
+ cuda.memcpy_dtod_async(dst_ptr, ptr, real_nbytes, self.stream)
211
+ else:
212
+ hinput_con = np.ascontiguousarray(hinput)
213
+ cuda.memcpy_htod_async(dst_ptr, hinput_con, self.stream) # copy the contiguous host buffer
214
+ self.stream.synchronize()
215
+ except Exception as e:
216
+ traceback.print_exc()
217
+ self.ctx.pop()
218
+ if not(self.extra_lock is None):
219
+ self.extra_lock.release()
220
+ return False
221
+ self.ctx.pop()
222
+ if not(self.extra_lock is None):
223
+ self.extra_lock.release()
224
+ return True
225
+
226
+ def __repr__(self):
227
+ r = "TensorRTEngineModel(\n\tInput=[\n"
228
+ for name in self.input_names:
229
+ r += f"\t\t{name}: \t{trt.nptype(self.input_dtypes[name]).__name__}{self.input_shapes[name]},\n"
230
+ r += "\t],Output=[\n"
231
+ for name in self.output_names:
232
+ r += f"\t\t{name}: \t{trt.nptype(self.output_dtypes[name]).__name__}{self.output_shapes[name]},\n"
233
+ r+="\t]\n)"
234
+ return r
235
+
236
+ def link(self, other, var_map, skip_check=SKIP_ENGINE_MODEL_CHECK):
237
+ assert self.device_int == other.device_int
238
+ if not (skip_check):
239
+ for source in var_map:
240
+ assert source in other.output_names
241
+ target = var_map[source]
242
+ assert target in self.input_names
243
+ assert match_shape(other.output_shapes[source], self.input_shapes[target])
244
+ assert match_dtype(other.output_dtypes[source], self.input_dtypes[target])
245
+
246
+ if not(self.extra_lock is None):
247
+ self.extra_lock.acquire()
248
+ self.ctx.push()
249
+ try:
250
+ for source in var_map:
251
+ target = var_map[source]
252
+ self.context.set_tensor_address(target, int(other.doutputs[source]))
253
+ except Exception as e:
254
+ traceback.print_exc()
255
+ self.ctx.pop()
256
+ if not(self.extra_lock is None):
257
+ self.extra_lock.release()
258
+ return False
259
+ self.ctx.pop()
260
+ if not(self.extra_lock is None):
261
+ self.extra_lock.release()
262
+ return True
263
+
264
+ def bind(self, var_map, skip_check=SKIP_ENGINE_MODEL_CHECK):
265
+ if not (skip_check):
266
+ for source in var_map:
267
+ assert source in self.output_names
268
+ target = var_map[source]
269
+ assert target in self.input_names
270
+ assert match_shape(self.output_shapes[source], self.input_shapes[target])
271
+ assert match_dtype(self.output_dtypes[source], self.input_dtypes[target])
272
+
273
+ if not(self.extra_lock is None):
274
+ self.extra_lock.acquire()
275
+ self.ctx.push()
276
+ try:
277
+ for source in var_map:
278
+ target = var_map[source]
279
+ self.context.set_tensor_address(target, int(self.doutputs[source]))
280
+ except Exception as e:
281
+ traceback.print_exc()
282
+ self.ctx.pop()
283
+ if not(self.extra_lock is None):
284
+ self.extra_lock.release()
285
+ return False
286
+ self.ctx.pop()
287
+ if not(self.extra_lock is None):
288
+ self.extra_lock.release()
289
+ return True
290
+
291
+ def unlink(self):
292
+
293
+ if not(self.extra_lock is None):
294
+ self.extra_lock.acquire()
295
+ self.ctx.push()
296
+ try:
297
+ for name in self.input_names:
298
+ self.context.set_tensor_address(name, int(self.dinputs[name]))
299
+ except:
300
+ self.ctx.pop()
301
+ if not(self.extra_lock is None):
302
+ self.extra_lock.release()
303
+ return False
304
+ self.ctx.pop()
305
+ if not(self.extra_lock is None):
306
+ self.extra_lock.release()
307
+ return True
308
+
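A minimal usage sketch for EngineModel (not from the repository; the engine path is a placeholder and the zero-filled inputs are for illustration only):

import numpy as np
import tensorrt as trt

model = EngineModel("path/to/unet_work.engine", device_int=0)
print(model)   # __repr__ lists every input/output binding with its dtype and shape

# Host inputs keyed by binding name; CUDA torch tensors on the same device are also accepted.
inputs = {name: np.zeros(tuple(model.input_shapes[name]),
                         dtype=trt.nptype(model.input_dtypes[name]))
          for name in model.input_names}
outputs = model(output_list=list(model.output_names), return_tensor=False, **inputs)
# Each requested output comes back as a flat page-locked array; reshape it with model.output_shapes[name].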
src/modeling/framed_models.py ADDED
@@ -0,0 +1,177 @@
1
+ import torch
2
+ from torch import nn
3
+ from einops import rearrange
4
+ from polygraphy.backend.trt import Profile
5
+
6
+ class unet_work(nn.Module): # Ugly Power Strip
7
+ def __init__(self, pose_guider, motion_encoder, unet, vae, scheduler, timestep):
8
+ super().__init__()
9
+ self.pose_guider = pose_guider
10
+ self.motion_encoder = motion_encoder
11
+ self.unet = unet
12
+ self.vae = vae
13
+ self.scheduler = scheduler
14
+ self.timesteps = timestep
15
+
16
+ def decode_slice(self, vae, x):
17
+ x = x / 0.18215
18
+ x = vae.decode(x).sample
19
+ x = rearrange(x, "b c h w -> b h w c")
20
+ x = (x / 2 + 0.5).clamp(0, 1)
21
+ return x
22
+
23
+ def forward(self, sample, encoder_hidden_states, motion_hidden_states, motion, pose_cond_fea, pose, new_noise,
24
+ d00, d01, d10, d11, d20, d21, m, u10, u11, u12, u20, u21, u22, u30, u31, u32
25
+ ):
26
+ new_pose_cond_fea = self.pose_guider(pose)
27
+ pose_cond_fea = torch.cat([pose_cond_fea, new_pose_cond_fea], dim=2)
28
+ new_motion_hidden_states = self.motion_encoder(motion)
29
+ motion_hidden_states = torch.cat([motion_hidden_states, new_motion_hidden_states], dim=1)
30
+ encoder_hidden_states = [encoder_hidden_states, motion_hidden_states]
31
+ score = self.unet(sample, self.timesteps, encoder_hidden_states, pose_cond_fea, d00, d01, d10, d11, d20, d21, m, u10, u11, u12, u20, u21, u22, u30, u31, u32)
32
+ score = rearrange(score, 'b c f h w -> (b f) c h w')
33
+ sample = rearrange(sample, 'b c f h w -> (b f) c h w')
34
+ latents_model_input, pred_original_sample = self.scheduler.step(
35
+ score, self.timesteps, sample, return_dict=False
36
+ )
37
+ latents_model_input = latents_model_input.to(sample.dtype)
38
+ pred_original_sample = pred_original_sample.to(sample.dtype)
39
+ latents_model_input = rearrange(latents_model_input, '(b f) c h w -> b c f h w', f=16)
40
+ pred_video = self.decode_slice(self.vae, pred_original_sample[:4])
41
+ latents = torch.cat([latents_model_input[:, :, 4:, :, :], new_noise], dim=2)
42
+ pose_cond_fea_out = pose_cond_fea[:, :, 4:, :, :]
43
+ motion_hidden_states_out = motion_hidden_states[:, 4:, :, :]
44
+ motion_out = motion_hidden_states[:, :1, :, :]
45
+ return pred_video, latents, pose_cond_fea_out, motion_hidden_states_out, motion_out, pred_original_sample[:1]
46
+
47
+ def get_sample_input(self, batchsize, height, width, dtype, device):
48
+ tw, ts, tb = 4, 4, 16 # temporal window size| temporal adaptive steps | temporal batch size
49
+ ml, mc, mh, mw = 32, 16, 224, 224 # motion latent size | motion channels | motion input height | width
50
+ b, h, w = batchsize, height, width
51
+ lh, lw = height // 8, width // 8 # latent height | width
52
+ cd0, cd1, cd2, cm, cu1, cu2, cu3 = 320, 640, 1280, 1280, 1280, 640, 320 # unet channels
53
+ emb = 768 # CLIP Embedding Dims | TAESDV Channels
54
+ lc, ic = 4, 3 # latent | image channels
55
+ profile = {
56
+ "sample" : [b, lc, tb, lh, lw],
57
+ "encoder_hidden_states" : [b, 1, emb],
58
+ "motion_hidden_states" : [b, tw * (ts - 1), ml, mc],
59
+ "motion": [b, ic, tw, mh, mw],
60
+ "pose_cond_fea" : [b, cd0, tw * (ts - 1), lh, lw],
61
+ "pose" : [b, ic, tw, h, w],
62
+ "new_noise" : [b, lc, tw, lh, lw],
63
+ "d00" : [b, lh * lw, cd0],
64
+ "d01" : [b, lh * lw, cd0],
65
+ "d10" : [b, lh * lw // 4, cd1],
66
+ "d11" : [b, lh * lw // 4, cd1],
67
+ "d20" : [b, lh * lw // 16, cd2],
68
+ "d21" : [b, lh * lw // 16, cd2],
69
+ "m" : [b, lh * lw // 64, cm],
70
+ "u10" : [b, lh * lw // 16, cu1],
71
+ "u11" : [b, lh * lw // 16, cu1],
72
+ "u12" : [b, lh * lw // 16, cu1],
73
+ "u20" : [b, lh * lw // 4, cu2],
74
+ "u21" : [b, lh * lw // 4, cu2],
75
+ "u22" : [b, lh * lw // 4, cu2],
76
+ "u30" : [b, lh * lw, cu3],
77
+ "u31" : [b, lh * lw, cu3],
78
+ "u32" : [b, lh * lw, cu3],
79
+ }
80
+ return {k: torch.randn(profile[k], dtype=dtype, device=device) for k in profile}
81
+
82
+ def get_input_names(self):
83
+ return ["sample", "encoder_hidden_states", "motion_hidden_states",
84
+ "motion", "pose_cond_fea", "pose", "new_noise",
85
+ "d00", "d01", "d10", "d11", "d20", "d21", "m", "u10", "u11", "u12",
86
+ "u20", "u21", "u22", "u30", "u31", "u32"]
87
+
88
+ def get_output_names(self):
89
+ return ["pred_video", "latents", "pose_cond_fea_out",
90
+ "motion_hidden_states_out", "motion_out", "latent_first"]
91
+
92
+ def get_dynamic_axes(self):
93
+ dynamic_axes = {
94
+ "sample": {3:"h_64", 4:"w_64"},
95
+ "pose_cond_fea": {3:"h_64", 4:"w_64"},
96
+ "pose": {3:"h_512", 4:"h_512"},
97
+ "new_noise": {3: "h_64", 4: "w_64"},
98
+ "d00" : {1: "len_4096"},
99
+ "d01" : {1: "len_4096"},
100
+ "u30" : {1: "len_4096"},
101
+ "u31" : {1: "len_4096"},
102
+ "u32" : {1: "len_4096"},
103
+ "d10" : {1: "len_1024"},
104
+ "d11" : {1: "len_1024"},
105
+ "u20" : {1: "len_1024"},
106
+ "u21" : {1: "len_1024"},
107
+ "u22" : {1: "len_1024"},
108
+ "d20" : {1: "len_256"},
109
+ "d21" : {1: "len_256"},
110
+ "u10" : {1: "len_256"},
111
+ "u11" : {1: "len_256"},
112
+ "u12" : {1: "len_256"},
113
+ "m" : {1: "len_64"},
114
+ }
115
+ return dynamic_axes
116
+
117
+ def get_dynamic_map(self, batchsize, height, width):
118
+ tw, ts, tb = 4, 4, 16 # temporal window size| temporal adaptive steps | temporal batch size
119
+ ml, mc, mh, mw = 32, 16, 224, 224 # motion latent size | motion channels | motion input height | width
120
+ b, h, w = batchsize, height, width
121
+ lh, lw = height // 8, width // 8 # latent height | width
122
+ cd0, cd1, cd2, cm, cu1, cu2, cu3 = 320, 640, 1280, 1280, 1280, 640, 320 # unet channels
123
+ emb = 768 # CLIP Embedding Dims | TAESDV Channels
124
+ lc, ic = 4, 3 # latent | image channels
125
+
126
+ fixed_inputs_map = {
127
+ "sample": (b, lc, tb, lh, lw),
128
+ "encoder_hidden_states": (b, 1, emb),
129
+ "motion_hidden_states": (b, tw * (ts - 1), ml, mc),
130
+ "motion": (b, ic, tw, mh, mw),
131
+ "pose_cond_fea": (b, cd0, tw * (ts - 1), lh, lw),
132
+ "pose": (b, ic, tw, h, w),
133
+ "new_noise": (b, lc, tw, lh, lw),
134
+ }
135
+
136
+ dynamic_inputs_map = {
137
+ "d00": (b, lh * lw, cd0),
138
+ "d01": (b, lh * lw, cd0),
139
+ "d10": (b, lh * lw // 4, cd1),
140
+ "d11": (b, lh * lw // 4, cd1),
141
+ "d20": (b, lh * lw // 16, cd2),
142
+ "d21": (b, lh * lw // 16, cd2),
143
+ "m": (b, lh * lw // 64, cm),
144
+ "u10": (b, lh * lw // 16, cu1),
145
+ "u11": (b, lh * lw // 16, cu1),
146
+ "u12": (b, lh * lw // 16, cu1),
147
+ "u20": (b, lh * lw // 4, cu2),
148
+ "u21": (b, lh * lw // 4, cu2),
149
+ "u22": (b, lh * lw // 4, cu2),
150
+ "u30": (b, lh * lw, cu3),
151
+ "u31": (b, lh * lw, cu3),
152
+ "u32": (b, lh * lw, cu3),
153
+ }
154
+
155
+ profile = Profile()
156
+
157
+ for name, shape in fixed_inputs_map.items():
158
+ shape_tuple = tuple(shape)
159
+ profile.add(name, min=shape_tuple, opt=shape_tuple, max=shape_tuple)
160
+
161
+ for name, base_shape in dynamic_inputs_map.items():
162
+
163
+ dim0, dim1_base, dim2 = base_shape
164
+
165
+ val_1x = dim1_base * 1
166
+ val_2x = dim1_base * 2
167
+ val_4x = dim1_base * 4
168
+
169
+ min_shape = (dim0, val_1x, dim2)
170
+ opt_shape = (dim0, val_2x, dim2)
171
+ max_shape = (dim0, val_4x, dim2)
172
+
173
+ profile.add(name, min=min_shape, opt=opt_shape, max=max_shape)
174
+
175
+ print(f"Dynamic: {name:<5} | Base(1x): {dim1_base:<5} | Range: {val_1x} ~ {val_4x} | Opt: {val_2x}")
176
+
177
+ return profile
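A minimal sketch of turning the polygraphy Profile returned by get_dynamic_map into a TensorRT engine (not from the repository; the sub-modules, paths, and resolution are placeholders):

from polygraphy.backend.trt import CreateConfig, engine_from_network, network_from_onnx_path, save_engine

wrapper = unet_work(pose_guider, motion_encoder, unet, vae, scheduler, timestep)  # assumes these modules are already built
profile = wrapper.get_dynamic_map(batchsize=1, height=512, width=512)

engine = engine_from_network(
    network_from_onnx_path("path/to/unet_opt.onnx"),
    config=CreateConfig(fp16=True, profiles=[profile]),
)
save_engine(engine, "path/to/unet_work.engine")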
src/modeling/onnx_export.py ADDED
@@ -0,0 +1,102 @@
1
+ # adapted from https://github.com/NVIDIA/TensorRT/blob/main/demo/Diffusion/utilities.py
2
+ #
3
+ # Copyright 2022 The HuggingFace Inc. team.
4
+ # SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5
+ # SPDX-License-Identifier: Apache-2.0
6
+ #
7
+ # Licensed under the Apache License, Version 2.0 (the "License");
8
+ # you may not use this file except in compliance with the License.
9
+ # You may obtain a copy of the License at
10
+ #
11
+ # http://www.apache.org/licenses/LICENSE-2.0
12
+ #
13
+ # Unless required by applicable law or agreed to in writing, software
14
+ # distributed under the License is distributed on an "AS IS" BASIS,
15
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ # See the License for the specific language governing permissions and
17
+ # limitations under the License.
18
+ #
19
+
20
+ import onnx
21
+ import gc
22
+ import onnx_graphsurgeon as gs
23
+ import torch
24
+ from onnx import shape_inference
25
+ from polygraphy.backend.onnx.loader import fold_constants
26
+ import os
27
+ from onnxsim import simplify
28
+
29
+ @torch.no_grad()
30
+ def export_onnx(
31
+ model,
32
+ onnx_path: str,
33
+ opt_image_height: int,
34
+ opt_image_width: int,
35
+ opt_batch_size: int,
36
+ onnx_opset: int,
37
+ dtype,
38
+ device,
39
+ auto_cast: bool = True,
40
+ ):
41
+ from contextlib import contextmanager
42
+
43
+ @contextmanager
44
+ def auto_cast_manager(enabled):
45
+ if enabled:
46
+ with torch.inference_mode(), torch.autocast("cuda"):
47
+ yield
48
+ else:
49
+ yield
50
+
51
+ # make sure the parent directory exists
52
+ os.makedirs(os.path.dirname(onnx_path), exist_ok=True)
53
+
54
+ with auto_cast_manager(auto_cast):
55
+ inputs = model.get_sample_input(opt_batch_size, opt_image_height, opt_image_width, dtype, device)
56
+
57
+ print(model.get_output_names())
58
+ print(f"开始导出 ONNX 模型到: {onnx_path} ...")
59
+ torch.onnx.utils.export(
60
+ model,
61
+ inputs,
62
+ onnx_path,
63
+ export_params=True,
64
+ opset_version=onnx_opset,
65
+ do_constant_folding=True,
66
+ input_names=model.get_input_names(),
67
+ output_names=model.get_output_names(),
68
+ dynamic_axes=model.get_dynamic_axes(),
69
+ )
70
+
71
+ del model
72
+ gc.collect()
73
+ torch.cuda.empty_cache()
74
+
75
+ def optimize_onnx(onnx_path, onnx_opt_path):
76
+ model = onnx.load(onnx_path)
77
+ name = os.path.splitext(os.path.basename(onnx_opt_path))[0]
78
+ model_opt = model
79
+
80
+ print(f"Saving to {onnx_opt_path}...")
81
+ onnx.save(
82
+ model_opt,
83
+ onnx_opt_path,
84
+ save_as_external_data=True,
85
+ all_tensors_to_one_file=True,
86
+ location=f"{name}.onnx.data",
87
+ size_threshold=1024
88
+ )
89
+ print("Optimization done.")
90
+
91
+ def handle_onnx_batch_norm(onnx_path: str):
92
+ onnx_model = onnx.load(onnx_path)
93
+ for node in onnx_model.graph.node:
94
+ if node.op_type == "BatchNormalization":
95
+ for attribute in node.attribute:
96
+ if attribute.name == "training_mode":
97
+ if attribute.i == 1:
98
+ node.output.remove(node.output[1])
99
+ node.output.remove(node.output[1])
100
+ attribute.i = 0
101
+
102
+ onnx.save_model(onnx_model, onnx_path)
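A minimal sketch chaining the helpers above (not from the repository; paths, opset, and resolution are placeholders, and `wrapper` stands for a module such as unet_work that exposes get_sample_input / get_input_names / get_output_names / get_dynamic_axes):

import torch

onnx_path = "path/to/unet_work.onnx"
export_onnx(wrapper, onnx_path,
            opt_image_height=512, opt_image_width=512, opt_batch_size=1,
            onnx_opset=17, dtype=torch.float16, device="cuda")
handle_onnx_batch_norm(onnx_path)                  # strip training-mode outputs from BatchNorm nodes
optimize_onnx(onnx_path, "path/to/unet_opt.onnx")  # re-save with weights as external data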
src/models/__pycache__/attention.cpython-310.pyc ADDED
Binary file (9.71 kB). View file
 
src/models/__pycache__/attention.cpython-39.pyc ADDED
Binary file (9.57 kB). View file
 
src/models/__pycache__/motion_module.cpython-310.pyc ADDED
Binary file (10.6 kB). View file
 
src/models/__pycache__/motion_module.cpython-39.pyc ADDED
Binary file (10.4 kB). View file