semo

File size: 2,504 Bytes

bd546bf

import torch
from omegaconf import OmegaConf
from safetensors.torch import load_model
from diffusers.models import AutoencoderKL  
from pipeline.utils import RecEvalDataset
from pipeline.rec_pipeline import Rec_Pipeline
from model.model_AMD import AMDModel
from typing import Optional
from torch.utils.data import DataLoader
from omegaconf import OmegaConf
import os
import argparse

class rec_inferencer:
    """Wrap a ``Rec_Pipeline`` for single-video inference and dataset evaluation.

    On construction the VAE and the AMD model are loaded from the paths given
    in ``config``, moved to ``device``/``dtype``, frozen, and handed to a
    ``Rec_Pipeline`` stored as ``self.pipeline``.

    Args:
        config: Configuration object read via attribute access. The fields
            used here are ``vae_path``, ``amd_config_path``, ``amd_ckpt_path``,
            ``amd_sample_steps`` and ``output_dir``; the whole object is also
            forwarded to the pipeline calls.
        device: Device the models are moved to.
        dtype: Dtype the models are cast to.
    """

    def __init__(
        self,
        config,
        device,
        dtype
    ):
        self.config = config
        self.device = device
        self.dtype = dtype
        self.setup()

    def setup(self):
        """Load both models and build ``self.pipeline``."""
        cfg = self.config

        vae_model = AutoencoderKL.from_pretrained(cfg.vae_path, subfolder="vae")
        vae_model = vae_model.to(self.device, self.dtype).requires_grad_(False)
        vae_model.eval()

        amd_model = AMDModel.from_config(AMDModel.load_config(cfg.amd_config_path))
        amd_model = amd_model.to(self.device, self.dtype).requires_grad_(False)
        load_model(amd_model, cfg.amd_ckpt_path)
        # A module built with ``from_config`` starts in training mode, and
        # ``requires_grad_(False)`` does not change that. Switch to eval mode so
        # train-only layers (e.g. dropout) are inactive during inference.
        amd_model.eval()

        self.pipeline = Rec_Pipeline(
            amd_model,
            vae_model,
            amd_sample_steps=cfg.amd_sample_steps,
            output_dir=cfg.output_dir,
        )

    def infer(self, video_path:str, refimg_path:Optional[str]=None, output_path:Optional[str] = None):
        """Run the pipeline on one video and return its result.

        Args:
            video_path: Path of the input video.
            refimg_path: Optional reference image path, forwarded as-is.
            output_path: Optional output path, forwarded as-is.

        Returns:
            Whatever ``self.pipeline.run`` returns (presumably the
            reconstructed video -- confirm against ``Rec_Pipeline``).
        """
        return self.pipeline.run(video_path, refimg_path, output_path, config=self.config)

    def eval(self, video_dir:str, num_frames:int = 96, batch_size:int = 12, num_workers:int = 8):
        """Evaluate the pipeline over every sample of a ``RecEvalDataset``.

        Args:
            video_dir: Directory handed to ``RecEvalDataset``.
            num_frames: Frame count handed to ``RecEvalDataset``.
            batch_size: Number of samples per batch.
            num_workers: Number of ``DataLoader`` worker processes.
        """
        evalset = RecEvalDataset(
            video_dir,
            num_frames,
        )
        # Fixed order and no dropped tail batch, so every sample is evaluated
        # exactly once and runs are comparable.
        evalloader = DataLoader(
            evalset,
            batch_size,
            shuffle=False,
            drop_last=False,
            collate_fn=evalset.collate,
            num_workers=num_workers,
        )
        self.pipeline.eval(evalloader, config=self.config)

if __name__ == "__main__":
    # Command-line entry point: load the YAML config, build the inferencer and
    # run evaluation over a directory of videos.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--config_path",
        type=str,
        default="/mnt/pfs-gv8sxa/tts/dhg/zqy/code/AMD2/config/inference/rec_spatial.yaml",
        help="Path of the inference config file loaded with OmegaConf.",
    )
    parser.add_argument(
        "--video_dir",
        type=str,
        default="/mnt/pfs-gv8sxa/tts/dhg/zqy/code/test/test_frame2frame_reconstruction/data/facevid/test",
        help="Directory of videos to evaluate.",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda:0",
        help="Torch device string the models are moved to.",
    )
    parser.add_argument(
        "--num_frames",
        type=int,
        default=96,
        help="Frame count passed to the evaluation dataset.",
    )
    args = parser.parse_args()

    config = OmegaConf.load(args.config_path)
    inferencer = rec_inferencer(config, torch.device(args.device), torch.float32)

    # For a single video, call ``inferencer.infer(video_path)`` instead.
    inferencer.eval(
        args.video_dir,
        args.num_frames,
    )