import torch
from omegaconf import OmegaConf
from safetensors.torch import load_model
from diffusers.models import AutoencoderKL
from pipeline.utils import RecEvalDataset
from pipeline.rec_pipeline import Rec_Pipeline
from model.model_AMD import AMDModel
from typing import Optional
from torch.utils.data import DataLoader
from omegaconf import OmegaConf
import os
import argparse


class rec_inferencer:
    """Load the VAE and AMD models and drive a ``Rec_Pipeline`` with them.

    The config object is read for ``vae_path``, ``amd_config_path``,
    ``amd_ckpt_path``, ``amd_sample_steps`` and ``output_dir``; it is also
    forwarded unchanged to the pipeline's ``run`` / ``eval`` calls.
    """

    def __init__(self, config, device, dtype):
        """Store the settings and immediately build the pipeline.

        Args:
            config: Attribute-style config (loaded with ``OmegaConf.load``
                by the script entry point below).
            device: Device both models are moved to.
            dtype: Dtype both models are cast to.
        """
        self.config = config
        self.device = device
        self.dtype = dtype
        self.setup()

    def setup(self):
        """Instantiate both models, load the AMD weights and create ``self.pipeline``."""
        vae_model = (
            AutoencoderKL.from_pretrained(self.config.vae_path, subfolder="vae")
            .to(self.device, self.dtype)
            .requires_grad_(False)
        )

        amd_model = (
            AMDModel.from_config(AMDModel.load_config(self.config.amd_config_path))
            .to(self.device, self.dtype)
            .requires_grad_(False)
        )
        load_model(amd_model, self.config.amd_ckpt_path)
        # A module built from a config (rather than ``from_pretrained``) starts
        # in training mode, so switch it to eval mode explicitly; otherwise any
        # dropout / normalisation layers would behave as in training.
        # NOTE(review): assumes AMDModel is a regular torch ``nn.Module`` -
        # ``load_model`` and ``requires_grad_`` above already rely on that.
        amd_model.eval()

        self.pipeline = Rec_Pipeline(
            amd_model,
            vae_model,
            amd_sample_steps=self.config.amd_sample_steps,
            output_dir=self.config.output_dir,
        )

    def infer(
        self,
        video_path: str,
        refimg_path: Optional[str] = None,
        output_path: Optional[str] = None,
    ):
        """Run the pipeline on a single video and return its result.

        Args:
            video_path: Path of the input video, forwarded to ``pipeline.run``.
            refimg_path: Optional reference image path, forwarded as-is.
            output_path: Optional output path, forwarded as-is.

        Returns:
            Whatever ``Rec_Pipeline.run`` returns.
        """
        video = self.pipeline.run(
            video_path, refimg_path, output_path, config=self.config
        )
        return video

    def eval(
        self,
        video_dir: str,
        num_frames: int = 96,
        batch_size: int = 12,
        num_workers: int = 8,
    ):
        """Evaluate the pipeline on a ``RecEvalDataset`` built from ``video_dir``.

        Args:
            video_dir: Directory handed to ``RecEvalDataset``.
            num_frames: Second argument of ``RecEvalDataset`` (presumably the
                number of frames used per video - confirm against the dataset).
            batch_size: DataLoader batch size; the default matches the value
                that used to be hard-coded.
            num_workers: DataLoader worker processes; the default matches the
                value that used to be hard-coded.
        """
        evalset = RecEvalDataset(
            video_dir,
            num_frames,
        )
        evalloader = DataLoader(
            evalset,
            batch_size=batch_size,
            shuffle=False,
            drop_last=False,
            collate_fn=evalset.collate,
            num_workers=num_workers,
        )
        self.pipeline.eval(evalloader, config=self.config)


def main():
    """Parse the command line, build the inferencer and run the evaluation."""
    parser = argparse.ArgumentParser(
        description="Evaluate the reconstruction pipeline on a directory of videos."
    )
    parser.add_argument(
        "--config_path",
        type=str,
        default="/mnt/pfs-gv8sxa/tts/dhg/zqy/code/AMD2/config/inference/rec_spatial.yaml",
    )
    parser.add_argument(
        "--video_dir",
        type=str,
        default="/mnt/pfs-gv8sxa/tts/dhg/zqy/code/test/test_frame2frame_reconstruction/data/facevid/test",
    )
    # Previously fixed in code; the defaults keep the old behaviour.
    parser.add_argument("--num_frames", type=int, default=96)
    parser.add_argument("--device", type=str, default="cuda:0")
    args = parser.parse_args()

    config = OmegaConf.load(args.config_path)
    inferencer = rec_inferencer(config, torch.device(args.device), torch.float32)

    # Single-video usage, kept for reference:
    # video_path = "/mnt/pfs-gv8sxa/tts/dhg/zqy/data/FaceVid_240h/videos/2025.mp4"
    # video = inferencer.infer(video_path)

    inferencer.eval(args.video_dir, args.num_frames)


if __name__ == "__main__":
    main()