Spaces:
Runtime error
Runtime error
| from typing import Optional | |
| import os | |
| from tqdm import tqdm | |
| from videogen_hub.infermodels import load_model | |
| import cv2, json | |
| import numpy as np | |
| import argparse | |
| from videogen_hub.utils.file_helper import get_file_path | |
| from moviepy.editor import ImageSequenceClip | |
def _normalize_to_uint8_frames(array):
    """Min-max scale a channel-first video array into uint8 frames.

    Args:
        array (np.ndarray): Video data indexed as (T, C, H, W).

    Returns:
        np.ndarray: uint8 array indexed as (T, H, W, C) whose values span
        [0, 255]. A constant-valued input yields all-zero frames.
    """
    low = array.min()
    span = array.max() - low
    if span == 0:
        # A constant clip has no dynamic range. Dividing by the zero span
        # would produce NaN and an undefined uint8 cast, so emit black frames.
        scaled = np.zeros(array.shape, dtype=np.float32)
    else:
        scaled = (array - low) / span
    # (T, C, H, W) -> (T, H, W, C), then stretch [0, 1] to [0, 255].
    return (scaled.transpose(0, 2, 3, 1) * 255).astype(np.uint8)


def _tensor_to_video(tensor, output_path, fps=8):
    """Encode a (T, C, H, W) tensor as an H.264 video through moviepy.

    Args:
        tensor: Tensor exposing ``.cpu().numpy()`` (e.g. ``torch.Tensor``).
        output_path (str): Destination path of the video file.
        fps (int): Frames per second of the written video.
    """
    video_frames = _normalize_to_uint8_frames(tensor.cpu().numpy())
    clip = ImageSequenceClip(list(video_frames), fps=fps)
    clip.write_videofile(output_path, codec="libx264")


def _write_video_with_cv2(frames, output_path, fps=8):
    """Encode frames as an ``mp4v`` video through ``cv2.VideoWriter``.

    Args:
        frames: Tensor of frames; the writer size is taken as
            ``(frames.shape[2], frames.shape[1])``, i.e. the layout is
            presumably (T, H, W, C) -- confirm against the model output.
        output_path (str): Destination path of the video file.
        fps (int): Frames per second of the written video.
    """
    fourcc = cv2.VideoWriter_fourcc(*"mp4v")  # Codec
    writer = cv2.VideoWriter(
        output_path, fourcc, fps, (frames.shape[2], frames.shape[1])
    )
    try:
        for frame in frames:
            # .cpu() keeps this path usable for tensors living on an accelerator.
            writer.write(frame.cpu().numpy().astype(np.uint8))
    finally:
        # Release even if a write fails so the container is finalised/closed.
        writer.release()


def infer_text_guided_vg_bench(
    model,
    result_folder: str = "results",
    experiment_name: str = "Exp_Text-Guided_VG",
    overwrite_model_outputs: bool = False,
    overwrite_inputs: bool = False,
    limit_videos_amount: Optional[int] = None,
):
    """
    Run a text-guided video generation model over the benchmark prompts.

    The prompts are read from ``t2v_vbench_1000.json`` (resolved through
    ``get_file_path``). A copy of them is stored as ``dataset_lookup.json`` in
    the experiment folder, and one video per prompt is written to
    ``<result_folder>/<experiment_name>/<model class name>/``.

    Args:
        model: Instance of a model that supports text-guided video generation.
            Expected to have a method 'infer_one_video' accepting ``prompt=...``
            and returning a tensor of frames.
        result_folder (str, optional): Path to the root directory where the
            results should be saved. Defaults to 'results'.
        experiment_name (str, optional): Name of the folder inside
            'result_folder' where results for this particular experiment will
            be stored. Defaults to "Exp_Text-Guided_VG".
        overwrite_model_outputs (bool, optional): If True, videos that already
            exist are generated again and overwritten. Leave it False to skip
            existing outputs, which allows resuming a run. Defaults to False.
        overwrite_inputs (bool, optional): If True, ``dataset_lookup.json`` is
            rewritten even when it already exists. Defaults to False.
        limit_videos_amount (int, optional): Stops the run after the first
            entry whose numeric filename prefix is >= this value. If None, all
            prompts are processed.

    Returns:
        None. Results are saved in the specified directory.

    Notes:
        Every key of the prompt file is used as the output file name and must
        start with an integer followed by an underscore; every value must
        provide the text prompt under the key ``"prompt_en"``.
    """
    benchmark_prompt_path = "t2v_vbench_1000.json"
    # Context manager so the prompt file handle is closed deterministically.
    with open(get_file_path(benchmark_prompt_path), "r", encoding="utf-8") as f:
        prompts = json.load(f)

    experiment_folder = os.path.join(result_folder, experiment_name)
    save_path = os.path.join(experiment_folder, "dataset_lookup.json")
    if overwrite_inputs or not os.path.exists(save_path):
        os.makedirs(experiment_folder, exist_ok=True)
        with open(save_path, "w") as f:
            json.dump(prompts, f, indent=4)

    model_name = model.__class__.__name__
    print(
        "========> Running Benchmark Dataset:",
        experiment_name,
        "| Model:",
        model_name,
    )

    # The destination does not depend on the prompt, so create it once up front.
    dest_folder = os.path.join(experiment_folder, model_name)
    os.makedirs(dest_folder, exist_ok=True)

    # Models listed here are written with cv2 instead of moviepy.
    # special_treated_list = ["LaVie", "ModelScope", "T2VTurbo"]
    special_treated_list = []

    for file_basename, prompt in tqdm(prompts.items()):
        idx = int(file_basename.split("_")[0])
        dest_file = os.path.join(dest_folder, file_basename)

        if overwrite_model_outputs or not os.path.exists(dest_file):
            print("========> Inferencing", dest_file)
            frames = model.infer_one_video(prompt=prompt["prompt_en"])

            if model_name in special_treated_list:
                print("======> Saved through cv2.VideoWriter_fourcc")
                _write_video_with_cv2(frames, dest_file)
            else:
                # A trailing dimension of 3 is taken to mean channel-last
                # (T, H, W, C); the moviepy path wants (T, C, H, W).
                if frames.shape[-1] == 3:
                    frames = frames.permute(0, 3, 1, 2)
                    print("======> corrected frames.shape", frames.shape)
                _tensor_to_video(frames, dest_file)
        else:
            print("========> Skipping", dest_file, ", it already exists")

        # NOTE: the limit is compared with the numeric prefix of the file name
        # (after the entry was handled), not with a count of processed videos.
        if limit_videos_amount is not None and (idx >= limit_videos_amount):
            break
| # for testing | |
if __name__ == "__main__":
    # Manual smoke test: load the model named on the command line and run
    # the benchmark with every default setting.
    cli = argparse.ArgumentParser(description="Load a model by name")
    cli.add_argument(
        "--model_name",
        type=str,
        required=True,
        help="Name of the model to load",
    )
    requested_model = cli.parse_args().model_name
    infer_text_guided_vg_bench(load_model(requested_model))