# LVP/datasets/dummy.py
# Provenance: added in commit 142a1ac ("add demo") by kiwhansong.
import torch
from torch.utils.data import Dataset
from omegaconf import DictConfig
from pathlib import Path
class DummyVideoDataset(Dataset):
    """Synthetic video dataset that fabricates random tensors on demand.

    Intended for debugging/benchmarking training pipelines without touching
    real data: every item is freshly generated with ``torch.randn`` /
    ``torch.zeros``, so no disk access, metadata file, or data root exists.
    """

    # NOTE: the annotation is a string forward reference so importing this
    # module does not require omegaconf at runtime; it is only a type hint.
    def __init__(self, cfg: "DictConfig", split: str = "training") -> None:
        """Cache dataset geometry and feature toggles from *cfg*.

        Args:
            cfg: Config object exposing ``height``, ``width``, ``n_frames``,
                ``load_video_latent``, ``load_prompt_embed``,
                ``image_to_video`` and ``max_text_tokens`` as attributes.
            split: Split name, accepted for interface parity with real
                datasets; the dummy generator ignores it.
        """
        super().__init__()
        self.cfg = cfg
        self.split = split
        self.height = cfg.height
        self.width = cfg.width
        self.n_frames = cfg.n_frames
        self.load_video_latent = cfg.load_video_latent
        self.load_prompt_embed = cfg.load_prompt_embed
        self.image_to_video = cfg.image_to_video
        self.max_text_tokens = cfg.max_text_tokens

    @property
    def metadata_path(self):
        """Always raises: dummy data is generated in-memory, no metadata file."""
        raise ValueError("Dummy dataset does not have a metadata path")

    @property
    def data_root(self):
        """Always raises: dummy data is generated in-memory, no data root."""
        raise ValueError("Dummy dataset does not have a data root path")

    def __len__(self) -> int:
        # Arbitrary large fixed size so epoch-based loops never run dry.
        return 10000000

    def __getitem__(self, idx: int) -> dict:
        """Generate one random sample; *idx* is ignored (items are i.i.d.).

        Returns:
            dict with keys ``prompts``, ``videos``, ``video_metadata``,
            ``has_bbox``, ``bbox_render``; plus ``images`` when
            ``image_to_video``; ``prompt_embeds``/``prompt_embed_len`` when
            ``load_prompt_embed``; ``video_latents`` (and ``image_latents``
            for image-to-video) when ``load_video_latent``.
        """
        # Dummy video tensor laid out as [T, C, H, W].
        videos = torch.randn(self.n_frames, 3, self.height, self.width)
        # For image-to-video, the conditioning image is the first frame.
        images = videos[:1].clone() if self.image_to_video else None
        output = {
            # Plain literal (the original was an f-string with no fields).
            "prompts": "A dummy video caption for debugging purpose",
            "videos": videos,
            "video_metadata": {
                "num_frames": self.n_frames,
                "height": self.height,
                "width": self.width,
                "has_caption": True,
            },
            "has_bbox": torch.tensor([False, False]),
            "bbox_render": torch.zeros(2, self.height, self.width),
        }
        if images is not None:
            output["images"] = images
        if self.load_prompt_embed:
            # Dummy prompt embeddings [max_text_tokens, 4096]; 4096 matches
            # the text-encoder hidden size the pipeline expects.
            output["prompt_embeds"] = torch.randn(self.max_text_tokens, 4096)
            output["prompt_embed_len"] = self.max_text_tokens
        if self.load_video_latent:
            # Latent layout [C=4, T/4, H/8, W/8] — hoisted once; the original
            # duplicated this shape for image and video latents.
            latent_shape = (
                4,
                self.n_frames // 4,
                self.height // 8,
                self.width // 8,
            )
            if self.image_to_video:
                output["image_latents"] = torch.randn(*latent_shape)
            output["video_latents"] = torch.randn(*latent_shape)
        return output