Spaces:

KempnerInstituteAI
/

LVP

Running on Zero

App Files Files Community

LVP / datasets /ego4d.py

kiwhansong

add demo

142a1ac 18 days ago

raw

history blame contribute delete

6.18 kB

	import pandas as pd
	from pathlib import Path
	import ijson
	from typing import Dict, Any
	from .video_base import VideoDataset


	class Ego4DVideoDataset(VideoDataset):
	    """Video dataset backed by the Ego4D "FHO" benchmark clips.

	    ``download`` fetches the annotations and clips with the official Ego4D
	    CLI and then writes one metadata CSV row per usable narrated action.
	    """

	    # Ordered (pattern, replacement) pairs applied one after another to a
	    # narration text to drop the "#C C"-style tags. The order (including the
	    # seemingly redundant entries) is kept exactly as originally written so
	    # that the generated captions do not change.
	    _NARRATION_TAG_REPLACEMENTS = (
	        ("#cC c ", " "),
	        ("#Cc C ", " "),
	        ("#C C ", ""),
	        ("#c c ", " "),
	        ("#c- c ", " "),
	        ("#c C ", " "),
	        ("#c c", " "),
	        ("#CC ", " "),
	        ("#C C ", " "),
	        ("#C c ", " "),
	        ("#cc ", " "),
	        ("#C- C ", " "),
	        ("#c C ", " "),
	        ("#C ", " "),
	        ("#c ", " "),
	        ("#", " "),
	    )

	    def download(self) -> None:
	        """Download the Ego4D FHO data and create the metadata CSV.

	        Data is downloaded into ``<data_root>/raw`` and the CSV is written to
	        ``<data_root>/<metadata_path>``.

	        Raises:
	            FileNotFoundError: if ``~/.aws/credentials`` does not exist.
	            RuntimeError: if the Ego4D downloader fails with a botocore
	                ``ClientError`` (chained to the original error).
	        """
	        # Imported lazily so the module can be imported without these packages.
	        from ego4d.cli.cli import main_cfg as download_ego4d
	        from ego4d.cli.config import Config as Ego4DConfig

	        # Import the exception class directly: a bare ``import botocore`` does
	        # not guarantee that the ``botocore.exceptions`` submodule is loaded.
	        from botocore.exceptions import ClientError

	        raw_dir = self.data_root / "raw"
	        raw_dir.mkdir(parents=True, exist_ok=True)

	        aws_credentials_path = Path.home() / ".aws" / "credentials"
	        if not aws_credentials_path.exists():
	            raise FileNotFoundError(
	                f"AWS credentials file not found at {aws_credentials_path}. "
	                "For Ego4D auto download, you need to request access and use the "
	                "emailed key to set up AWS credentials first. "
	                "See https://ego4d-data.org/ for more information."
	            )

	        cfg = Ego4DConfig(
	            output_directory=str(raw_dir),
	            datasets=["annotations", "clips"],
	            benchmarks=["FHO"],
	            metadata=True,
	            assume_yes=True,
	        )

	        try:
	            download_ego4d(cfg)
	        except ClientError as e:
	            print(e)
	            # A single string (no stray commas) so the message is not rendered
	            # as a tuple of arguments.
	            raise RuntimeError(
	                "Failed to download Ego4D dataset due to the above error. "
	                "If you see an error occurred (403) when calling the HeadObject "
	                "operation: Forbidden, it's likely due to an expired Ego4D AWS "
	                "credential. Renew the dataset's online form and update the AWS "
	                "credentials."
	            ) from e

	        print("Creating metadata CSV...")
	        records = self._collect_action_records(
	            raw_dir / "v2/annotations/fho_main.json"
	        )

	        csv_path = self.data_root / self.metadata_path
	        csv_path.parent.mkdir(parents=True, exist_ok=True)
	        pd.DataFrame.from_records(records).to_csv(csv_path, index=False)
	        print(f"Created metadata CSV with {len(records)} records")

	    def _collect_action_records(self, annotation_path):
	        """Return one metadata dict per usable narrated action in the annotation file.

	        Clips whose ``.mp4`` file is not present under ``data_root`` are
	        skipped, as are actions that have no critical frames, are not valid,
	        are rejected, are marked as invalid annotations, or are partial.
	        """
	        records = []
	        with open(annotation_path, "rb") as file:
	            # Iterate over the entries of the top-level "videos" array.
	            for video in ijson.items(file, "videos.item"):
	                video_meta = video["video_metadata"]
	                fps = round(video_meta["fps"])
	                n_frames = video_meta["num_frames"]
	                width = video_meta["width"]
	                height = video_meta["height"]

	                for interval in video["annotated_intervals"]:
	                    video_path = "raw/v2/clips/" + interval["clip_uid"] + ".mp4"
	                    if not (self.data_root / video_path).exists():
	                        continue

	                    for action in interval["narrated_actions"]:
	                        critical_frames = action["clip_critical_frames"]
	                        is_valid_action = action["is_valid_action"]
	                        is_rejected = action["is_rejected"]
	                        is_invalid_annotation = action["is_invalid_annotation"]
	                        is_partial = action["is_partial"]
	                        if (
	                            not critical_frames
	                            or not is_valid_action
	                            or is_rejected
	                            or is_invalid_annotation
	                            or is_partial
	                        ):
	                            continue

	                        pre_frame = critical_frames["pre_frame"]
	                        post_frame = critical_frames["post_frame"]
	                        pnr_frame = critical_frames["pnr_frame"]
	                        contact_frame = critical_frames["contact_frame"]

	                        trim_start, trim_end = self._trim_window(
	                            pre_frame,
	                            pnr_frame,
	                            post_frame,
	                            self._n_frames_in_src(fps),
	                            n_frames,
	                        )

	                        records.append(
	                            {
	                                "video_path": video_path,
	                                "height": height,
	                                "width": width,
	                                "n_frames": n_frames,
	                                "fps": fps,
	                                "original_caption": self._clean_narration(
	                                    action["narration_text"]
	                                ),
	                                "trim_start": trim_start,
	                                "trim_end": trim_end,
	                                "pre_frame": pre_frame,
	                                "pnr_frame": pnr_frame,
	                                "post_frame": post_frame,
	                                "contact_frame": contact_frame,
	                            }
	                        )
	        return records

	    @classmethod
	    def _clean_narration(cls, text: str) -> str:
	        """Apply the tag replacements to ``text`` in their defined order."""
	        for pattern, replacement in cls._NARRATION_TAG_REPLACEMENTS:
	            text = text.replace(pattern, replacement)
	        return text

	    @staticmethod
	    def _trim_window(pre_frame, pnr_frame, post_frame, target_len, n_frames):
	        """Heuristically choose the ``(trim_start, trim_end)`` frames of an action.

	        The window starts at ``pre_frame``. Its end is chosen as follows:

	        * if ending 10% of the way from ``pnr_frame`` to ``post_frame`` already
	          spans at least ``target_len`` frames, end there;
	        * otherwise, if ``post_frame - pnr_frame`` is shorter than
	          ``target_len``, end at ``post_frame`` and set the start to
	          ``max(end - target_len, pre_frame - 15)``;
	        * otherwise end ``target_len`` frames after ``pre_frame``.

	        Finally the start is clamped to be >= 0 and the end to be <= ``n_frames``.
	        """
	        trim_start = pre_frame
	        pseudo_min_end = int((post_frame - pnr_frame) * 0.1) + pnr_frame
	        if pseudo_min_end - pre_frame >= target_len:
	            trim_end = pseudo_min_end
	        elif post_frame - pnr_frame < target_len:
	            trim_end = post_frame
	            trim_start = max(trim_end - target_len, pre_frame - 15)
	        else:
	            trim_end = target_len + pre_frame

	        return max(0, trim_start), min(n_frames, trim_end)