File size: 6,176 Bytes
142a1ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import pandas as pd
from pathlib import Path
import ijson
from typing import Dict, Any
from .video_base import VideoDataset


class Ego4DVideoDataset(VideoDataset):
    """Ego4D video dataset built from the FHO benchmark clips.

    ``download`` fetches the annotations and clips with the ``ego4d`` CLI
    into ``<data_root>/raw`` and then writes a metadata CSV with one row per
    accepted narrated action (caption, frame markers and a trim window).
    """

    # Annotation file, relative to the raw download directory.
    _ANNOTATION_FILE = "v2/annotations/fho_main.json"

    # Column order of the metadata CSV. Passed explicitly so that the CSV
    # still has a header row when no record survives the filters.
    _METADATA_COLUMNS = (
        "video_path",
        "height",
        "width",
        "n_frames",
        "fps",
        "original_caption",
        "trim_start",
        "trim_end",
        "pre_frame",
        "pnr_frame",
        "post_frame",
        "contact_frame",
    )

    # (tag, replacement) pairs applied to the narration text, in this order.
    # The order matters: longer tags must be consumed before the shorter
    # "#C " / "#c " / "#" fallbacks, and a replacement can expose a tag that
    # a later entry removes (hence the repeated "#c C " entry).
    # NOTE(review): "#C C " is replaced by an empty string while every other
    # tag becomes a single space — kept as in the original; confirm intent.
    _CAPTION_TAG_REPLACEMENTS = (
        ("#cC c ", " "),
        ("#Cc C ", " "),
        ("#C C ", ""),
        ("#c  c ", " "),
        ("#c- c ", " "),
        ("#c C ", " "),
        ("#c c", " "),
        ("#CC ", " "),
        ("#C  C ", " "),
        ("#C c ", " "),
        ("#cc ", " "),
        ("#C- C ", " "),
        ("#c C ", " "),
        ("#C ", " "),
        ("#c ", " "),
        ("#", " "),
    )

    def download(self):
        """Download the Ego4D FHO data and create the metadata CSV.

        Raises:
            FileNotFoundError: if ``~/.aws/credentials`` does not exist.
            RuntimeError: if the download fails with a botocore
                ``ClientError`` (chained to the original error).
        """
        raw_dir = self.data_root / "raw"
        self._download_raw(raw_dir)

        print("Creating metadata CSV...")
        records = self._collect_records(raw_dir / self._ANNOTATION_FILE)

        metadata_path = self.data_root / self.metadata_path
        metadata_path.parent.mkdir(parents=True, exist_ok=True)
        df = pd.DataFrame.from_records(
            records, columns=list(self._METADATA_COLUMNS)
        )
        df.to_csv(metadata_path, index=False)
        print(f"Created metadata CSV with {len(records)} records")

    def _download_raw(self, raw_dir):
        """Run the ego4d CLI downloader into ``raw_dir``."""
        # Imported lazily so the module can be imported without these
        # packages installed; they are only needed for downloading.
        from botocore.exceptions import ClientError
        from ego4d.cli.cli import main_cfg as download_ego4d
        from ego4d.cli.config import Config as Ego4DConfig

        raw_dir.mkdir(parents=True, exist_ok=True)

        aws_credentials_path = Path.home() / ".aws" / "credentials"
        if not aws_credentials_path.exists():
            raise FileNotFoundError(
                f"AWS credentials file not found at {aws_credentials_path}. "
                "For Ego4D auto download, you need to request access and use the "
                "emailed key to set up AWS credentials first. "
                "See https://ego4d-data.org/ for more information."
            )

        cfg = Ego4DConfig(
            output_directory=str(raw_dir),
            datasets=["annotations", "clips"],
            benchmarks=["FHO"],
            metadata=True,
            assume_yes=True,
        )

        try:
            download_ego4d(cfg)
        except ClientError as e:
            print(e)
            # One single message string: separate arguments would make the
            # exception render as a tuple instead of readable text.
            raise RuntimeError(
                "Failed to download Ego4D dataset due to the above error. "
                "If you see 'An error occurred (403) when calling the "
                "HeadObject operation: Forbidden', it's likely due to an "
                "expired Ego4D AWS credential. Renew the dataset's online "
                "form and update the AWS credentials."
            ) from e

    def _collect_records(self, annotation_path):
        """Stream the annotation file and return one record per kept action."""
        records = []
        with open(annotation_path, "rb") as file:
            # ijson parses the "videos" array incrementally, one item at a
            # time, instead of loading the whole JSON document.
            for video in ijson.items(file, "videos.item"):
                video_metadata = video["video_metadata"]
                fps = round(video_metadata["fps"])
                n_frames = video_metadata["num_frames"]
                width = video_metadata["width"]
                height = video_metadata["height"]

                for interval in video["annotated_intervals"]:
                    video_path = "raw/v2/clips/" + interval["clip_uid"] + ".mp4"

                    # Skip annotations whose clip file is not on disk.
                    if not (self.data_root / video_path).exists():
                        continue

                    for action in interval["narrated_actions"]:
                        critical_frames = action["clip_critical_frames"]
                        is_valid_action = action["is_valid_action"]
                        is_rejected = action["is_rejected"]
                        is_invalid_annotation = action["is_invalid_annotation"]
                        is_partial = action["is_partial"]
                        if (
                            not critical_frames
                            or not is_valid_action
                            or is_rejected
                            or is_invalid_annotation
                            or is_partial
                        ):
                            continue

                        pre_frame = critical_frames["pre_frame"]
                        post_frame = critical_frames["post_frame"]
                        pnr_frame = critical_frames["pnr_frame"]
                        contact_frame = critical_frames["contact_frame"]

                        trim_start, trim_end = self._trim_window(
                            pre_frame,
                            pnr_frame,
                            post_frame,
                            self._n_frames_in_src(fps),
                            n_frames,
                        )

                        records.append(
                            {
                                "video_path": video_path,
                                "height": height,
                                "width": width,
                                "n_frames": n_frames,
                                "fps": fps,
                                "original_caption": self._clean_caption(
                                    action["narration_text"]
                                ),
                                "trim_start": trim_start,
                                "trim_end": trim_end,
                                "pre_frame": pre_frame,
                                "pnr_frame": pnr_frame,
                                "post_frame": post_frame,
                                "contact_frame": contact_frame,
                            }
                        )
        return records

    @classmethod
    def _clean_caption(cls, caption):
        """Strip the ``#C``-style tags from a narration text."""
        for tag, replacement in cls._CAPTION_TAG_REPLACEMENTS:
            caption = caption.replace(tag, replacement)
        return caption

    @staticmethod
    def _trim_window(pre_frame, pnr_frame, post_frame, target_len, n_frames):
        """Return ``(trim_start, trim_end)`` chosen by manual heuristics.

        The window starts at ``pre_frame`` by default. Its end is, in order
        of preference: 10% of the way from ``pnr_frame`` to ``post_frame``
        when that already spans ``target_len`` frames; ``post_frame`` when
        the pnr-to-post span is shorter than ``target_len`` (the start is
        then pulled back, by at most 15 frames before ``pre_frame``);
        otherwise exactly ``target_len`` frames after ``pre_frame``.
        The result is clamped to ``[0, n_frames]``.
        """
        trim_start = pre_frame
        pseudo_min_end = int((post_frame - pnr_frame) * 0.1) + pnr_frame
        if pseudo_min_end - pre_frame >= target_len:
            trim_end = pseudo_min_end
        elif post_frame - pnr_frame < target_len:
            trim_end = post_frame
            trim_start = max(trim_end - target_len, pre_frame - 15)
        else:
            trim_end = target_len + pre_frame

        return max(0, trim_start), min(n_frames, trim_end)