|
|
import requests |
|
|
import subprocess |
|
|
import json |
|
|
import pandas as pd |
|
|
import zipfile |
|
|
import cv2 |
|
|
from pathlib import Path |
|
|
from tqdm import tqdm |
|
|
|
|
|
from .video_base import VideoDataset |
|
|
|
|
|
|
|
|
class SomethingSomethingDataset(VideoDataset):
    """
    Something Something Dataset from https://arxiv.org/abs/1706.04261
    """

    def download(self):
        """Download Something-Something-V2, convert it to MP4 and index it.

        Steps, in order:

        1. Download the two split video archives and the labels zip into
           ``self.data_root``.
        2. Concatenate the split archives, untar them, and delete the archives.
        3. Unzip the labels package (if present) and delete the zip.
        4. For every entry in ``labels/train.json`` and
           ``labels/validation.json`` whose ``.webm`` file exists, transcode it
           to ``.mp4`` with ffmpeg, delete the ``.webm``, and read the video's
           width, height, fps and frame count with OpenCV.
        5. Write one CSV row per successfully processed video to
           ``self.data_root / self.metadata_path`` with the columns
           ``video_path``, ``caption``, ``height``, ``width``, ``fps``,
           ``n_frames`` and ``split``.

        ``self.data_root`` and ``self.metadata_path`` are not defined in this
        class; presumably they are provided by ``VideoDataset`` -- verify there.

        Requires the ``cat``, ``tar`` and ``ffmpeg`` executables on ``PATH``.

        Raises:
            requests.HTTPError: if a download responds with an error status.
            subprocess.CalledProcessError: if archive extraction fails.
        """
        self.data_root.mkdir(parents=True, exist_ok=True)

        urls = [
            "https://apigwx-aws.qualcomm.com/qsc/public/v1/api/download/software/dataset/AIDataset/Something-Something-V2/20bn-something-something-v2-00",
            "https://apigwx-aws.qualcomm.com/qsc/public/v1/api/download/software/dataset/AIDataset/Something-Something-V2/20bn-something-something-v2-01",
            "https://softwarecenter.qualcomm.com/api/download/software/dataset/AIDataset/Something-Something-V2/20bn-something-something-download-package-labels.zip",
        ]

        for url in urls:
            filename = Path(url).name
            filepath = self.data_root / filename

            print(f"Downloading {filename}...")
            # The timeout bounds connection setup and each socket read (not the
            # whole transfer), so a stalled server cannot hang the download
            # forever. The context manager releases the streamed connection.
            with requests.get(url, stream=True, timeout=60) as response:
                response.raise_for_status()
                with open(filepath, "wb") as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)

        print("Concatenating and extracting tar files...")
        # The pipeline still needs a shell (glob + pipe), but the working
        # directory is passed via ``cwd`` instead of being interpolated into
        # the command, so a data_root containing spaces or shell
        # metacharacters cannot break or alter the command.
        subprocess.run(
            "cat 20bn-something-something-v2-0? | tar -xvzf -",
            shell=True,
            check=True,
            cwd=self.data_root,
        )
        print("Deleting zip files for video data...")
        for zip_file in self.data_root.glob("20bn-something-something-v2-0*"):
            print(f"Deleting {zip_file.name}...")
            zip_file.unlink()

        labels_zip_path = (
            self.data_root / "20bn-something-something-download-package-labels.zip"
        )
        if labels_zip_path.exists():
            print(f"Extracting {labels_zip_path.name}...")
            with zipfile.ZipFile(labels_zip_path, "r") as zip_ref:
                zip_ref.extractall(self.data_root)
            print("Deleting zip file for labels...")
            labels_zip_path.unlink()

        print("Creating metadata CSV file for Something Something Dataset")

        json_files = {
            "training": "labels/train.json",
            "validation": "labels/validation.json",
        }

        records = []
        for split, json_file in json_files.items():
            with open(self.data_root / json_file, "r") as f:
                labels = json.load(f)

            # Counters are initialised once per split; resetting them inside
            # the item loop would make the reported rate meaningless.
            total_videos = len(labels)
            successful_conversions = 0

            for item in tqdm(labels, desc=f"Creating metadata for {split}"):
                webm_video_path = f"20bn-something-something-v2/{item['id']}.webm"
                mp4_video_path = f"20bn-something-something-v2/{item['id']}.mp4"
                input_path = self.data_root / webm_video_path
                output_path = self.data_root / mp4_video_path

                # Entries without a source .webm are skipped and get no row.
                if not input_path.exists():
                    continue

                # Argument list instead of a shell string: paths are passed
                # verbatim, with no quoting or injection concerns.
                cmd = [
                    "ffmpeg",
                    # Overwrite a leftover (possibly partial) .mp4 from an
                    # interrupted run instead of prompting on stdin.
                    "-y",
                    "-i",
                    str(input_path),
                    # Pad width and height up to the next even value
                    # (presumably because the libx264 encode needs even
                    # dimensions).
                    "-vf",
                    "pad=ceil(iw/2)*2:ceil(ih/2)*2",
                    "-c:v",
                    "libx264",
                    "-c:a",
                    "aac",
                    str(output_path),
                ]
                try:
                    subprocess.run(
                        cmd,
                        check=True,
                        stdout=subprocess.DEVNULL,
                        stderr=subprocess.DEVNULL,
                    )
                except (subprocess.CalledProcessError, OSError):
                    # OSError covers a missing ffmpeg binary, which the old
                    # shell-based call surfaced as a non-zero exit status;
                    # keep treating it as a per-video failure.
                    print(f"Conversion failed for {webm_video_path}")
                    continue

                input_path.unlink()

                cap = cv2.VideoCapture(str(output_path))
                if not cap.isOpened():
                    cap.release()
                    continue

                width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                fps = int(cap.get(cv2.CAP_PROP_FPS))
                n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                cap.release()

                caption = item["label"].replace("pretending to ", "")

                records.append(
                    {
                        "video_path": mp4_video_path,
                        "caption": caption,
                        "height": height,
                        "width": width,
                        "fps": fps,
                        "n_frames": n_frames,
                        "split": split,
                    }
                )
                successful_conversions += 1

            # Guard against an empty label file to avoid ZeroDivisionError.
            if total_videos:
                conversion_rate = (successful_conversions / total_videos) * 100
            else:
                conversion_rate = 0.0
            print(f"Conversion success rate: {conversion_rate:.2f}%")

        metadata_path = self.data_root / self.metadata_path
        metadata_path.parent.mkdir(parents=True, exist_ok=True)
        df = pd.DataFrame.from_records(records)
        df.to_csv(metadata_path, index=False)
        print(f"Created metadata CSV with {len(records)} videos")
|
|
|