Spaces:

KempnerInstituteAI
/

LVP

Running on Zero

LVP

File size: 5,569 Bytes

142a1ac

import requests
import subprocess
import json
import pandas as pd
import zipfile
import cv2
from pathlib import Path
from tqdm import tqdm

from .video_base import VideoDataset


class SomethingSomethingDataset(VideoDataset):
    """
    Something Something Dataset from https://arxiv.org/abs/1706.04261
    """

    def download(self):
        self.data_root.mkdir(parents=True, exist_ok=True)

        urls = [
            "https://apigwx-aws.qualcomm.com/qsc/public/v1/api/download/software/dataset/AIDataset/Something-Something-V2/20bn-something-something-v2-00",
            "https://apigwx-aws.qualcomm.com/qsc/public/v1/api/download/software/dataset/AIDataset/Something-Something-V2/20bn-something-something-v2-01",
            "https://softwarecenter.qualcomm.com/api/download/software/dataset/AIDataset/Something-Something-V2/20bn-something-something-download-package-labels.zip",
        ]

        for url in urls:
            filename = Path(url).name
            filepath = self.data_root / filename

            print(f"Downloading {filename}...")
            response = requests.get(url, stream=True)
            response.raise_for_status()

            with open(filepath, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        # Use shell command to concatenate and extract tar video files
        print("Concatenating and extracting tar files...")
        cmd = f"cd {self.data_root} && cat 20bn-something-something-v2-0? | tar -xvzf -"
        subprocess.run(cmd, shell=True, check=True)
        print(f"Deleting zip files for video data...")
        for zip_file in self.data_root.glob("20bn-something-something-v2-0*"):
            print(f"Deleting {zip_file.name}...")
            zip_file.unlink()

        # Unzip the labels package
        labels_zip_path = (
            self.data_root / "20bn-something-something-download-package-labels.zip"
        )
        if labels_zip_path.exists():
            print(f"Extracting {labels_zip_path.name}...")
            with zipfile.ZipFile(labels_zip_path, "r") as zip_ref:
                zip_ref.extractall(self.data_root)
        print(f"Deleting zip file for labels...")
        labels_zip_path.unlink()

        # Create metadata CSV from labels
        print("Creating metadata CSV file for Something Something Dataset")

        json_files = {
            "training": "labels/train.json",
            "validation": "labels/validation.json",
        }

        records = []
        for split, json_file in json_files.items():
            with open(self.data_root / json_file, "r") as f:
                labels = json.load(f)

            for item in tqdm(labels, desc=f"Creating metadata for {split}"):
                webm_video_path = f"20bn-something-something-v2/{item['id']}.webm"
                mp4_video_path = f"20bn-something-something-v2/{item['id']}.mp4"

                total_videos = len(labels)
                successful_conversions = 0

                if (self.data_root / webm_video_path).exists():
                    # Convert webm to mp4 using ffmpeg
                    input_path = str(self.data_root / webm_video_path)
                    output_path = str(self.data_root / mp4_video_path)
                    cmd = f'ffmpeg -i {input_path} -vf "pad=ceil(iw/2)*2:ceil(ih/2)*2" -c:v libx264 -c:a aac {output_path}'
                    try:
                        subprocess.run(
                            cmd,
                            shell=True,
                            check=True,
                            stdout=subprocess.DEVNULL,
                            stderr=subprocess.DEVNULL,
                        )
                        # Delete the webm file after successful conversion
                        (self.data_root / webm_video_path).unlink()

                        # Get video metadata using cv2
                        cap = cv2.VideoCapture(output_path)
                        if not cap.isOpened():
                            continue

                        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
                        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
                        fps = int(cap.get(cv2.CAP_PROP_FPS))
                        n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                        cap.release()

                        caption = item["label"].replace("pretending to ", "")

                        records.append(
                            {
                                "video_path": mp4_video_path,
                                "caption": caption,
                                "height": height,
                                "width": width,
                                "fps": fps,
                                "n_frames": n_frames,
                                "split": split,
                            }
                        )
                        successful_conversions += 1
                    except subprocess.CalledProcessError:
                        print(f"Conversion failed for {webm_video_path}")

                conversion_rate = (successful_conversions / total_videos) * 100
                print(f"Conversion success rate: {conversion_rate:.2f}%")

        # Save as CSV
        metadata_path = self.data_root / self.metadata_path
        metadata_path.parent.mkdir(parents=True, exist_ok=True)
        df = pd.DataFrame.from_records(records)
        df.to_csv(metadata_path, index=False)
        print(f"Created metadata CSV with {len(records)} videos")