ash56
/

ssl-aasist

Model card Files Files and versions

ssl-aasist / fairseq /examples /MMPT /scripts /video_feature_extractor /pathbuilder.py

ash56's picture

Add files using upload-large-folder tool

878264b verified 12 months ago

3.41 kB

	# Copyright (c) Facebook, Inc. and its affiliates.
	#
	# This source code is licensed under the MIT license found in the
	# LICENSE file in the root directory of this source tree.
	import os
	import urllib.parse
	import json
	import pandas as pd

	from tqdm import tqdm


	# Registry of special-cased video sources used by PathBuilder.build: a
	# comma-separated entry of `video_dirs` that is a key of this dict is not
	# listed on disk; instead `supported_formats[entry].load(entry,
	# video_id_to_path)` is called to fill the id -> path mapping.
	# Nothing is registered at definition time.
	# TODO: extending to other datasets.
	supported_formats = {}


	class PathBuilder(object):
	    """Plan which videos to process and where each extracted feature goes.

	    The plan is a dict with two parallel lists, ``"video_path"`` and
	    ``"feature_path"``. It is cached as ``meta_plan.json`` inside the
	    feature directory so that later calls reuse it instead of scanning the
	    video directories again.
	    """

	    @classmethod
	    def build(cls, video_dirs, feature_dir, ext, shards=0, split=None):
	        """Return the (optionally split) extraction plan for ``video_dirs``.

	        Args:
	            video_dirs: comma-separated string of video directories. An
	                entry that is a key of ``supported_formats`` is handed to
	                that entry's ``load`` method; any other entry is listed on
	                disk, descending one level into sub-directories.
	            feature_dir: directory holding ``meta_plan.json`` and used as
	                the root of the generated feature paths; created if it
	                does not exist.
	            ext: extension appended to each video id to form the feature
	                file name. ``None`` means each feature file reuses the
	                extension of its own video.
	            shards: when non-zero, feature paths are spread over
	                sub-directories named ``str(idx % shards)``.
	            split: optional ``"cur/total"`` string; selects the ``cur``-th
	                of ``total`` contiguous chunks of the plan.

	        Returns:
	            dict with keys ``"video_path"`` and ``"feature_path"``.

	        Raises:
	            AssertionError: if ``split`` is not ``"cur/total"`` with
	                ``0 <= cur < total``.
	            ValueError: if a part of ``split`` is not an integer.
	        """
	        meta_fn = os.path.join(feature_dir, "meta_plan.json")
	        os.makedirs(feature_dir, exist_ok=True)
	        if os.path.isfile(meta_fn):
	            with open(meta_fn) as fr:
	                meta = json.load(fr)
	            # A cached plan must still honour ``split``; otherwise every
	            # caller that finds the cache would receive the whole plan.
	            return cls._select_split(meta, split)

	        print("searching videos...")
	        video_id_to_path = cls._scan_videos(video_dirs)
	        meta = cls._plan_feature_paths(
	            video_id_to_path, feature_dir, ext, shards)

	        print("targeting", len(meta["feature_path"]), "videos")
	        # The full plan is what gets cached; the split is applied afterwards.
	        with open(meta_fn, "w") as fw:
	            json.dump(meta, fw)

	        return cls._select_split(meta, split)

	    @staticmethod
	    def _scan_videos(video_dirs):
	        """Map each video id (file name without extension) to its path."""
	        video_id_to_path = {}
	        for video_dir in video_dirs.split(","):
	            # TODO: add supports of recursive listdir.
	            if video_dir in supported_formats:
	                supported_formats[video_dir].load(video_dir, video_id_to_path)
	                continue
	            for fn in tqdm(os.listdir(video_dir)):
	                video_fn = os.path.join(video_dir, fn)
	                if os.path.isfile(video_fn):
	                    video_id_to_path[os.path.splitext(fn)[0]] = video_fn
	                elif os.path.isdir(video_fn):
	                    # shards of folders: look exactly one level deeper.
	                    for shard_fn in os.listdir(video_fn):
	                        shard_path = os.path.join(video_fn, shard_fn)
	                        if os.path.isfile(shard_path):
	                            video_id = os.path.splitext(shard_fn)[0]
	                            video_id_to_path[video_id] = shard_path
	        return video_id_to_path

	    @staticmethod
	    def _plan_feature_paths(video_id_to_path, feature_dir, ext, shards):
	        """Build the parallel video/feature path lists for every video."""
	        video_path, feature_path = [], []
	        valid_ext = set()
	        for idx, (video_id, video_fn) in enumerate(video_id_to_path.items()):
	            video_path.append(video_fn)
	            if ext is None:
	                # use original file ext for format compatibility. Kept in a
	                # local so the next video derives its own extension instead
	                # of inheriting the first one's. Only the path component of
	                # the entry is inspected when extracting the extension.
	                cur_ext = os.path.splitext(
	                    urllib.parse.urlparse(video_fn).path)[1]
	            else:
	                cur_ext = ext
	            if cur_ext not in valid_ext:
	                valid_ext.add(cur_ext)
	                print("adding", cur_ext)
	            if shards:
	                shard_id = str(idx % shards)
	                feature_fn = os.path.join(
	                    feature_dir, shard_id, video_id + cur_ext)
	            else:
	                feature_fn = os.path.join(feature_dir, video_id + cur_ext)
	            feature_path.append(feature_fn)
	        return {"video_path": video_path, "feature_path": feature_path}

	    @staticmethod
	    def _select_split(meta, split):
	        """Return the ``cur``-th of ``total`` chunks of ``meta`` (all if None)."""
	        if split is None:
	            return meta
	        splits = split.split("/")
	        assert len(splits) == 2, "split must look like 'cur/total'"
	        cur, total = int(splits[0]), int(splits[1])
	        # A negative ``cur`` would otherwise yield a meaningless slice.
	        assert 0 <= cur < total, "split requires 0 <= cur < total"
	        import math
	        chunk = math.ceil(len(meta["video_path"]) / total)
	        start = cur * chunk
	        end = start + chunk
	        return {
	            "video_path": meta["video_path"][start:end],
	            "feature_path": meta["feature_path"][start:end],
	        }