# UniRig/src/data/hf_blender_extract.py
# MajorDaniel's picture
# Create hf_blender_extract.py
# d3c02b2 verified
# src/data/hf_blender_extract.py
import os
import sys
import time
from pathlib import Path
def _parse(argv):
# argv comes after `--`
args = {"input": None, "output_dir": None, "target_count": 50000}
it = iter(argv)
for k in it:
if k == "--input":
args["input"] = next(it)
elif k == "--output_dir":
args["output_dir"] = next(it)
elif k == "--target_count":
args["target_count"] = int(next(it))
if not args["input"] or not args["output_dir"]:
raise SystemExit("Usage: --input <file> --output_dir <dir> [--target_count N]")
return args
def main():
    """Run the built-in extraction for one input, configured from the CLI.

    Only the tokens after a ``--`` separator in ``sys.argv`` are treated as
    this script's options (the file name suggests it is launched through
    Blender, whose own arguments would precede ``--`` — TODO confirm).
    Without a separator the option list is empty, so ``_parse`` exits with
    its usage message.

    Raises:
        SystemExit: propagated from ``_parse`` on bad or missing options.
        RuntimeError: if ``get_files`` returns nothing to extract.
    """
    try:
        separator = sys.argv.index("--")
    except ValueError:
        script_args = []
    else:
        script_args = sys.argv[separator + 1:]
    options = _parse(script_args)

    # Make sure the destination exists before anything is written to it.
    output_dir = Path(options["output_dir"])
    output_dir.mkdir(parents=True, exist_ok=True)

    # Imported here rather than at module level, so the project package is
    # only needed once the options have been validated.
    from src.data.extract import extract_builtin, get_files

    pending = get_files(
        data_name="raw_data.npz",
        inputs=str(options["input"]),
        input_dataset_dir=None,
        output_dataset_dir=str(output_dir),
        force_override=True,
        warning=False,
    )
    if not pending:
        raise RuntimeError("No files to extract")

    extract_builtin(
        output_folder=str(output_dir),
        target_count=int(options["target_count"]),
        num_runs=1,
        id=0,
        # Current Unix time in whole seconds, passed as a string.
        time=str(int(time.time())),
        files=pending,
    )
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()