MajorDaniel committed on
Commit
d3c02b2
·
verified ·
1 Parent(s): 48cf75e

Create hf_blender_extract.py

Browse files
Files changed (1) hide show
  1. src/data/hf_blender_extract.py +58 -0
src/data/hf_blender_extract.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/data/hf_blender_extract.py
2
+ import os
3
+ import sys
4
+ import time
5
+ from pathlib import Path
6
+
7
+ def _parse(argv):
8
+ # argv comes after `--`
9
+ args = {"input": None, "output_dir": None, "target_count": 50000}
10
+ it = iter(argv)
11
+ for k in it:
12
+ if k == "--input":
13
+ args["input"] = next(it)
14
+ elif k == "--output_dir":
15
+ args["output_dir"] = next(it)
16
+ elif k == "--target_count":
17
+ args["target_count"] = int(next(it))
18
+ if not args["input"] or not args["output_dir"]:
19
+ raise SystemExit("Usage: --input <file> --output_dir <dir> [--target_count N]")
20
+ return args
21
+
22
def main():
    """Parse the arguments after ``--`` and run the built-in extraction.

    Only the part of ``sys.argv`` following the ``--`` separator is handed
    to ``_parse``; when the separator is absent an empty list is parsed, so
    ``_parse`` exits with its usage message. The output directory is
    created if needed, the input is resolved through ``get_files`` and the
    result is passed to ``extract_builtin`` with a single run.

    Raises:
        SystemExit: Propagated from ``_parse`` on invalid arguments.
        RuntimeError: If ``get_files`` returns nothing to extract.
    """
    # NOTE(review): the `--` convention suggests this is launched through
    # Blender's `--python` option; confirm against the caller.
    command_line = sys.argv
    script_args = (
        command_line[command_line.index("--") + 1:]
        if "--" in command_line
        else []
    )
    options = _parse(script_args)

    # Make sure the destination exists before anything is written to it.
    output_dir = Path(options["output_dir"])
    output_dir.mkdir(parents=True, exist_ok=True)

    # Project import is deferred until after argument validation and
    # directory creation, matching the original statement order.
    from src.data.extract import extract_builtin, get_files

    sources = get_files(
        data_name="raw_data.npz",
        inputs=str(options["input"]),
        input_dataset_dir=None,
        output_dataset_dir=str(output_dir),
        force_override=True,
        warning=False,
    )
    if not sources:
        raise RuntimeError("No files to extract")

    # Whole seconds since the epoch, passed along as a string.
    run_stamp = str(int(time.time()))
    extract_builtin(
        output_folder=str(output_dir),
        target_count=int(options["target_count"]),
        num_runs=1,
        id=0,
        time=run_stamp,
        files=sources,
    )
56
+
57
# Run the extraction only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()