Spaces:
Sleeping
Sleeping
| import glob | |
| import numpy as np | |
| import os | |
| import shutil | |
| from src.logger.logger import _logger | |
def to_filelist(args, mode="train"):
    """Expand the glob patterns configured for ``mode`` into grouped file lists.

    Every entry of the selected ``args.data_*`` list is either a bare glob
    pattern or ``name:pattern``. Bare patterns are grouped under the name
    ``"_"``. Only the first ``:`` separates the name from the pattern, so
    patterns that themselves contain a colon are accepted.

    Args:
        args: Object providing ``data_train``, ``data_val`` and ``data_test``
            (iterables of pattern strings), plus ``local_rank`` and
            ``copy_inputs``.
        mode: One of ``"train"``, ``"val"`` or ``"test"``.

    Returns:
        A tuple ``(file_dict, filelist)`` where ``file_dict`` maps each group
        name to its list of files and ``filelist`` is the concatenation of
        all groups.

    Raises:
        NotImplementedError: If ``mode`` is not one of the supported values.
        KeyError: If training files must be split across ranks but the
            ``LOCAL_WORLD_SIZE`` environment variable is not set.
        AssertionError: If a group ends up with no files for this rank, or
            if the same path appears more than once in the final list.
    """
    if mode == "train":
        flist = args.data_train
    elif mode == "val":
        flist = args.data_val
    elif mode == "test":
        flist = args.data_test
    else:
        raise NotImplementedError("Invalid mode %s" % mode)
    print(flist)

    # keyword-based: 'a:/path/to/a b:/path/to/b'
    file_dict = {}
    for f in flist:
        # Split on the first colon only: a pattern may itself contain ':'
        # and must not make the unpacking fail.
        name, sep, fp = f.partition(":")
        if not sep:
            name, fp = "_", f
        file_dict.setdefault(name, []).extend(glob.glob(fp))

    # Sort so that every process sees the files in the same order.
    file_dict = {name: sorted(files) for name, files in file_dict.items()}

    if args.local_rank is not None and mode == "train":
        # Give each local rank every ``local_world_size``-th training file.
        local_world_size = int(os.environ["LOCAL_WORLD_SIZE"])
        sharded = {}
        for name, files in file_dict.items():
            new_files = files[args.local_rank :: local_world_size]
            assert len(new_files) > 0, (
                "No files left for local rank %s in file group %s"
                % (args.local_rank, name)
            )
            np.random.shuffle(new_files)
            sharded[name] = new_files
        file_dict = sharded

    if args.copy_inputs:
        import tempfile

        # mkdtemp() already returns a fresh, empty, private directory, so it
        # is used directly instead of being removed and re-created.
        tmpdir = tempfile.mkdtemp()
        new_file_dict = {name: [] for name in file_dict}
        for name, files in file_dict.items():
            for src in files:
                # Mirror the source path underneath the temporary directory.
                dest = os.path.join(tmpdir, src.lstrip("/"))
                os.makedirs(os.path.dirname(dest), exist_ok=True)
                shutil.copy2(src, dest)
                _logger.info("Copied file %s to %s", src, dest)
                new_file_dict[name].append(dest)
            # NOTE(review): copy2 raises on failure, so this mismatch is not
            # expected to trigger; kept as a sanity check.
            if len(files) != len(new_file_dict[name]):
                _logger.error(
                    "Only %d/%d files copied for %s file group %s",
                    len(new_file_dict[name]),
                    len(files),
                    mode,
                    name,
                )
        file_dict = new_file_dict

    # Flatten in linear time (sum(lists, []) is quadratic).
    filelist = [path for files in file_dict.values() for path in files]
    assert len(filelist) == len(set(filelist)), "Duplicate files in the file list"
    return file_dict, filelist
def clear_empty_paths(dir):
    """Remove the immediate subdirectories of ``dir`` that are empty.

    Only direct children are inspected: plain files are skipped, and a
    subdirectory counts as empty when it contains no files or folders.
    Each removal is reported through the module logger.
    """
    for f in os.listdir(dir):
        candidate = os.path.join(dir, f)
        # Anything that is a directory with no entries gets removed.
        if os.path.isdir(candidate) and not os.listdir(candidate):
            shutil.rmtree(candidate)
            _logger.info("Removed empty path %s" % f)
| import io | |
| import torch | |
| import pickle | |
class CPU_Unpickler(pickle.Unpickler):
    """Unpickler that maps pickled torch storages onto the CPU.

    References to ``torch.storage._load_from_bytes`` found in the pickle
    stream are replaced by a loader that calls ``torch.load`` with
    ``map_location='cpu'``. Every other global is resolved normally.

    NOTE: unpickling can execute arbitrary code; only use this on data from
    a trusted source.
    """

    def find_class(self, module, name):
        """Resolve ``module.name``, redirecting torch's storage loader to CPU."""
        wants_storage_loader = (
            module == 'torch.storage' and name == '_load_from_bytes'
        )
        if not wants_storage_loader:
            return super().find_class(module, name)

        def load_on_cpu(raw_bytes):
            # Deserialize the embedded storage bytes directly onto the CPU.
            return torch.load(io.BytesIO(raw_bytes), map_location='cpu')

        return load_on_cpu