import yaml DEFAULTS = { # random seed for reproducibility "init_rand_seed": 0, # dataset loader, specify the dataset here "dataset_name": "epic", "devices": ['cuda:0'], # default: single gpu "train_split": ('training', ), "val_split": ('validation', ), "model_name": "LocPointTransformer", "dataset": { # temporal stride of the feats "feat_stride": 16, # number of frames for each feat "num_frames": 32, # default fps, may vary across datasets; Set to none for read from json file "default_fps": None, # input feat dim "input_dim": 2304, # number of classes "num_classes": 97, # downsampling rate of features, 1 to use original resolution "downsample_rate": 1, # max sequence length during training "max_seq_len": 2304, # threshold for truncating an action "trunc_thresh": 0.5, # set to a tuple (e.g., (0.9, 1.0)) to enable random feature cropping # might not be implemented by the dataloader "crop_ratio": None, # if true, force upsampling of the input features into a fixed size # only used for ActivityNet "force_upsampling": False, }, "loader": { "batch_size": 8, "num_workers": 4, }, # network architecture "model": { # type of backbone (convTransformer | conv) "backbone_type": 'convTransformer', # type of FPN (fpn | identity) "fpn_type": "identity", "backbone_arch": (2, 2, 5), # scale factor between pyramid levels "scale_factor": 2, # regression range for pyramid levels "regression_range": [(0, 4), (4, 8), (8, 16), (16, 32), (32, 64), (64, 10000)], # number of heads in self-attention "n_head": 4, # window size for self attention; <=1 to use full seq (ie global attention) "n_mha_win_size": -1, # kernel size for embedding network "embd_kernel_size": 3, # (output) feature dim for embedding network "embd_dim": 512, # if attach group norm to embedding network "embd_with_ln": True, # feat dim for FPN "fpn_dim": 512, # if add ln at the end of fpn outputs "fpn_with_ln": True, # starting level for fpn "fpn_start_level": 0, # feat dim for head "head_dim": 512, # kernel size for reg/cls/center heads "head_kernel_size": 3, # number of layers in the head (including the final one) "head_num_layers": 3, # if attach group norm to heads "head_with_ln": True, # defines the max length of the buffered points "max_buffer_len_factor": 6.0, # disable abs position encoding (added to input embedding) "use_abs_pe": False, # use rel position encoding (added to self-attention) "use_rel_pe": False, }, "train_cfg": { # radius | none (if to use center sampling) "center_sample": "radius", "center_sample_radius": 1.5, "loss_weight": 1.0, # on reg_loss, use -1 to enable auto balancing "cls_prior_prob": 0.01, "init_loss_norm": 2000, # gradient cliping, not needed for pre-LN transformer "clip_grad_l2norm": -1, # cls head without data (a fix to epic-kitchens / thumos) "head_empty_cls": [], # dropout ratios for tranformers "dropout": 0.0, # ratio for drop path "droppath": 0.1, # if to use label smoothing (>0.0) "label_smoothing": 0.0, }, "test_cfg": { "pre_nms_thresh": 0.001, "pre_nms_topk": 5000, "iou_threshold": 0.1, "min_score": 0.01, "max_seg_num": 1000, "nms_method": 'soft', # soft | hard | none "nms_sigma" : 0.5, "duration_thresh": 0.05, "multiclass_nms": True, "ext_score_file": None, "voting_thresh" : 0.75, }, # optimizer (for training) "opt": { # solver "type": "AdamW", # SGD or AdamW # solver params "momentum": 0.9, "weight_decay": 0.0, "learning_rate": 1e-3, # excluding the warmup epochs "epochs": 30, # lr scheduler: cosine / multistep "warmup": True, "warmup_epochs": 5, "schedule_type": "cosine", # in #epochs excluding warmup "schedule_steps": [], "schedule_gamma": 0.1, } } def _merge(src, dst): for k, v in src.items(): if k in dst: if isinstance(v, dict): _merge(src[k], dst[k]) else: dst[k] = v def load_default_config(): config = DEFAULTS return config def _update_config(config): # fill in derived fields config["model"]["input_dim"] = config["dataset"]["input_dim"] config["model"]["num_classes"] = config["dataset"]["num_classes"] config["model"]["max_seq_len"] = config["dataset"]["max_seq_len"] config["model"]["train_cfg"] = config["train_cfg"] config["model"]["test_cfg"] = config["test_cfg"] return config def load_config(config_file, defaults=DEFAULTS): with open(config_file, "r") as fd: config = yaml.load(fd, Loader=yaml.FullLoader) _merge(defaults, config) config = _update_config(config) return config