File size: 5,403 Bytes
33569f9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 | import yaml
# Default configuration values. load_config() merges these into the dict read
# from a YAML file, so every key the file omits falls back to the value here.
DEFAULTS = {
    # random seed for reproducibility
    "init_rand_seed": 0,
    # dataset loader, specify the dataset here
    "dataset_name": "epic",
    "devices": ['cuda:0'], # default: single gpu
    # names of the splits used for training / validation
    "train_split": ('training', ),
    "val_split": ('validation', ),
    "model_name": "LocPointTransformer",
    "dataset": {
        # temporal stride of the feats
        "feat_stride": 16,
        # number of frames for each feat
        "num_frames": 32,
        # default fps, may vary across datasets; set to None to read it
        # from the json file
        "default_fps": None,
        # input feat dim (copied into the "model" section by _update_config)
        "input_dim": 2304,
        # number of classes (copied into the "model" section by _update_config)
        "num_classes": 97,
        # downsampling rate of features, 1 to use original resolution
        "downsample_rate": 1,
        # max sequence length during training
        # (copied into the "model" section by _update_config)
        "max_seq_len": 2304,
        # threshold for truncating an action
        "trunc_thresh": 0.5,
        # set to a tuple (e.g., (0.9, 1.0)) to enable random feature cropping
        # might not be implemented by the dataloader
        "crop_ratio": None,
        # if true, force upsampling of the input features into a fixed size
        # only used for ActivityNet
        "force_upsampling": False,
    },
    "loader": {
        "batch_size": 8,
        "num_workers": 4,
    },
    # network architecture
    "model": {
        # type of backbone (convTransformer | conv)
        "backbone_type": 'convTransformer',
        # type of FPN (fpn | identity)
        "fpn_type": "identity",
        "backbone_arch": (2, 2, 5),
        # scale factor between pyramid levels
        "scale_factor": 2,
        # regression range for pyramid levels
        "regression_range": [(0, 4), (4, 8), (8, 16), (16, 32), (32, 64), (64, 10000)],
        # number of heads in self-attention
        "n_head": 4,
        # window size for self attention; <=1 to use full seq (i.e., global attention)
        "n_mha_win_size": -1,
        # kernel size for embedding network
        "embd_kernel_size": 3,
        # (output) feature dim for embedding network
        "embd_dim": 512,
        # if attach a norm layer to embedding network
        # NOTE(review): the earlier comment said "group norm" but the key says
        # "ln" -- confirm which normalization the model actually applies
        "embd_with_ln": True,
        # feat dim for FPN
        "fpn_dim": 512,
        # if add ln at the end of fpn outputs
        "fpn_with_ln": True,
        # starting level for fpn
        "fpn_start_level": 0,
        # feat dim for head
        "head_dim": 512,
        # kernel size for reg/cls/center heads
        "head_kernel_size": 3,
        # number of layers in the head (including the final one)
        "head_num_layers": 3,
        # if attach a norm layer to heads
        # NOTE(review): the earlier comment said "group norm" but the key says
        # "ln" -- confirm which normalization the model actually applies
        "head_with_ln": True,
        # defines the max length of the buffered points
        "max_buffer_len_factor": 6.0,
        # use abs position encoding (added to input embedding); off by default
        "use_abs_pe": False,
        # use rel position encoding (added to self-attention); off by default
        "use_rel_pe": False,
    },
    "train_cfg": {
        # radius | none (if to use center sampling)
        "center_sample": "radius",
        "center_sample_radius": 1.5,
        "loss_weight": 1.0, # on reg_loss, use -1 to enable auto balancing
        "cls_prior_prob": 0.01,
        "init_loss_norm": 2000,
        # gradient clipping, not needed for pre-LN transformer
        "clip_grad_l2norm": -1,
        # cls head without data (a fix to epic-kitchens / thumos)
        "head_empty_cls": [],
        # dropout ratios for transformers
        "dropout": 0.0,
        # ratio for drop path
        "droppath": 0.1,
        # if to use label smoothing (>0.0)
        "label_smoothing": 0.0,
    },
    "test_cfg": {
        "pre_nms_thresh": 0.001,
        "pre_nms_topk": 5000,
        "iou_threshold": 0.1,
        "min_score": 0.01,
        "max_seg_num": 1000,
        "nms_method": 'soft', # soft | hard | none
        "nms_sigma" : 0.5,
        "duration_thresh": 0.05,
        "multiclass_nms": True,
        "ext_score_file": None,
        "voting_thresh" : 0.75,
    },
    # optimizer (for training)
    "opt": {
        # solver
        "type": "AdamW", # SGD or AdamW
        # solver params
        "momentum": 0.9,
        "weight_decay": 0.0,
        "learning_rate": 1e-3,
        # excluding the warmup epochs
        "epochs": 30,
        # learning-rate warmup
        "warmup": True,
        "warmup_epochs": 5,
        # lr scheduler: cosine / multistep
        "schedule_type": "cosine",
        # in #epochs excluding warmup
        "schedule_steps": [],
        "schedule_gamma": 0.1,
    }
}
def _merge(src, dst):
    """Recursively fill ``dst`` in place with the entries of ``src`` it lacks.

    Values already present in ``dst`` always win; ``src`` only supplies
    fallbacks. Nested dicts are merged key by key.

    Args:
        src: Mapping of fallback values (e.g. the defaults). Never modified.
        dst: Dict to complete. Modified in place.

    Raises:
        TypeError: If ``src`` holds a dict under a key where ``dst`` holds a
            value that is neither a dict nor ``None``.
    """
    # Function-scope import keeps this block self-contained.
    import copy

    for key, value in src.items():
        if key not in dst:
            # Copy so that later edits to ``dst`` never leak back into the
            # (usually module-level) defaults through shared dicts/lists.
            dst[key] = copy.deepcopy(value)
            continue
        if not isinstance(value, dict):
            # Scalar/list already provided by ``dst``: keep it as is.
            continue
        if dst[key] is None:
            # A section that is present but empty (YAML ``model:`` with no
            # children parses to None) falls back to the whole default section.
            dst[key] = copy.deepcopy(value)
        elif isinstance(dst[key], dict):
            _merge(value, dst[key])
        else:
            raise TypeError(
                f"config entry {key!r} must be a mapping, "
                f"got {type(dst[key]).__name__}"
            )
def load_default_config():
    """Return the default configuration as a fresh, independent dict.

    A deep copy of ``DEFAULTS`` is returned so that callers can freely modify
    the result without altering the module-level defaults that later calls
    (and ``load_config``) rely on.

    Returns:
        A new dict with the same content as ``DEFAULTS``.
    """
    # Function-scope import keeps this block self-contained.
    import copy

    return copy.deepcopy(DEFAULTS)
def _update_config(config):
    """Populate the derived entries of ``config["model"]`` and return ``config``.

    The model section receives ``input_dim``, ``num_classes`` and
    ``max_seq_len`` from the dataset section, plus references to the
    top-level ``train_cfg`` and ``test_cfg`` sections. ``config`` is
    modified in place.
    """
    # values the model section takes over from the dataset section
    for field in ("input_dim", "num_classes", "max_seq_len"):
        config["model"][field] = config["dataset"][field]
    # whole sections the model section points at (same objects, not copies)
    for section in ("train_cfg", "test_cfg"):
        config["model"][section] = config[section]
    return config
def load_config(config_file, defaults=DEFAULTS):
    """Load a YAML config file and complete it with default values.

    Args:
        config_file: Path of the YAML file to read.
        defaults: Mapping of fallback values for keys the file omits.
            It is never modified.

    Returns:
        The config dict: file values take precedence, missing keys come from
        ``defaults``, and the derived ``model`` entries are filled in by
        ``_update_config``.

    Raises:
        TypeError: If the top level of the YAML document is not a mapping.
    """
    # Function-scope import keeps this block self-contained.
    import copy

    with open(config_file, "r") as fd:
        # NOTE(security): FullLoader can construct more than plain YAML types.
        # If config files may come from untrusted sources, switch to
        # yaml.safe_load (after checking no config relies on python-specific
        # tags).
        config = yaml.load(fd, Loader=yaml.FullLoader)

    if config is None:
        # An empty file parses to None; treat it as "use all defaults".
        config = {}
    elif not isinstance(config, dict):
        raise TypeError(
            f"top level of {config_file!r} must be a mapping, "
            f"got {type(config).__name__}"
        )

    # Merge from a private copy so that the returned config never shares
    # dicts/lists with ``defaults``; otherwise _update_config (and callers)
    # would write into the module-level defaults.
    _merge(copy.deepcopy(defaults), config)
    return _update_config(config)
|