# Experiment configuration, restored from a whitespace-collapsed dump to
# block-style YAML. Keys, values, anchors and aliases are unchanged.
#
# NOTE(review): nesting was reconstructed from the per-level alphabetical key
# order of the flattened original. Two placements cannot be decided from key
# order alone and should be confirmed against the project's config defaults:
#   * RANDOM_FLIP .. SIZE_DIVISIBILITY are placed under INPUT (not INPUT.PSEUDO)
#   * WEIGHTS is placed under MODEL (not MODEL.VITA)
CUDNN_BENCHMARK: false
DATALOADER:
  ASPECT_RATIO_GROUPING: true
  FILTER_EMPTY_ANNOTATIONS: true
  NUM_WORKERS: 8
  REPEAT_SQRT: true
  REPEAT_THRESHOLD: 0.0
  SAMPLER_TRAIN: TrainingSampler
DATASETS:
  DATASET_RATIO:
  - 1.0
  - 0.25
  PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
  PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
  PROPOSAL_FILES_TEST: []
  PROPOSAL_FILES_TRAIN: []
  TEST:
  - mevis_val
  TRAIN:
  - mevis_train
FLOAT32_PRECISION: ''
GLOBAL:
  HACK: 1.0
INPUT:
  AUGMENTATIONS: []
  COLOR_AUG_SSD: false
  CROP:
    ENABLED: true
    SINGLE_CATEGORY_MAX_AREA: 1.0
    SIZE:
    - 384
    - 600
    TYPE: absolute_range
  DATASET_MAPPER_NAME: mask_former_semantic
  FORMAT: RGB
  IMAGE_SIZE: 1024
  LSJ_AUG:
    ENABLED: false
    IMAGE_SIZE: 768
    MAX_SCALE: 2.0
    MIN_SCALE: 0.1
  MASK_FORMAT: polygon
  MAX_SCALE: 2.0
  MAX_SIZE_TEST: 1333
  MAX_SIZE_TRAIN: 768
  MIN_SCALE: 0.1
  MIN_SIZE_TEST: 448
  MIN_SIZE_TRAIN:
  - 288
  - 320
  - 352
  - 384
  - 416
  - 448
  - 480
  - 512
  MIN_SIZE_TRAIN_SAMPLING: choice_by_clip
  PSEUDO:
    AUGMENTATIONS:
    - rotation
    CROP:
      ENABLED: true
      SIZE:
      - 384
      - 600
      TYPE: absolute_range
    MAX_SIZE_TRAIN: 768
    MIN_SIZE_TRAIN:
    - 288
    - 320
    - 352
    - 384
    - 416
    - 448
    - 480
    - 512
    MIN_SIZE_TRAIN_SAMPLING: choice_by_clip
  # NOTE(review): the next five keys are assumed to be INPUT-level, not
  # PSEUDO-level — confirm.
  RANDOM_FLIP: flip_by_clip
  SAMPLING_FRAME_NUM: 8
  SAMPLING_FRAME_RANGE: 10
  SAMPLING_FRAME_SHUFFLE: false
  SIZE_DIVISIBILITY: -1
MODEL:
  ANCHOR_GENERATOR:
    ANGLES:
    - - -90
      - 0
      - 90
    ASPECT_RATIOS:
    - - 0.5
      - 1.0
      - 2.0
    NAME: DefaultAnchorGenerator
    OFFSET: 0.0
    SIZES:
    - - 32
      - 64
      - 128
      - 256
      - 512
  BACKBONE:
    FREEZE_AT: 0
    NAME: D2SwinTransformer
  DEVICE: cuda
  FPN:
    FUSE_TYPE: sum
    IN_FEATURES: []
    NORM: ''
    OUT_CHANNELS: 256
  KEYPOINT_ON: false
  LOAD_PROPOSALS: false
  MASK_FORMER:
    CLASS_WEIGHT: 2.0
    DEC_LAYERS: 10
    DEEP_SUPERVISION: true
    DICE_WEIGHT: 5.0
    DIM_FEEDFORWARD: 2048
    DROPOUT: 0.0
    ENC_LAYERS: 0
    ENFORCE_INPUT_PROJ: false
    HIDDEN_DIM: 256
    IMPORTANCE_SAMPLE_RATIO: 0.75
    MASK_WEIGHT: 5.0
    NHEADS: 8
    NO_OBJECT_WEIGHT: 0.1
    NUM_OBJECT_QUERIES: 20
    OVERSAMPLE_RATIO: 3.0
    PRE_NORM: false
    SIZE_DIVISIBILITY: 32
    TEST:
      INSTANCE_ON: true
      OBJECT_MASK_THRESHOLD: 0.8
      OVERLAP_THRESHOLD: 0.8
      PANOPTIC_ON: false
      SEMANTIC_ON: false
      SEM_SEG_POSTPROCESSING_BEFORE_INFERENCE: false
    TRAIN_NUM_POINTS: 12544
    TRANSFORMER_DECODER_NAME: DshmpMultiScaleMaskedTransformerDecoder
    TRANSFORMER_IN_FEATURE: multi_scale_pixel_decoder
  MASK_ON: true
  META_ARCHITECTURE: DsHmp
  PANOPTIC_FPN:
    COMBINE:
      ENABLED: true
      INSTANCES_CONFIDENCE_THRESH: 0.5
      OVERLAP_THRESH: 0.5
      STUFF_AREA_LIMIT: 4096
    INSTANCE_LOSS_WEIGHT: 1.0
  PIXEL_MEAN:
  - 123.675
  - 116.28
  - 103.53
  PIXEL_STD:
  - 58.395
  - 57.12
  - 57.37
  PROPOSAL_GENERATOR:
    MIN_SIZE: 0
    NAME: RPN
  RESNETS:
    DEFORM_MODULATED: false
    DEFORM_NUM_GROUPS: 1
    DEFORM_ON_PER_STAGE:
    - false
    - false
    - false
    - false
    DEPTH: 50
    NORM: FrozenBN
    NUM_GROUPS: 1
    OUT_FEATURES:
    - res2
    - res3
    - res4
    - res5
    RES2_OUT_CHANNELS: 256
    RES4_DILATION: 1
    RES5_DILATION: 1
    RES5_MULTI_GRID:
    - 1
    - 1
    - 1
    STEM_OUT_CHANNELS: 64
    STEM_TYPE: basic
    STRIDE_IN_1X1: false
    WIDTH_PER_GROUP: 64
  RETINANET:
    BBOX_REG_LOSS_TYPE: smooth_l1
    # Anchor &id002 is aliased by MODEL.RPN.BBOX_REG_WEIGHTS further down.
    BBOX_REG_WEIGHTS: &id002
    - 1.0
    - 1.0
    - 1.0
    - 1.0
    FOCAL_LOSS_ALPHA: 0.25
    FOCAL_LOSS_GAMMA: 2.0
    IN_FEATURES:
    - p3
    - p4
    - p5
    - p6
    - p7
    IOU_LABELS:
    - 0
    - -1
    - 1
    IOU_THRESHOLDS:
    - 0.4
    - 0.5
    NMS_THRESH_TEST: 0.5
    NORM: ''
    NUM_CLASSES: 80
    NUM_CONVS: 4
    PRIOR_PROB: 0.01
    SCORE_THRESH_TEST: 0.05
    SMOOTH_L1_LOSS_BETA: 0.1
    TOPK_CANDIDATES_TEST: 1000
  ROI_BOX_CASCADE_HEAD:
    # The first weight list carries anchor &id001, which is aliased by
    # MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS.
    BBOX_REG_WEIGHTS:
    - &id001
      - 10.0
      - 10.0
      - 5.0
      - 5.0
    - - 20.0
      - 20.0
      - 10.0
      - 10.0
    - - 30.0
      - 30.0
      - 15.0
      - 15.0
    IOUS:
    - 0.5
    - 0.6
    - 0.7
  ROI_BOX_HEAD:
    BBOX_REG_LOSS_TYPE: smooth_l1
    BBOX_REG_LOSS_WEIGHT: 1.0
    BBOX_REG_WEIGHTS: *id001
    CLS_AGNOSTIC_BBOX_REG: false
    CONV_DIM: 256
    FC_DIM: 1024
    FED_LOSS_FREQ_WEIGHT_POWER: 0.5
    FED_LOSS_NUM_CLASSES: 50
    NAME: ''
    NORM: ''
    NUM_CONV: 0
    NUM_FC: 0
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 0
    POOLER_TYPE: ROIAlignV2
    SMOOTH_L1_BETA: 0.0
    TRAIN_ON_PRED_BOXES: false
    USE_FED_LOSS: false
    USE_SIGMOID_CE: false
  ROI_HEADS:
    BATCH_SIZE_PER_IMAGE: 512
    IN_FEATURES:
    - res4
    IOU_LABELS:
    - 0
    - 1
    IOU_THRESHOLDS:
    - 0.5
    NAME: Res5ROIHeads
    NMS_THRESH_TEST: 0.5
    NUM_CLASSES: 80
    POSITIVE_FRACTION: 0.25
    PROPOSAL_APPEND_GT: true
    SCORE_THRESH_TEST: 0.05
  ROI_KEYPOINT_HEAD:
    CONV_DIMS:
    - 512
    - 512
    - 512
    - 512
    - 512
    - 512
    - 512
    - 512
    LOSS_WEIGHT: 1.0
    MIN_KEYPOINTS_PER_IMAGE: 1
    NAME: KRCNNConvDeconvUpsampleHead
    NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
    NUM_KEYPOINTS: 17
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 0
    POOLER_TYPE: ROIAlignV2
  ROI_MASK_HEAD:
    CLS_AGNOSTIC_MASK: false
    CONV_DIM: 256
    NAME: MaskRCNNConvUpsampleHead
    NORM: ''
    NUM_CONV: 0
    POOLER_RESOLUTION: 14
    POOLER_SAMPLING_RATIO: 0
    POOLER_TYPE: ROIAlignV2
  RPN:
    BATCH_SIZE_PER_IMAGE: 256
    BBOX_REG_LOSS_TYPE: smooth_l1
    BBOX_REG_LOSS_WEIGHT: 1.0
    BBOX_REG_WEIGHTS: *id002
    BOUNDARY_THRESH: -1
    CONV_DIMS:
    - -1
    HEAD_NAME: StandardRPNHead
    IN_FEATURES:
    - res4
    IOU_LABELS:
    - 0
    - -1
    - 1
    IOU_THRESHOLDS:
    - 0.3
    - 0.7
    LOSS_WEIGHT: 1.0
    NMS_THRESH: 0.7
    POSITIVE_FRACTION: 0.5
    POST_NMS_TOPK_TEST: 1000
    POST_NMS_TOPK_TRAIN: 2000
    PRE_NMS_TOPK_TEST: 6000
    PRE_NMS_TOPK_TRAIN: 12000
    SMOOTH_L1_BETA: 0.0
  SEM_SEG_HEAD:
    ASPP_CHANNELS: 256
    ASPP_DILATIONS:
    - 6
    - 12
    - 18
    ASPP_DROPOUT: 0.1
    COMMON_STRIDE: 4
    CONVS_DIM: 256
    DEFORMABLE_TRANSFORMER_ENCODER_IN_FEATURES:
    - res3
    - res4
    - res5
    DEFORMABLE_TRANSFORMER_ENCODER_N_HEADS: 8
    DEFORMABLE_TRANSFORMER_ENCODER_N_POINTS: 4
    IGNORE_VALUE: 255
    IN_FEATURES:
    - res2
    - res3
    - res4
    - res5
    LOSS_TYPE: hard_pixel_mining
    LOSS_WEIGHT: 1.0
    MASK_DIM: 256
    NAME: MaskFormerHead
    NORM: GN
    NUM_CLASSES: 1
    PIXEL_DECODER_NAME: MSDeformAttnPixelDecoder
    PROJECT_CHANNELS:
    - 48
    PROJECT_FEATURES:
    - res2
    TRANSFORMER_ENC_LAYERS: 6
    USE_DEPTHWISE_SEPARABLE_CONV: false
  SWIN:
    APE: false
    ATTN_DROP_RATE: 0.0
    DEPTHS:
    - 2
    - 2
    - 6
    - 2
    DROP_PATH_RATE: 0.3
    DROP_RATE: 0.0
    EMBED_DIM: 96
    MLP_RATIO: 4.0
    NUM_HEADS:
    - 3
    - 6
    - 12
    - 24
    OUT_FEATURES:
    - res2
    - res3
    - res4
    - res5
    PATCH_NORM: true
    PATCH_SIZE: 4
    PRETRAIN_IMG_SIZE: 224
    QKV_BIAS: true
    QK_SCALE: null
    USE_CHECKPOINT: false
    WINDOW_SIZE: 7
  VITA:
    APPLY_CLS_THRES: 0.01
    DEC_LAYERS: 3
    DEEP_SUPERVISION: true
    DIM_FEEDFORWARD: 2048
    DROPOUT: 0.0
    ENC_LAYERS: 6
    ENC_WINDOW_SIZE: 8
    ENFORCE_INPUT_PROJ: true
    FREEZE_DETECTOR: false
    FREEZE_TEXT_ENCODER: true
    HIDDEN_DIM: 256
    LAST_LAYER_NUM: 3
    MULTI_CLS_ON: true
    NHEADS: 8
    NO_OBJECT_WEIGHT: 0.1
    NUM_OBJECT_QUERIES: 10
    PRE_NORM: false
    SIM_USE_CLIP: true
    SIM_WEIGHT: 1.0
    TEST_INTERPOLATE_CHUNK_SIZE: 5
    TEST_OUTPUT_THRESHOLD: 0.5
    TEST_RUN_CHUNK_SIZE: 18
  # NOTE(review): assumed to be MODEL-level rather than VITA-level — confirm.
  WEIGHTS: ./weights/repro_bs8_ddp_ft/model_final.pth
OUTPUT_DIR: ./output/valid_u_bs8ckpt
SEED: -1
SOLVER:
  AMP:
    ENABLED: true
  BACKBONE_MULTIPLIER: 0.1
  BASE_LR: 5.0e-05
  BASE_LR_END: 0.0
  BIAS_LR_FACTOR: 1.0
  CHECKPOINT_PERIOD: 5000
  CLIP_GRADIENTS:
    CLIP_TYPE: full_model
    CLIP_VALUE: 0.01
    ENABLED: true
    NORM_TYPE: 2.0
  GAMMA: 0.1
  IMS_PER_BATCH: 8
  LR_SCHEDULER_NAME: WarmupMultiStepLR
  MAX_ITER: 55000
  MOMENTUM: 0.9
  NESTEROV: false
  NUM_DECAYS: 3
  OPTIMIZER: ADAMW
  POLY_LR_CONSTANT_ENDING: 0.0
  POLY_LR_POWER: 0.9
  REFERENCE_WORLD_SIZE: 0
  RESCALE_INTERVAL: false
  STEPS:
  - 40000
  - 50000
  WARMUP_FACTOR: 1.0
  WARMUP_ITERS: 10
  WARMUP_METHOD: linear
  WEIGHT_DECAY: 0.05
  WEIGHT_DECAY_BIAS: null
  WEIGHT_DECAY_EMBED: 0.0
  WEIGHT_DECAY_NORM: 0.0
TEST:
  AUG:
    ENABLED: false
    FLIP: true
    MAX_SIZE: 4000
    MIN_SIZES:
    - 400
    - 500
    - 600
    - 700
    - 800
    - 900
    - 1000
    - 1100
    - 1200
  DETECTIONS_PER_IMAGE: 1
  EVAL_PERIOD: 50000
  EXPECTED_RESULTS: []
  KEYPOINT_OKS_SIGMAS: []
  PRECISE_BN:
    ENABLED: false
    NUM_ITER: 200
VERSION: 2
VIS_PERIOD: 0