# Flat, module-level configuration: every setting is a plain top-level
# assignment with a literal value. Nothing in this file reads these names;
# they are presumably collected by an external config loader -- confirm
# against the training entry point.

# --- Data augmentation / batching -------------------------------------------
data_aug_scales = [192, 208, 224, 240]
data_aug_max_size = 240
data_aug_scales2_resize = [256]
data_aug_scales2_crop = [224, 224]
data_aug_scale_overlap = None
batch_size = 55

# --- Model identity and backbone --------------------------------------------
modelname = 'groundingdino'
backbone = 'swin_T_224_1k'
position_embedding = 'sine'
pe_temperatureH = 20
pe_temperatureW = 20
return_interm_indices = [1, 2, 3]

# --- Transformer encoder / decoder shape ------------------------------------
enc_layers = 6
dec_layers = 6
pre_norm = False
dim_feedforward = 2048
hidden_dim = 256
dropout = 0.0
nheads = 8
num_queries = 900
query_dim = 4
num_patterns = 0
num_feature_levels = 4
enc_n_points = 4
dec_n_points = 4
two_stage_type = 'standard'
two_stage_bbox_embed_share = False
two_stage_class_embed_share = False
transformer_activation = 'relu'
dec_pred_bbox_embed_share = True

# --- "dn_*" settings (presumably denoising training -- TODO confirm) --------
dn_box_noise_scale = 1.0
dn_label_noise_ratio = 0.5
dn_label_coef = 1.0
dn_bbox_coef = 1.0
embed_init_tgt = True
dn_labelbook_size = 91

# --- Text encoder and text/image fusion -------------------------------------
max_text_len = 256
# Relative path: resolved against the process working directory at load time.
text_encoder_type = './bert-base-uncased'
use_text_enhancer = True
use_fusion_layer = True
use_checkpoint = True
use_transformer_ckpt = True
use_text_cross_attention = True
text_dropout = 0.0
fusion_dropout = 0.0
fusion_droppath = 0.1
sub_sentence_present = True
max_labels = 30

# --- Optimizer and learning-rate schedule -----------------------------------
lr = 0.001
backbone_freeze_keywords = None
freeze_keywords = []
lr_backbone = 1e-05
lr_backbone_names = ['backbone.0', 'bert']
lr_linear_proj_mult = 1e-05
lr_linear_proj_names = ['ref_point_head', 'sampling_offsets']
weight_decay = 0.0001
param_dict_type = 'ddetr_in_mmdet'
ddetr_lr_param = False
epochs = 50
lr_drop = 10
save_checkpoint_interval = 10
clip_max_norm = 0.1
onecyclelr = False
multi_step_lr = False
# NOTE(review): multi_step_lr is False above, so this list is likely unused
# by the scheduler -- confirm against the training code.
lr_drop_list = [10, 20, 30, 40]
frozen_weights = None
dilation = False

# --- Reference-point / anchor / box-attention variants ----------------------
pdetr3_bbox_embed_diff_each_layer = False
pdetr3_refHW = -1
random_refpoints_xy = False
fix_refpoints_hw = -1
dabdetr_yolo_like_anchor_update = False
dabdetr_deformable_encoder = False
dabdetr_deformable_decoder = False
use_deformable_box_attn = False
box_attn_type = 'roi_align'
dec_layer_number = None
decoder_layer_noise = False
dln_xy_noise = 0.2
dln_hw_noise = 0.2
add_channel_attention = False
add_pos_value = False
two_stage_pat_embed = 0
two_stage_add_query_num = 0
two_stage_learn_wh = False
two_stage_default_hw = 0.05
two_stage_keep_all_tokens = False
num_select = 40
batch_norm_type = 'FrozenBatchNorm2d'
masks = False

# --- Matching costs and loss weights ----------------------------------------
aux_loss = True
set_cost_class = 1.0
set_cost_bbox = 5.0
set_cost_giou = 2.0
cls_loss_coef = 2.5
bbox_loss_coef = 5.0
giou_loss_coef = 2.0
enc_loss_coef = 1.0
interm_loss_coef = 1.0
no_interm_box_loss = False
mask_loss_coef = 1.0
dice_loss_coef = 1.0
focal_alpha = 0.25
focal_gamma = 2.5

# --- Decoder composition, matcher, EMA, evaluation --------------------------
decoder_sa_type = 'sa'
matcher_type = 'HungarianMatcher'
decoder_module_seq = ['sa', 'ca', 'ffn']
nms_iou_threshold = -1
dec_pred_class_embed_share = True
match_unstable_error = True
use_ema = True
ema_decay = 0.9997
ema_epoch = 0
use_detached_boxes_dec_out = False
use_coco_eval = False
dn_scalar = 100

# --- Category names ---------------------------------------------------------
# 28 entries. NOTE(review): order presumably maps to class indices in the
# dataset annotations -- verify before reordering or inserting items.
label_list = [
    'alphabet soup',
    'basket',
    'bbq sauce',
    'black bowl',
    'book',
    'butter',
    'cabinet',
    'caddy',
    'chocolate pudding',
    'cream cheese',
    'gripper',
    'ketchup',
    'left moka pot',
    'left plate',
    'microwave',
    'milk',
    'moka pot',
    'orange juice',
    'plate',
    'right moka pot',
    'right plate',
    'salad dressing',
    'stove',
    'tomato sauce',
    'white mug',
    'wine bottle',
    'wine rack',
    'yellow and white mug',
]