Spaces:
Running
on
Zero
Running
on
Zero
| # @package _global_ | |
| # Model | |
| backbone: | |
| _target_: models.backbones.image_encoder.ImageEncoder | |
| scalp: 1 | |
| trunk: | |
| _target_: models.backbones.hieradet.Hiera | |
| embed_dim: 112 | |
| num_heads: 2 | |
| neck: | |
| _target_: models.backbones.image_encoder.FpnNeck | |
| position_encoding: | |
| _target_: models.position_encoding.PositionEmbeddingSine | |
| num_pos_feats: 256 | |
| normalize: true | |
| scale: null | |
| temperature: 10000 | |
| d_model: 256 | |
| backbone_channel_list: [896, 448, 224, 112] | |
| fpn_top_down_levels: [2, 3] # output levels 0 and 1 use the backbone features directly | |
| fpn_interp_model: nearest | |
| #num_maskmem: 7 | |
| #image_size: 1024 | |
| ## apply a scaled sigmoid to the mask logits for the memory encoder, and feed the input mask directly through as the output mask | |
| #sigmoid_scale_for_mem_enc: 20.0 | |
| #sigmoid_bias_for_mem_enc: -10.0 | |
| #use_mask_input_as_output_without_sam: true | |
| ## Memory | |
| #directly_add_no_mem_embed: true | |
| ## use high-resolution feature map in the SAM mask decoder | |
| #use_high_res_features_in_sam: true | |
| ## output 3 masks on the first click on initial conditioning frames | |
| #multimask_output_in_sam: true | |
| ## SAM heads | |
| #iou_prediction_use_sigmoid: True | |
| ## cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder | |
| #use_obj_ptrs_in_encoder: true | |
| #add_tpos_enc_to_obj_ptrs: false | |
| #only_obj_ptrs_in_the_past_for_eval: true | |
| ## object occlusion prediction | |
| #pred_obj_scores: true | |
| #pred_obj_scores_mlp: true | |
| #fixed_no_obj_ptr: true | |
| ## multimask tracking settings | |
| #multimask_output_for_tracking: true | |
| #use_multimask_token_for_obj_ptr: true | |
| #multimask_min_pt_num: 0 | |
| #multimask_max_pt_num: 1 | |
| #use_mlp_for_obj_ptr_proj: true | |
| ## Compilation flag | |
| #compile_image_encoder: False | |