| pixel_mean: [0.485, 0.456, 0.406] |
| pixel_std: [0.229, 0.224, 0.225] |
|
|
| pixel_dim: 256 |
| key_dim: 64 |
| value_dim: 256 |
| sensory_dim: 256 |
| embed_dim: 256 |
|
|
| pixel_encoder: |
| type: resnet50 |
| ms_dims: [1024, 512, 256, 64, 3] |
|
|
| mask_encoder: |
| type: resnet18 |
| final_dim: 256 |
|
|
| pixel_pe_scale: 32 |
| pixel_pe_temperature: 128 |
|
|
| object_transformer: |
| embed_dim: ${model.embed_dim} |
| ff_dim: 2048 |
| num_heads: 8 |
| num_blocks: 3 |
| num_queries: 16 |
| read_from_pixel: |
| input_norm: False |
| input_add_pe: False |
| add_pe_to_qkv: [True, True, False] |
| read_from_past: |
| add_pe_to_qkv: [True, True, False] |
| read_from_memory: |
| add_pe_to_qkv: [True, True, False] |
| read_from_query: |
| add_pe_to_qkv: [True, True, False] |
| output_norm: False |
| query_self_attention: |
| add_pe_to_qkv: [True, True, False] |
| pixel_self_attention: |
| add_pe_to_qkv: [True, True, False] |
|
|
| object_summarizer: |
| embed_dim: ${model.object_transformer.embed_dim} |
| num_summaries: ${model.object_transformer.num_queries} |
| add_pe: True |
|
|
| aux_loss: |
| sensory: |
| enabled: True |
| weight: 0.01 |
| query: |
| enabled: True |
| weight: 0.01 |
|
|
| mask_decoder: |
| |
| up_dims: [256, 128, 128, 64, 16] |
|
|