# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import argparse


def get_args_parser() -> argparse.ArgumentParser:
    """Build the command-line argument parser for MDETR.

    The parser is created with ``add_help=False``, so it does not define
    ``-h/--help`` itself; this is the usual setup for a parser that is handed
    to another ``ArgumentParser`` through ``parents=[...]``.

    ``--dataset_config`` is the only mandatory option; every other option has
    a default.

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    parser = argparse.ArgumentParser("MDETR", add_help=False)

    # Mandatory, so no default is given: argparse would never fall back to it.
    parser.add_argument("--dataset_config", required=True)

    # Transformer
    parser.add_argument(
        "--enc_layers",
        default=6,
        type=int,
        help="Number of encoding layers in the transformer",
    )
    parser.add_argument(
        "--dec_layers",
        default=6,
        type=int,
        help="Number of decoding layers in the transformer",
    )
    parser.add_argument(
        "--dim_feedforward",
        default=2048,
        type=int,
        help="Intermediate size of the feedforward layers in the transformer blocks",
    )
    parser.add_argument(
        "--hidden_dim",
        default=256,
        type=int,
        help="Size of the embeddings (dimension of the transformer)",
    )
    parser.add_argument(
        "--dropout", default=0.1, type=float, help="Dropout applied in the transformer"
    )
    parser.add_argument(
        "--nheads",
        default=8,
        type=int,
        help="Number of attention heads inside the transformer's attentions",
    )
    parser.add_argument(
        "--num_classes", default=255, type=int, help="Number of classes"
    )
    parser.add_argument(
        "--num_queries", default=100, type=int, help="Number of query slots"
    )
    parser.add_argument("--pre_norm", action="store_true")
    parser.add_argument("--output_dir", default="test")
    parser.add_argument(
        "--freeze_text_encoder",
        action="store_true",
        help="Whether to freeze the weights of the text encoder",
    )
    parser.add_argument("--tokenizer_type", type=str, default="roberta-base")
    parser.add_argument("--seed", default=42, type=int)
    parser.add_argument(
        "--test",
        action="store_true",
        help="Whether to run evaluation on val or test set",
    )
    parser.add_argument(
        "--backbone",
        default="resnet101",
        type=str,
        help="Name of the convolutional backbone to use such as resnet50 resnet101 timm_tf_efficientnet_b3_ns",
    )
    # Negative flag: the value is stored under `contrastive_align_loss`, which
    # is True unless this flag is passed (store_false semantics).
    parser.add_argument(
        "--no_contrastive_align_loss",
        dest="contrastive_align_loss",
        action="store_false",
        help="Disable the contrastive alignment loss (enabled by default)",
    )
    parser.add_argument(
        "--contrastive_loss_hdim",
        type=int,
        default=64,
        help="Projection head output size before computing the contrastive alignment loss",
    )
    parser.add_argument(
        "--temperature",
        type=float,
        default=0.07,
        help="Temperature in the contrastive alignment loss",
    )

    # Matcher
    parser.add_argument(
        "--matcher_cost_class",
        default=1,
        type=float,
        help="Class coefficient in the matching cost",
    )
    parser.add_argument(
        "--matcher_cost_bbox",
        default=5,
        type=float,
        help="L1 box coefficient in the matching cost",
    )
    parser.add_argument(
        "--matcher_cost_giou",
        default=2,
        type=float,
        help="giou box coefficient in the matching cost",
    )

    # Loss coefficients
    parser.add_argument("--ce_loss_coef", default=1, type=float)
    parser.add_argument("--bbox_loss_coef", default=5, type=float)
    parser.add_argument("--giou_loss_coef", default=2, type=float)
    parser.add_argument("--qa_loss_coef", default=1, type=float)
    parser.add_argument(
        "--no_object_weight",
        default=0.1,
        type=float,
        help="Relative classification weight of the no-object class",
    )
    parser.add_argument("--contrastive_align_loss_coef", default=1, type=float)

    # Learning rates, batch size and training length
    parser.add_argument("--lr", default=1e-4, type=float)
    parser.add_argument("--lr_backbone", default=1e-5, type=float)
    parser.add_argument("--text_encoder_lr", default=5e-5, type=float)
    parser.add_argument("--batch_size", default=2, type=int)
    parser.add_argument("--weight_decay", default=1e-4, type=float)
    parser.add_argument("--epochs", default=40, type=int)
    parser.add_argument("--lr_drop", default=35, type=int)
    parser.add_argument(
        "--epoch_chunks",
        default=-1,
        type=int,
        help="If greater than 0, will split the training set into chunks and validate/checkpoint after each chunk",
    )
    parser.add_argument("--optimizer", default="adam", type=str)
    parser.add_argument(
        "--clip_max_norm", default=0.1, type=float, help="gradient clipping max norm"
    )
    parser.add_argument(
        "--eval_skip",
        default=1,
        type=int,
        help='do evaluation every "eval_skip" frames',
    )

    # Checkpoints and run mode
    parser.add_argument("--resume", default="", help="resume from checkpoint")
    # NOTE(review): this help text is identical to --resume; how --load differs
    # is decided by the caller and is not visible here -- confirm and reword.
    parser.add_argument("--load", default="", help="resume from checkpoint")
    # Hyphenated option: argparse exposes it as `args.start_epoch`.
    parser.add_argument(
        "--start-epoch", default=0, type=int, metavar="N", help="start epoch"
    )
    parser.add_argument("--eval", action="store_true", help="Only run evaluation")
    parser.add_argument(
        "--schedule",
        default="linear_with_warmup",
        type=str,
        choices=("step", "multistep", "linear_with_warmup", "all_linear_with_warmup"),
    )
    parser.add_argument("--ema", action="store_true")
    parser.add_argument("--ema_decay", type=float, default=0.9998)
    parser.add_argument(
        "--fraction_warmup_steps",
        default=0.01,
        type=float,
        help="Fraction of total number of steps",
    )

    # Device and distributed setup (hyphenated options map to
    # `args.world_size` and `args.dist_url`).
    parser.add_argument(
        "--device", default="cuda", help="device to use for training / testing"
    )
    parser.add_argument(
        "--world-size", default=1, type=int, help="number of distributed processes"
    )
    parser.add_argument(
        "--dist-url", default="env://", help="url used to set up distributed training"
    )

    return parser