| | |
| | |
| | |
| | |
| | |
| |
|
| | import argparse |
| |
|
| |
|
def get_args_parser() -> argparse.ArgumentParser:
    """Build the command-line argument parser named "MDETR".

    The parser is created with ``add_help=False`` (no automatic ``-h/--help``
    option), presumably so that it can be handed to another parser through
    ``parents=[...]`` -- confirm against the caller.

    Returns:
        An ``argparse.ArgumentParser`` with every option registered.
        ``--dataset_config`` is the only required option; all others carry
        the defaults given below.
    """
    parser = argparse.ArgumentParser("MDETR", add_help=False)

    # Required option. argparse already defaults to None, so no explicit
    # ``default`` is needed next to ``required=True``.
    parser.add_argument("--dataset_config", required=True)

    # Transformer
    parser.add_argument(
        "--enc_layers",
        default=6,
        type=int,
        help="Number of encoding layers in the transformer",
    )
    parser.add_argument(
        "--dec_layers",
        default=6,
        type=int,
        help="Number of decoding layers in the transformer",
    )
    parser.add_argument(
        "--dim_feedforward",
        default=2048,
        type=int,
        help="Intermediate size of the feedforward layers in the transformer blocks",
    )
    parser.add_argument(
        "--hidden_dim",
        default=256,
        type=int,
        help="Size of the embeddings (dimension of the transformer)",
    )
    parser.add_argument(
        "--dropout", default=0.1, type=float, help="Dropout applied in the transformer"
    )
    parser.add_argument(
        "--nheads",
        default=8,
        type=int,
        help="Number of attention heads inside the transformer's attentions",
    )
    parser.add_argument(
        "--num_classes", default=255, type=int, help="Number of classes"
    )
    parser.add_argument(
        "--num_queries", default=100, type=int, help="Number of query slots"
    )
    parser.add_argument("--pre_norm", action="store_true")

    # Run setup, text encoder and backbone
    parser.add_argument("--output_dir", default="test")
    parser.add_argument(
        "--freeze_text_encoder",
        action="store_true",
        help="Whether to freeze the weights of the text encoder",
    )
    parser.add_argument("--tokenizer_type", type=str, default="roberta-base")
    parser.add_argument("--seed", default=42, type=int)
    parser.add_argument(
        "--test",
        action="store_true",
        help="Whether to run evaluation on val or test set",
    )
    parser.add_argument(
        "--backbone",
        default="resnet101",
        type=str,
        help="Name of the convolutional backbone to use such as resnet50 resnet101 timm_tf_efficientnet_b3_ns",
    )

    # Contrastive alignment loss.
    # ``store_false`` means ``args.contrastive_align_loss`` is True unless the
    # flag is given, so the help text describes the flag as a switch-off.
    parser.add_argument(
        "--no_contrastive_align_loss",
        dest="contrastive_align_loss",
        action="store_false",
        help="Disable the contrastive alignment loss (it is enabled by default)",
    )
    parser.add_argument(
        "--contrastive_loss_hdim",
        type=int,
        default=64,
        help="Projection head output size before computing the contrastive alignment loss",
    )
    parser.add_argument(
        "--temperature",
        type=float,
        default=0.07,
        help="Temperature in the contrastive alignment loss",
    )

    # Matching cost coefficients
    parser.add_argument(
        "--matcher_cost_class",
        default=1,
        type=float,
        help="Class coefficient in the matching cost",
    )
    parser.add_argument(
        "--matcher_cost_bbox",
        default=5,
        type=float,
        help="L1 box coefficient in the matching cost",
    )
    parser.add_argument(
        "--matcher_cost_giou",
        default=2,
        type=float,
        help="giou box coefficient in the matching cost",
    )

    # Loss coefficients
    parser.add_argument("--ce_loss_coef", default=1, type=float)
    parser.add_argument("--bbox_loss_coef", default=5, type=float)
    parser.add_argument("--giou_loss_coef", default=2, type=float)
    parser.add_argument("--qa_loss_coef", default=1, type=float)
    parser.add_argument(
        "--no_object_weight",
        default=0.1,
        type=float,
        help="Relative classification weight of the no-object class",
    )
    parser.add_argument("--contrastive_align_loss_coef", default=1, type=float)

    # Optimisation and schedule
    parser.add_argument("--lr", default=1e-4, type=float)
    parser.add_argument("--lr_backbone", default=1e-5, type=float)
    parser.add_argument("--text_encoder_lr", default=5e-5, type=float)
    parser.add_argument("--batch_size", default=2, type=int)
    parser.add_argument("--weight_decay", default=1e-4, type=float)
    parser.add_argument("--epochs", default=40, type=int)
    parser.add_argument("--lr_drop", default=35, type=int)
    parser.add_argument(
        "--epoch_chunks",
        default=-1,
        type=int,
        help="If greater than 0, will split the training set into chunks and validate/checkpoint after each chunk",
    )
    parser.add_argument("--optimizer", default="adam", type=str)
    parser.add_argument(
        "--clip_max_norm", default=0.1, type=float, help="gradient clipping max norm"
    )
    parser.add_argument(
        "--eval_skip",
        default=1,
        type=int,
        help='do evaluation every "eval_skip" frames',
    )

    # Checkpointing and evaluation
    parser.add_argument("--resume", default="", help="resume from checkpoint")
    # NOTE(review): this help text is identical to --resume; how --load differs
    # is not visible here -- confirm with the consumer of these args and reword.
    parser.add_argument("--load", default="", help="resume from checkpoint")
    # Dashed option names are exposed with underscores: args.start_epoch.
    parser.add_argument(
        "--start-epoch", default=0, type=int, metavar="N", help="start epoch"
    )
    parser.add_argument("--eval", action="store_true", help="Only run evaluation")
    parser.add_argument(
        "--schedule",
        default="linear_with_warmup",
        type=str,
        choices=("step", "multistep", "linear_with_warmup", "all_linear_with_warmup"),
    )
    parser.add_argument("--ema", action="store_true")
    parser.add_argument("--ema_decay", type=float, default=0.9998)
    parser.add_argument(
        "--fraction_warmup_steps",
        default=0.01,
        type=float,
        help="Fraction of total number of steps",
    )

    # Device and distributed settings (exposed as args.world_size / args.dist_url)
    parser.add_argument(
        "--device", default="cuda", help="device to use for training / testing"
    )
    parser.add_argument(
        "--world-size", default=1, type=int, help="number of distributed processes"
    )
    parser.add_argument(
        "--dist-url", default="env://", help="url used to set up distributed training"
    )

    return parser
| |
|