{ "activation_dropout": 0.0, "activation_function": "relu", "angle_loss_coefficient": 1, "architectures": [ "Mono3DVGv2ForSingleObjectDetection" ], "attention_dropout": 0.0, "auxiliary_loss": false, "backbone": "swin_large_patch4_window7_224", "backbone_config": null, "bbox_cost": 5, "bbox_loss_coefficient": 5, "center3d_cost": 10, "center3d_loss_coefficient": 10, "class_cost": 2, "cls_loss_coefficient": 2, "d_model": 256, "decoder_attention_heads": 8, "decoder_depth_residual": false, "decoder_ffn_dim": 256, "decoder_layers": 3, "decoder_n_points": 4, "decoder_self_attn": false, "decoder_text_residual": false, "depth_loss_coefficient": 1, "depth_map_loss_coefficient": 1, "depth_max": 60.0, "depth_min": 0.001, "dim_loss_coefficient": 1, "disable_custom_kernels": false, "dropout": 0.1, "encoder_attention_heads": 8, "encoder_ffn_dim": 256, "encoder_layers": 6, "encoder_n_points": 4, "focal_alpha": 0.25, "freeze_backbone": true, "giou_cost": 2, "giou_loss_coefficient": 2, "id2label": { "0": "pedestrian", "1": "car", "2": "cyclist", "3": "van", "4": "truck", "5": "tram", "6": "bus", "7": "person_sitting", "8": "motorcyclist" }, "init_box": false, "init_std": 0.02, "init_xavier_std": 1.0, "is_encoder_decoder": true, "label2id": { "bus": 6, "car": 1, "cyclist": 2, "motorcyclist": 8, "pedestrian": 0, "person_sitting": 7, "tram": 5, "truck": 4, "van": 3 }, "model_type": "mono3dvgv2", "num_channels": 3, "num_depth_bins": 80, "num_feature_levels": 4, "num_queries": 1, "num_text_output_layers": 6, "position_embedding_type": "sine", "pretrained_backbone_path": "pretrained-models/swin_large_patch4_window7_224/model.safetensors", "text_encoder_type": "pretrained-models/roberta-base", "torch_dtype": "float32", "transformers_version": "4.44.0", "two_stage": false, "use_dab": true, "use_pretrained_backbone": true, "use_text_guided_adapter": false, "use_timm_backbone": true, "vl_encoder_type": "simple-bridge-tower", "with_box_refine": true }