Sayeem26s
/

chart-element-detector

+model = dict(
+    type="CascadeRCNN",
+    backbone=dict(
+        type="SwinTransformer",
+        embed_dims=96,
+        depths=[2, 2, 6, 2],
+        num_heads=[3, 6, 12, 24],
+        window_size=7,
+        mlp_ratio=4,
+        qkv_bias=True,
+        qk_scale=None,
+        drop_rate=0.0,
+        attn_drop_rate=0.0,
+        drop_path_rate=0.2,
+        patch_norm=True,
+        out_indices=(0, 1, 2, 3),
+        with_cp=False,
+        convert_weights=True,
+        init_cfg=dict(
+            type="Pretrained",
+            checkpoint="https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth",
+        ),
+    ),
+    neck=dict(
+        type="FPN", in_channels=[96, 192, 384, 768], out_channels=256, num_outs=5
+    ),
+    rpn_head=dict(
+        type="RPNHead",
+        in_channels=256,
+        feat_channels=256,
+        anchor_generator=dict(
+            type="AnchorGenerator",
+            scales=[8],
+            ratios=[0.5, 1.0, 2.0],
+            strides=[4, 8, 16, 32, 64],
+        ),
+        bbox_coder=dict(
+            type="DeltaXYWHBBoxCoder",
+            target_means=[0.0, 0.0, 0.0, 0.0],
+            target_stds=[1.0, 1.0, 1.0, 1.0],
+        ),
+        loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0),
+        loss_bbox=dict(type="SmoothL1Loss", beta=0.1111111111111111, loss_weight=1.0),
+    ),
+    roi_head=dict(
+        type="CascadeRoIHead_LGF",
+        num_stages=3,
+        stage_loss_weights=[1, 1, 0.5],
+        bbox_roi_extractor=dict(
+            type="SingleRoIExtractor",
+            roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0),
+            out_channels=256,
+            featmap_strides=[4, 8, 16, 32],
+        ),
+        bbox_head=[
+            dict(
+                type="Shared3FCBBoxHead_with_BboxEncoding",
+                in_channels=256,
+                fc_out_channels=1024,
+                bbox_encoding_dim=512,
+                roi_feat_size=7,
+                num_classes=18,
+                bbox_coder=dict(
+                    type="DeltaXYWHBBoxCoder",
+                    target_means=[0.0, 0.0, 0.0, 0.0],
+                    target_stds=[0.1, 0.1, 0.2, 0.2],
+                ),
+                reg_class_agnostic=True,
+                loss_cls=dict(type="FocalLoss"),
+                loss_bbox=dict(type="BalancedL1Loss", beta=1.0, loss_weight=1.0),
+            ),
+            dict(
+                type="Shared3FCBBoxHead_with_BboxEncoding",
+                in_channels=256,
+                fc_out_channels=1024,
+                bbox_encoding_dim=512,
+                roi_feat_size=7,
+                num_classes=18,
+                bbox_coder=dict(
+                    type="DeltaXYWHBBoxCoder",
+                    target_means=[0.0, 0.0, 0.0, 0.0],
+                    target_stds=[0.05, 0.05, 0.1, 0.1],
+                ),
+                reg_class_agnostic=True,
+                loss_cls=dict(type="FocalLoss"),
+                loss_bbox=dict(type="BalancedL1Loss", beta=1.0, loss_weight=1.0),
+            ),
+            dict(
+                type="Shared3FCBBoxHead_with_BboxEncoding",
+                in_channels=256,
+                fc_out_channels=1024,
+                bbox_encoding_dim=512,
+                roi_feat_size=7,
+                num_classes=18,
+                bbox_coder=dict(
+                    type="DeltaXYWHBBoxCoder",
+                    target_means=[0.0, 0.0, 0.0, 0.0],
+                    target_stds=[0.033, 0.033, 0.067, 0.067],
+                ),
+                reg_class_agnostic=True,
+                loss_cls=dict(type="FocalLoss"),
+                loss_bbox=dict(type="BalancedL1Loss", beta=1.0, loss_weight=1.0),
+            ),
+        ],
+        localglobal_fuser=dict(
+            type="LocalGlobal_Context_Fuser",
+            channels=256,
+            roi_size=7,
+            reduced_channels=256,
+            lg_merge_layer=dict(type="SELayer", channels=256),
+        ),
+        lgf_shared=False,
+        bbox_encoder=dict(
+            type="BboxEncoder",
+            n_layer=4,
+            n_head=4,
+            n_embd=512,
+            bbox_cord_dim=4,
+            bbox_max_num=1024,
+            embd_pdrop=0.1,
+            attn_pdrop=0.1,
+        ),
+        bbox_encoder_shared=False,
+    ),
+    train_cfg=dict(
+        rpn=dict(
+            assigner=dict(
+                type="MaxIoUAssigner",
+                pos_iou_thr=0.7,
+                neg_iou_thr=0.3,
+                min_pos_iou=0.3,
+                match_low_quality=True,
+                ignore_iof_thr=-1,
+            ),
+            sampler=dict(
+                type="RandomSampler",
+                num=256,
+                pos_fraction=0.5,
+                neg_pos_ub=-1,
+                add_gt_as_proposals=False,
+            ),
+            allowed_border=0,
+            pos_weight=-1,
+            debug=False,
+        ),
+        rpn_proposal=dict(
+            nms_pre=2000,
+            max_per_img=2000,
+            nms=dict(type="nms", iou_threshold=0.7),
+            min_bbox_size=0,
+        ),
+        rcnn=[
+            dict(
+                assigner=dict(
+                    type="MaxIoUAssigner",
+                    pos_iou_thr=0.5,
+                    neg_iou_thr=0.5,
+                    min_pos_iou=0.5,
+                    match_low_quality=False,
+                    ignore_iof_thr=-1,
+                ),
+                sampler=dict(
+                    type="RandomSampler",
+                    num=512,
+                    pos_fraction=0.25,
+                    neg_pos_ub=-1,
+                    add_gt_as_proposals=True,
+                ),
+                pos_weight=-1,
+                debug=False,
+            ),
+            dict(
+                assigner=dict(
+                    type="MaxIoUAssigner",
+                    pos_iou_thr=0.6,
+                    neg_iou_thr=0.6,
+                    min_pos_iou=0.6,
+                    match_low_quality=False,
+                    ignore_iof_thr=-1,
+                ),
+                sampler=dict(
+                    type="RandomSampler",
+                    num=512,
+                    pos_fraction=0.25,
+                    neg_pos_ub=-1,
+                    add_gt_as_proposals=True,
+                ),
+                pos_weight=-1,
+                debug=False,
+            ),
+            dict(
+                assigner=dict(
+                    type="MaxIoUAssigner",
+                    pos_iou_thr=0.7,
+                    neg_iou_thr=0.7,
+                    min_pos_iou=0.7,
+                    match_low_quality=False,
+                    ignore_iof_thr=-1,
+                ),
+                sampler=dict(
+                    type="RandomSampler",
+                    num=512,
+                    pos_fraction=0.25,
+                    neg_pos_ub=-1,
+                    add_gt_as_proposals=True,
+                ),
+                pos_weight=-1,
+                debug=False,
+            ),
+        ],
+    ),
+    test_cfg=dict(
+        rpn=dict(
+            nms_pre=1000,
+            max_per_img=1000,
+            nms=dict(type="nms", iou_threshold=0.7),
+            min_bbox_size=0,
+        ),
+        rcnn=dict(
+            score_thr=0.0, nms=dict(type="nms", iou_threshold=0.7), max_per_img=200
+        ),
+    ),
+)
+dataset_type = "CocoDataset"
+data_root = "data/coco/"
+img_norm_cfg = dict(
+    mean=[216.45, 212.36, 206.76], std=[55.82, 56.04, 55.56], to_rgb=True
+)
+train_pipeline = [
+    dict(type="LoadImageFromFile"),
+    dict(type="LoadAnnotations", with_bbox=True),
+    dict(
+        type="AutoAugment",
+        policies=[
+            [
+                {
+                    "type": "Resize",
+                    "img_scale": [
+                        (480, 1333),
+                        (512, 1333),
+                        (544, 1333),
+                        (576, 1333),
+                        (608, 1333),
+                        (640, 1333),
+                        (672, 1333),
+                        (704, 1333),
+                        (736, 1333),
+                        (768, 1333),
+                        (800, 1333),
+                    ],
+                    "multiscale_mode": "value",
+                    "keep_ratio": True,
+                }
+            ],
+            [
+                {
+                    "type": "Resize",
+                    "img_scale": [(400, 1333), (500, 1333), (600, 1333)],
+                    "multiscale_mode": "value",
+                    "keep_ratio": True,
+                },
+                {
+                    "type": "RandomCrop",
+                    "crop_type": "absolute_range",
+                    "crop_size": (384, 600),
+                    "allow_negative_crop": True,
+                },
+                {
+                    "type": "Resize",
+                    "img_scale": [
+                        (480, 1333),
+                        (512, 1333),
+                        (544, 1333),
+                        (576, 1333),
+                        (608, 1333),
+                        (640, 1333),
+                        (672, 1333),
+                        (704, 1333),
+                        (736, 1333),
+                        (768, 1333),
+                        (800, 1333),
+                    ],
+                    "multiscale_mode": "value",
+                    "override": True,
+                    "keep_ratio": True,
+                },
+                {
+                    "type": "PhotoMetricDistortion",
+                    "brightness_delta": 32,
+                    "contrast_range": (0.5, 1.5),
+                    "saturation_range": (0.5, 1.5),
+                    "hue_delta": 18,
+                },
+                {
+                    "type": "MinIoURandomCrop",
+                    "min_ious": (0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
+                    "min_crop_size": 0.3,
+                },
+                {
+                    "type": "CutOut",
+                    "n_holes": (5, 10),
+                    "cutout_shape": [
+                        (4, 4),
+                        (4, 8),
+                        (8, 4),
+                        (8, 8),
+                        (16, 32),
+                        (32, 16),
+                        (32, 32),
+                        (32, 48),
+                        (48, 32),
+                        (48, 48),
+                    ],
+                },
+            ],
+        ],
+    ),
+    dict(type="RandomFlip", flip_ratio=0.1),
+    dict(
+        type="Normalize",
+        mean=[216.45, 212.36, 206.76],
+        std=[55.82, 56.04, 55.56],
+        to_rgb=True,
+    ),
+    dict(type="Pad", size_divisor=32),
+    dict(type="DefaultFormatBundle"),
+    dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
+]
+test_pipeline = [
+    dict(type="LoadImageFromFile", to_float32=True),
+    dict(
+        type="MultiScaleFlipAug",
+        img_scale=(1333, 800),
+        flip=False,
+        transforms=[
+            dict(type="Resize", keep_ratio=True),
+            dict(type="RandomFlip", flip_ratio=0.0),
+            dict(
+                type="Normalize",
+                mean=[216.45, 212.36, 206.76],
+                std=[55.82, 56.04, 55.56],
+                to_rgb=True,
+            ),
+            dict(type="Pad", size_divisor=32),
+            dict(type="DefaultFormatBundle"),
+            dict(type="Collect", keys=["img"]),
+        ],
+    ),
+]
+data = dict(
+    samples_per_gpu=3,
+    workers_per_gpu=4,
+    train=dict(
+        type="CocoDataset",
+        ann_file="./data/pmc_2022/pmc_coco/element_detection/train.json",
+        img_prefix="./data/pmc_2022/pmc_coco/element_detection/train/",
+        pipeline=[
+            dict(type="LoadImageFromFile"),
+            dict(type="LoadAnnotations", with_bbox=True),
+            dict(
+                type="AutoAugment",
+                policies=[
+                    [
+                        {
+                            "type": "Resize",
+                            "img_scale": [
+                                (480, 1333),
+                                (512, 1333),
+                                (544, 1333),
+                                (576, 1333),
+                                (608, 1333),
+                                (640, 1333),
+                                (672, 1333),
+                                (704, 1333),
+                                (736, 1333),
+                                (768, 1333),
+                                (800, 1333),
+                            ],
+                            "multiscale_mode": "value",
+                            "keep_ratio": True,
+                        }
+                    ],
+                    [
+                        {
+                            "type": "Resize",
+                            "img_scale": [(400, 1333), (500, 1333), (600, 1333)],
+                            "multiscale_mode": "value",
+                            "keep_ratio": True,
+                        },
+                        {
+                            "type": "RandomCrop",
+                            "crop_type": "absolute_range",
+                            "crop_size": (384, 600),
+                            "allow_negative_crop": True,
+                        },
+                        {
+                            "type": "Resize",
+                            "img_scale": [
+                                (480, 1333),
+                                (512, 1333),
+                                (544, 1333),
+                                (576, 1333),
+                                (608, 1333),
+                                (640, 1333),
+                                (672, 1333),
+                                (704, 1333),
+                                (736, 1333),
+                                (768, 1333),
+                                (800, 1333),
+                            ],
+                            "multiscale_mode": "value",
+                            "override": True,
+                            "keep_ratio": True,
+                        },
+                        {
+                            "type": "PhotoMetricDistortion",
+                            "brightness_delta": 32,
+                            "contrast_range": (0.5, 1.5),
+                            "saturation_range": (0.5, 1.5),
+                            "hue_delta": 18,
+                        },
+                        {
+                            "type": "MinIoURandomCrop",
+                            "min_ious": (0.4, 0.5, 0.6, 0.7, 0.8, 0.9),
+                            "min_crop_size": 0.3,
+                        },
+                        {
+                            "type": "CutOut",
+                            "n_holes": (5, 10),
+                            "cutout_shape": [
+                                (4, 4),
+                                (4, 8),
+                                (8, 4),
+                                (8, 8),
+                                (16, 32),
+                                (32, 16),
+                                (32, 32),
+                                (32, 48),
+                                (48, 32),
+                                (48, 48),
+                            ],
+                        },
+                    ],
+                ],
+            ),
+            dict(type="RandomFlip", flip_ratio=0.1),
+            dict(
+                type="Normalize",
+                mean=[216.45, 212.36, 206.76],
+                std=[55.82, 56.04, 55.56],
+                to_rgb=True,
+            ),
+            dict(type="Pad", size_divisor=32),
+            dict(type="DefaultFormatBundle"),
+            dict(type="Collect", keys=["img", "gt_bboxes", "gt_labels"]),
+        ],
+        classes=[
+            "x_title",
+            "y_title",
+            "plot_area",
+            "other",
+            "xlabel",
+            "ylabel",
+            "chart_title",
+            "x_tick",
+            "y_tick",
+            "legend_patch",
+            "legend_label",
+            "legend_title",
+            "legend_area",
+            "mark_label",
+            "value_label",
+            "y_axis_area",
+            "x_axis_area",
+            "tick_grouping",
+        ],
+    ),
+    val=dict(
+        type="CocoDataset",
+        ann_file="./data/pmc_2022/pmc_coco/element_detection/val.json",
+        img_prefix="./data/pmc_2022/pmc_coco/element_detection/val/",
+        pipeline=[
+            dict(type="LoadImageFromFile"),
+            dict(
+                type="MultiScaleFlipAug",
+                img_scale=(1333, 800),
+                flip=False,
+                transforms=[
+                    dict(type="Resize", keep_ratio=True),
+                    dict(type="RandomFlip"),
+                    dict(
+                        type="Normalize",
+                        mean=[123.675, 116.28, 103.53],
+                        std=[58.395, 57.12, 57.375],
+                        to_rgb=True,
+                    ),
+                    dict(type="Pad", size_divisor=32),
+                    dict(type="ImageToTensor", keys=["img"]),
+                    dict(type="Collect", keys=["img"]),
+                ],
+            ),
+        ],
+        classes=[
+            "x_title",
+            "y_title",
+            "plot_area",
+            "other",
+            "xlabel",
+            "ylabel",
+            "chart_title",
+            "x_tick",
+            "y_tick",
+            "legend_patch",
+            "legend_label",
+            "legend_title",
+            "legend_area",
+            "mark_label",
+            "value_label",
+            "y_axis_area",
+            "x_axis_area",
+            "tick_grouping",
+        ],
+    ),
+    test=dict(
+        type="CocoDataset",
+        ann_file="./data/pmc_2022/pmc_coco/element_detection/split3_test.json",
+        img_prefix="./data/pmc_2022/pmc_coco/element_detection/split3_test/",
+        pipeline=[
+            dict(type="LoadImageFromFile"),
+            dict(
+                type="MultiScaleFlipAug",
+                img_scale=(1333, 800),
+                flip=False,
+                transforms=[
+                    dict(type="Resize", keep_ratio=True),
+                    dict(type="RandomFlip"),
+                    dict(
+                        type="Normalize",
+                        mean=[123.675, 116.28, 103.53],
+                        std=[58.395, 57.12, 57.375],
+                        to_rgb=True,
+                    ),
+                    dict(type="Pad", size_divisor=32),
+                    dict(type="ImageToTensor", keys=["img"]),
+                    dict(type="Collect", keys=["img"]),
+                ],
+            ),
+        ],
+        classes=[
+            "x_title",
+            "y_title",
+            "plot_area",
+            "other",
+            "xlabel",
+            "ylabel",
+            "chart_title",
+            "x_tick",
+            "y_tick",
+            "legend_patch",
+            "legend_label",
+            "legend_title",
+            "legend_area",
+            "mark_label",
+            "value_label",
+            "y_axis_area",
+            "x_axis_area",
+            "tick_grouping",
+        ],
+    ),
+)
+evaluation = dict(interval=1, metric=["bbox"])
+optimizer = dict(
+    type="AdamW",
+    lr=0.0002,
+    betas=(0.9, 0.999),
+    weight_decay=0.05,
+    paramwise_cfg=dict(
+        custom_keys=dict(
+            absolute_pos_embed=dict(decay_mult=0.0),
+            relative_position_bias_table=dict(decay_mult=0.0),
+            norm=dict(decay_mult=0.0),
+        )
+    ),
+)
+optimizer_config = dict(grad_clip=None)
+lr_config = dict(
+    policy="step", warmup="linear", warmup_iters=500, warmup_ratio=0.001, step=[8, 11]
+)
+runner = dict(type="EpochBasedRunner", max_epochs=150)
+checkpoint_config = dict(interval=1)
+log_config = dict(interval=50, hooks=[dict(type="TextLoggerHook")])
+custom_hooks = [dict(type="NumClassCheckHook")]
+dist_params = dict(backend="nccl")
+log_level = "INFO"
+load_from = None
+resume_from = None
+workflow = [("train", 1)]
+opencv_num_threads = 0
+mp_start_method = "fork"
+auto_scale_lr = dict(enable=False, base_batch_size=16)
+pretrained = "https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth"
+classes = [
+    "x_title",
+    "y_title",
+    "plot_area",
+    "other",
+    "xlabel",
+    "ylabel",
+    "chart_title",
+    "x_tick",
+    "y_tick",
+    "legend_patch",
+    "legend_label",
+    "legend_title",
+    "legend_area",
+    "mark_label",
+    "value_label",
+    "y_axis_area",
+    "x_axis_area",
+    "tick_grouping",
+]
+auto_resume = False
+gpu_ids = range(0, 4)