GreatBird committed on
Commit
af20dda
·
verified ·
1 Parent(s): 13f9718

Upload 125 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +26 -0
  2. README.md +26 -0
  3. ckpts/ViTP_InternVL_1B_med.safetensors +3 -0
  4. ckpts/ViTP_InternVL_1B_rs.safetensors +3 -0
  5. ckpts/ViTP_ViT_L_300M_med.safetensors +3 -0
  6. ckpts/ViTP_ViT_L_300M_rs.safetensors +3 -0
  7. ckpts/reduct_pth.py +31 -0
  8. ckpts/vitp_amos_upernet_9060/20250905_141251.log +0 -0
  9. ckpts/vitp_amos_upernet_9060/best_mDice.pth +3 -0
  10. ckpts/vitp_amos_upernet_9060/eval_single_scale_20250906_143650.json +54 -0
  11. ckpts/vitp_amos_upernet_9060/vitp_amos_upernet.py +218 -0
  12. ckpts/vitp_brats_upernet_7211/20250907_130222.log +0 -0
  13. ckpts/vitp_brats_upernet_7211/best_mDice.pth +3 -0
  14. ckpts/vitp_brats_upernet_7211/eval_single_scale_20250908_054047.json +18 -0
  15. ckpts/vitp_brats_upernet_7211/vitp_brats_upernet.py +201 -0
  16. ckpts/vitp_convid_upernet_9155/20250902_103001.log +0 -0
  17. ckpts/vitp_convid_upernet_9155/best_mDice.pth +3 -0
  18. ckpts/vitp_convid_upernet_9155/eval_single_scale_20250902_233031.json +15 -0
  19. ckpts/vitp_convid_upernet_9155/vitp_convid_upernet.py +207 -0
  20. ckpts/vitp_dior_cascade_rcnn_7960/20250730_223238.log +0 -0
  21. ckpts/vitp_dior_cascade_rcnn_7960/20250730_223238.log.json +19 -0
  22. ckpts/vitp_dior_cascade_rcnn_7960/epoch_12.pth +3 -0
  23. ckpts/vitp_dior_cascade_rcnn_7960/vitp_dior_cascade_rcnn.py +308 -0
  24. ckpts/vitp_diorr_orcnn_7508/20250918_082138.log +0 -0
  25. ckpts/vitp_diorr_orcnn_7508/epoch_12.pth +3 -0
  26. ckpts/vitp_diorr_orcnn_7508/vitp_diorr_orcnn.py +311 -0
  27. ckpts/vitp_dotav2_orcnn_6073/20250726_012424.log +0 -0
  28. ckpts/vitp_dotav2_orcnn_6073/20250726_012424.log.json +61 -0
  29. ckpts/vitp_dotav2_orcnn_6073/epoch_12.pth +3 -0
  30. ckpts/vitp_dotav2_orcnn_6073/vitp_dotav2_orcnn.py +302 -0
  31. ckpts/vitp_isaid_upernet_7114/20250803_154801.log +0 -0
  32. ckpts/vitp_isaid_upernet_7114/20250803_154801.log.json +0 -0
  33. ckpts/vitp_isaid_upernet_7114/ViTP_isaid_upernet.py +192 -0
  34. ckpts/vitp_isaid_upernet_7114/eval_20250921_141413.json +40 -0
  35. ckpts/vitp_isaid_upernet_7114/iter_80000.pth +3 -0
  36. ckpts/vitp_levir_upernet_7268/20250919_030132/20250919_030132.log +0 -0
  37. ckpts/vitp_levir_upernet_7268/20250919_030132/20250921_105914.log +485 -0
  38. ckpts/vitp_levir_upernet_7268/iter_80000.pth +3 -0
  39. ckpts/vitp_levir_upernet_7268/upernet_internvit_adp_levir.py +344 -0
  40. ckpts/vitp_loveda_upernet_5428/20250807_180314.log +0 -0
  41. ckpts/vitp_loveda_upernet_5428/20250807_180314.log.json +0 -0
  42. ckpts/vitp_loveda_upernet_5428/iter_80000.pth +3 -0
  43. ckpts/vitp_loveda_upernet_5428/vitp_loveda_upernet.py +208 -0
  44. ckpts/vitp_rsar_orcnn_7231/20250716_042910.log +0 -0
  45. ckpts/vitp_rsar_orcnn_7231/20250716_042910.log.json +241 -0
  46. ckpts/vitp_rsar_orcnn_7231/epoch_12.pth +3 -0
  47. ckpts/vitp_rsar_orcnn_7231/vitp_rsar_orcnn.py +300 -0
  48. ckpts/vitp_s2looking_upernet_6989/20250915_140502/20250915_140502.log +0 -0
  49. ckpts/vitp_s2looking_upernet_6989/best_checkpoint.pth.pth +3 -0
  50. ckpts/vitp_s2looking_upernet_6989/vitp_s2looking_upernet.py +360 -0
.gitattributes CHANGED
@@ -33,3 +33,29 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ pretrain_data/annotations/general_ann/docvqa_train_10k.jsonl filter=lfs diff=lfs merge=lfs -text
37
+ pretrain_data/annotations/general_ann/dvqa_train_200k.jsonl filter=lfs diff=lfs merge=lfs -text
38
+ pretrain_data/annotations/general_ann/fit_rs_vqa_100k.jsonl filter=lfs diff=lfs merge=lfs -text
39
+ pretrain_data/annotations/general_ann/geoqa+.jsonl filter=lfs diff=lfs merge=lfs -text
40
+ pretrain_data/annotations/general_ann/sharegpt4v_instruct_gpt4-vision_cap100k.jsonl filter=lfs diff=lfs merge=lfs -text
41
+ pretrain_data/annotations/general_ann/sharegpt4v_mix665k_cap23k_coco-ap9k_lcs3k_sam9k_div2k_novg.jsonl filter=lfs diff=lfs merge=lfs -text
42
+ pretrain_data/annotations/general_ann/synthdog_en.jsonl filter=lfs diff=lfs merge=lfs -text
43
+ pretrain_data/annotations/general_ann/vqa_rgb_rsvqahr_train_instruct_100k.jsonl filter=lfs diff=lfs merge=lfs -text
44
+ pretrain_data/annotations/medical_ann/huatuo_oa.jsonl filter=lfs diff=lfs merge=lfs -text
45
+ pretrain_data/annotations/medical_ann/huatuo_vqa.jsonl filter=lfs diff=lfs merge=lfs -text
46
+ pretrain_data/annotations/medical_ann/omnimedvqa.jsonl filter=lfs diff=lfs merge=lfs -text
47
+ pretrain_data/annotations/medical_ann/pmc_oa.jsonl filter=lfs diff=lfs merge=lfs -text
48
+ pretrain_data/annotations/medical_ann/pmc_vqa.jsonl filter=lfs diff=lfs merge=lfs -text
49
+ pretrain_data/annotations/medical_ann/quilt_1m.jsonl filter=lfs diff=lfs merge=lfs -text
50
+ pretrain_data/annotations/medical_ann/quilt_instruct_107k.jsonl filter=lfs diff=lfs merge=lfs -text
51
+ pretrain_data/annotations/medical_ann/quilt_instruct_complex_abductive.jsonl filter=lfs diff=lfs merge=lfs -text
52
+ pretrain_data/annotations/medical_ann/quilt_instruct_conv_desc.jsonl filter=lfs diff=lfs merge=lfs -text
53
+ pretrain_data/annotations/rs_ann/caption_cd_rgb_LevirCCcaptions.jsonl filter=lfs diff=lfs merge=lfs -text
54
+ pretrain_data/annotations/rs_ann/caption_GAIA_trainval_instruct.jsonl filter=lfs diff=lfs merge=lfs -text
55
+ pretrain_data/annotations/rs_ann/cls_rgb_Million-AID_CoT.jsonl filter=lfs diff=lfs merge=lfs -text
56
+ pretrain_data/annotations/rs_ann/cls_rgb_Million-AID.jsonl filter=lfs diff=lfs merge=lfs -text
57
+ pretrain_data/annotations/rs_ann/cls_rgb_NWPU-RESISC45.jsonl filter=lfs diff=lfs merge=lfs -text
58
+ pretrain_data/annotations/rs_ann/geochat_train.jsonl filter=lfs diff=lfs merge=lfs -text
59
+ pretrain_data/annotations/rs_ann/vqa_rgb_LRBEN.jsonl filter=lfs diff=lfs merge=lfs -text
60
+ pretrain_data/annotations/rs_ann/vqa_rgb_SAMRS.jsonl filter=lfs diff=lfs merge=lfs -text
61
+ pretrain_data/annotations/rs_ann/vrsbench_train.jsonl filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ # Introduction
4
+
5
+ Modern computer vision is converging on a closed loop in which perception, reasoning and generation mutually reinforce each other. However, this loop remains incomplete: the top-down influence of high-level reasoning on the foundational learning of low-level perceptual features remains underexplored. This paper addresses this gap by proposing a new paradigm for pretraining foundation models in downstream domains. We introduce **V**isual **i**ns**T**ruction **P**retraining (**ViTP**), a novel approach that directly leverages reasoning to enhance perception. ViTP embeds a Vision Transformer (ViT) backbone within a Vision-Language Model and pretrains it end-to-end using a rich corpus of visual instruction data curated from target downstream domains. ViTP is powered by our proposed Visual Robustness Learning (VRL), which compels the ViT to learn robust and domain-relevant features from a sparse set of visual tokens. Extensive experiments on 16 challenging remote sensing and medical imaging benchmarks demonstrate that ViTP establishes new state-of-the-art performance across a diverse range of downstream tasks. The code is available at [GitHub](https://github.com/zcablii/ViTP).
6
+
7
+ ----
8
+
9
+ ![image/png](docs/loop_radar.png)
10
+ The synergistic relationship between perception, generation, and reasoning in modern CV. Our proposed ViTP forges a novel link from high-level reasoning to low-level perception, a previously underexplored connection. ViTP sets new SOTA performance across a diverse range of downstream tasks in medical imaging and remote sensing.
11
+
12
+ ----
13
+
14
+ ![image/png](docs/vitp.png)
15
+ A conceptual illustration of the ViTP framework. A ViT backbone is embedded within a large VLM and then pretrained with domain-specific instruction following objective and Visual Robustness Learning (VRL). This process instils high-level semantic understanding into the ViT. The resulting weights are then used to initialize models for various downstream perception tasks.
16
+
17
+ ----
18
+
19
+ ```bibtex
20
+ @misc{tongyidr,
21
+ author={Tongyi DeepResearch Team},
22
+ title={Tongyi-DeepResearch},
23
+ year={2025},
24
+ howpublished={\url{https://github.com/Alibaba-NLP/DeepResearch}}
25
+ }
26
+ ```
ckpts/ViTP_InternVL_1B_med.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a33c06bd3a146de19b80f5bf3289fd1b7fd899fdde06cb94d39e9c7911e0dd7
3
+ size 1876463472
ckpts/ViTP_InternVL_1B_rs.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb63f06371476844f0813d80043e83c2cdd2bfa7878c6221b37b909be2ea10a9
3
+ size 1876463472
ckpts/ViTP_ViT_L_300M_med.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df83fabd968ac7d46280beebbee65b93bddd74860a04a09f97f58004f9dfa21e
3
+ size 617029872
ckpts/ViTP_ViT_L_300M_rs.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d44154814bfdaf4bd5b36e7ab1a657bd065c643b3c969c135e43b8bad7589661
3
+ size 617029872
ckpts/reduct_pth.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import glob

import torch


def overwirte_pth(pth_file):
    """Strip training-only state from a checkpoint and overwrite it in place.

    Removes the ``'optimizer'`` entry (and, if present, ``'param_schedulers'``
    and ``'message_hub'``) so released checkpoints keep only what inference
    needs (typically ``'meta'`` and ``'state_dict'``), shrinking file size.

    NOTE(review): the function name keeps the original "overwirte" spelling so
    any external callers are unaffected.

    Args:
        pth_file: Path to a ``.pth`` checkpoint saved by ``torch.save``.
    """
    print(f'Overwriting {pth_file}')
    # map_location='cpu' so checkpoints saved on GPU also load on CPU-only hosts.
    checkpoint = torch.load(pth_file, map_location='cpu')

    # Typically dict_keys(['meta', 'state_dict', 'optimizer']).
    print(checkpoint.keys())

    if 'optimizer' not in checkpoint:
        # Already reduced (or never had optimizer state): leave the file as-is.
        print('No optimizer found in the checkpoint')
        return
    del checkpoint['optimizer']
    # Drop other training-only bookkeeping if present; pop(..., None) avoids
    # the repeated membership checks of the original.
    checkpoint.pop('param_schedulers', None)
    checkpoint.pop('message_hub', None)

    # Overwrite the file with the reduced checkpoint.
    torch.save(checkpoint, pth_file)
    print(f'Overwritten {pth_file} successfully')


if __name__ == '__main__':
    # Guarded so importing this module no longer rewrites files as a side
    # effect; running it as a script behaves exactly as before.
    for pth_file in glob.glob('**/*.pth', recursive=True):
        overwirte_pth(pth_file)
ckpts/vitp_amos_upernet_9060/20250905_141251.log ADDED
The diff for this file is too large to render. See raw diff
 
ckpts/vitp_amos_upernet_9060/best_mDice.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a10e012e5c2a3bb778ec98226b0e6f73aef298a12c8832add221f1e7ddb8f3e9
3
+ size 1809417685
ckpts/vitp_amos_upernet_9060/eval_single_scale_20250906_143650.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": "./configs/internvit_new/upernet_internvit_adp_160e_amos_2022_yx.py",
3
+ "metric": {
4
+ "aAcc": 0.9494,
5
+ "mIoU": 0.8334999999999999,
6
+ "mAcc": 0.8970999999999999,
7
+ "mDice": 0.9059999999999999,
8
+ "IoU.spleen": 0.8662000274658204,
9
+ "IoU.kidney_right": 0.93,
10
+ "IoU.kidney_left": 0.915,
11
+ "IoU.gallbladder": 0.7836000061035157,
12
+ "IoU.esophagus": 0.7613999938964844,
13
+ "IoU.liver": 0.9440000152587891,
14
+ "IoU.stomach": 0.8463999938964843,
15
+ "IoU.aorta": 0.9205999755859375,
16
+ "IoU.inferior_vena_cava": 0.7538999938964843,
17
+ "IoU.pancreas": 0.8077999877929688,
18
+ "IoU.adrenal_gland_right": 0.6437000274658203,
19
+ "IoU.adrenal_gland_left": 0.7948000335693359,
20
+ "IoU.duodenum": 0.6570999908447266,
21
+ "IoU.bladder": 0.960999984741211,
22
+ "IoU.prostate_and_uterus": 0.9162999725341797,
23
+ "Acc.spleen": 0.892300033569336,
24
+ "Acc.kidney_right": 0.9648999786376953,
25
+ "Acc.kidney_left": 0.9219000244140625,
26
+ "Acc.gallbladder": 0.844800033569336,
27
+ "Acc.esophagus": 0.9162999725341797,
28
+ "Acc.liver": 0.9783999633789062,
29
+ "Acc.stomach": 0.9309999847412109,
30
+ "Acc.aorta": 0.949800033569336,
31
+ "Acc.inferior_vena_cava": 0.8994000244140625,
32
+ "Acc.pancreas": 0.9041999816894531,
33
+ "Acc.adrenal_gland_right": 0.7270999908447265,
34
+ "Acc.adrenal_gland_left": 0.8395999908447266,
35
+ "Acc.duodenum": 0.7452999877929688,
36
+ "Acc.bladder": 0.9805000305175782,
37
+ "Acc.prostate_and_uterus": 0.9605000305175782,
38
+ "Dice.spleen": 0.9283000183105469,
39
+ "Dice.kidney_right": 0.9637000274658203,
40
+ "Dice.kidney_left": 0.9555999755859375,
41
+ "Dice.gallbladder": 0.8787000274658203,
42
+ "Dice.esophagus": 0.8645999908447266,
43
+ "Dice.liver": 0.9712000274658203,
44
+ "Dice.stomach": 0.9168000030517578,
45
+ "Dice.aorta": 0.9587000274658203,
46
+ "Dice.inferior_vena_cava": 0.8597000122070313,
47
+ "Dice.pancreas": 0.8937000274658203,
48
+ "Dice.adrenal_gland_right": 0.7831999969482422,
49
+ "Dice.adrenal_gland_left": 0.8856999969482422,
50
+ "Dice.duodenum": 0.7930999755859375,
51
+ "Dice.bladder": 0.9801000213623047,
52
+ "Dice.prostate_and_uterus": 0.9562999725341796
53
+ }
54
+ }
ckpts/vitp_amos_upernet_9060/vitp_amos_upernet.py ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset_type = 'AMOS2022Dataset'
2
+ data_root = '/root/data-fs/twh/dataset/AMOS2022/mmseg_data'
3
+ img_norm_cfg = dict(
4
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
5
+ crop_size = (512, 512)
6
+ train_pipeline = [
7
+ dict(type='LoadImageFromFile'),
8
+ dict(type='LoadAnnotations', reduce_zero_label=True),
9
+ dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
10
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
11
+ dict(type='RandomFlip', prob=0.5),
12
+ dict(type='PhotoMetricDistortion'),
13
+ dict(
14
+ type='Normalize',
15
+ mean=[123.675, 116.28, 103.53],
16
+ std=[58.395, 57.12, 57.375],
17
+ to_rgb=True),
18
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
19
+ dict(type='DefaultFormatBundle'),
20
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
21
+ ]
22
+ test_pipeline = [
23
+ dict(type='LoadImageFromFile'),
24
+ dict(
25
+ type='MultiScaleFlipAug',
26
+ img_scale=(512, 512),
27
+ flip=False,
28
+ transforms=[
29
+ dict(type='Resize', keep_ratio=True),
30
+ dict(
31
+ type='Normalize',
32
+ mean=[123.675, 116.28, 103.53],
33
+ std=[58.395, 57.12, 57.375],
34
+ to_rgb=True),
35
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
36
+ dict(type='ImageToTensor', keys=['img']),
37
+ dict(type='Collect', keys=['img'])
38
+ ])
39
+ ]
40
+ data = dict(
41
+ samples_per_gpu=2,
42
+ workers_per_gpu=4,
43
+ train=dict(
44
+ type='AMOS2022Dataset',
45
+ data_root='/root/data-fs/twh/dataset/AMOS2022/mmseg_data',
46
+ img_dir='img_dir/train',
47
+ ann_dir='ann_dir/train',
48
+ pipeline=[
49
+ dict(type='LoadImageFromFile'),
50
+ dict(type='LoadAnnotations', reduce_zero_label=True),
51
+ dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
52
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
53
+ dict(type='RandomFlip', prob=0.5),
54
+ dict(type='PhotoMetricDistortion'),
55
+ dict(
56
+ type='Normalize',
57
+ mean=[123.675, 116.28, 103.53],
58
+ std=[58.395, 57.12, 57.375],
59
+ to_rgb=True),
60
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
61
+ dict(type='DefaultFormatBundle'),
62
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
63
+ ]),
64
+ val=dict(
65
+ type='AMOS2022Dataset',
66
+ data_root='/root/data-fs/twh/dataset/AMOS2022/mmseg_data',
67
+ img_dir='img_dir/val',
68
+ ann_dir='ann_dir/val',
69
+ pipeline=[
70
+ dict(type='LoadImageFromFile'),
71
+ dict(
72
+ type='MultiScaleFlipAug',
73
+ img_scale=(512, 512),
74
+ flip=False,
75
+ transforms=[
76
+ dict(type='Resize', keep_ratio=True),
77
+ dict(
78
+ type='Normalize',
79
+ mean=[123.675, 116.28, 103.53],
80
+ std=[58.395, 57.12, 57.375],
81
+ to_rgb=True),
82
+ dict(
83
+ type='Pad',
84
+ size=(512, 512),
85
+ pad_val=0,
86
+ seg_pad_val=255),
87
+ dict(type='ImageToTensor', keys=['img']),
88
+ dict(type='Collect', keys=['img'])
89
+ ])
90
+ ]),
91
+ test=dict(
92
+ type='AMOS2022Dataset',
93
+ data_root='/root/data-fs/twh/dataset/AMOS2022/mmseg_data',
94
+ img_dir='img_dir/test',
95
+ ann_dir='ann_dir/test',
96
+ pipeline=[
97
+ dict(type='LoadImageFromFile'),
98
+ dict(
99
+ type='MultiScaleFlipAug',
100
+ img_scale=(512, 512),
101
+ flip=False,
102
+ transforms=[
103
+ dict(type='Resize', keep_ratio=True),
104
+ dict(
105
+ type='Normalize',
106
+ mean=[123.675, 116.28, 103.53],
107
+ std=[58.395, 57.12, 57.375],
108
+ to_rgb=True),
109
+ dict(
110
+ type='Pad',
111
+ size=(512, 512),
112
+ pad_val=0,
113
+ seg_pad_val=255),
114
+ dict(type='ImageToTensor', keys=['img']),
115
+ dict(type='Collect', keys=['img'])
116
+ ])
117
+ ]))
118
+ log_config = dict(
119
+ interval=50,
120
+ hooks=[
121
+ dict(
122
+ type='MMSegWandbHook',
123
+ init_kwargs=dict(
124
+ project='ITAP_SEG', name='upernet_internvit_adp_160e_amos'),
125
+ interval=1,
126
+ num_eval_images=0)
127
+ ])
128
+ dist_params = dict(backend='nccl')
129
+ log_level = 'INFO'
130
+ load_from = None
131
+ resume_from = None
132
+ workflow = [('train', 1)]
133
+ cudnn_benchmark = True
134
+ bs = 2
135
+ pretrained = 'pretrained/ViTP_ViT_L_300M_med.safetensors'
136
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
137
+ model = dict(
138
+ type='EncoderDecoder',
139
+ pretrained=None,
140
+ backbone=dict(
141
+ type='InternViTAdapter',
142
+ pretrain_size=448,
143
+ img_size=512,
144
+ patch_size=16,
145
+ embed_dim=1024,
146
+ depth=24,
147
+ num_heads=16,
148
+ mlp_ratio=4.0,
149
+ drop_path_rate=0.1,
150
+ init_values=0.1,
151
+ with_cp=True,
152
+ use_flash_attn=True,
153
+ qk_normalization=False,
154
+ layerscale_force_fp32=False,
155
+ with_fpn=False,
156
+ freeze_vit=False,
157
+ use_final_norm=True,
158
+ interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
159
+ cffn_ratio=0.25,
160
+ deform_ratio=0.25,
161
+ qkv_bias=True,
162
+ norm_type='layer_norm',
163
+ pretrained='pretrained/ViTP_ViT_L_300M_med.safetensors',
164
+ pretrained_type='full'),
165
+ decode_head=dict(
166
+ type='UPerHead',
167
+ in_index=[0, 1, 2, 3],
168
+ pool_scales=(1, 2, 3, 6),
169
+ dropout_ratio=0.1,
170
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
171
+ align_corners=False,
172
+ num_classes=15,
173
+ channels=1024,
174
+ ignore_index=255,
175
+ in_channels=[1024, 1024, 1024, 1024],
176
+ loss_decode=dict(
177
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
178
+ auxiliary_head=dict(
179
+ type='FCNHead',
180
+ in_channels=1024,
181
+ in_index=2,
182
+ channels=1024,
183
+ num_convs=1,
184
+ concat_input=False,
185
+ dropout_ratio=0.1,
186
+ num_classes=15,
187
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
188
+ align_corners=False,
189
+ loss_decode=dict(
190
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
191
+ train_cfg=dict(),
192
+ test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(256, 256)))
193
+ optimizer = dict(
194
+ type='AdamW',
195
+ lr=2e-05,
196
+ betas=(0.9, 0.999),
197
+ weight_decay=0.05,
198
+ constructor='InternViTAdapterLayerDecayOptimizerConstructor',
199
+ paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.9))
200
+ optimizer_config = dict()
201
+ lr_config = dict(
202
+ policy='CosineAnnealing',
203
+ warmup='linear',
204
+ warmup_iters=1500,
205
+ warmup_ratio=1e-06,
206
+ min_lr=0.0)
207
+ runner = dict(type='EpochBasedRunner', max_epochs=160)
208
+ checkpoint_config = dict(interval=16, max_keep_ckpts=1)
209
+ evaluation = dict(interval=16, metric=['mIoU', 'mDice'], save_best='mDice')
210
+ fp16 = dict(loss_scale=dict(init_scale=512))
211
+ randomness = dict(seed=3407)
212
+ vis_backends = [
213
+ dict(type='LocalVisBackend'),
214
+ dict(type='TensorboardVisBackend')
215
+ ]
216
+ work_dir = './work_dirs/vitp_amos_upernet'
217
+ gpu_ids = range(0, 8)
218
+ auto_resume = False
ckpts/vitp_brats_upernet_7211/20250907_130222.log ADDED
The diff for this file is too large to render. See raw diff
 
ckpts/vitp_brats_upernet_7211/best_mDice.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecfbb50e049d31ee649eddf0d5ffcf003464850edd46c2afa626d3d2c6cbdcec
3
+ size 1809319893
ckpts/vitp_brats_upernet_7211/eval_single_scale_20250908_054047.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": "./configs/internvit_new/upernet_30e_brats_ailab4_512_5e6.py",
3
+ "metric": {
4
+ "aAcc": 0.8059000000000001,
5
+ "mIoU": 0.5802,
6
+ "mAcc": 0.7034,
7
+ "mDice": 0.7211,
8
+ "IoU.necrotic_tumor_core": 0.8151999664306641,
9
+ "IoU.peritumoral_edema": 0.4456999969482422,
10
+ "IoU.enhancing_tumor": 0.4797999954223633,
11
+ "Acc.necrotic_tumor_core": 0.9245999908447265,
12
+ "Acc.peritumoral_edema": 0.530099983215332,
13
+ "Acc.enhancing_tumor": 0.6555000305175781,
14
+ "Dice.necrotic_tumor_core": 0.8981999969482422,
15
+ "Dice.peritumoral_edema": 0.6165999984741211,
16
+ "Dice.enhancing_tumor": 0.648499984741211
17
+ }
18
+ }
ckpts/vitp_brats_upernet_7211/vitp_brats_upernet.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset_type = 'BraTS2021Dataset'
2
+ img_norm_cfg = dict(
3
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
4
+ crop_size = (512, 512)
5
+ train_pipeline = [
6
+ dict(type='LoadImageFromFile'),
7
+ dict(type='LoadAnnotations', reduce_zero_label=True),
8
+ dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
9
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
10
+ dict(type='RandomFlip', prob=0.5),
11
+ dict(type='PhotoMetricDistortion'),
12
+ dict(
13
+ type='Normalize',
14
+ mean=[123.675, 116.28, 103.53],
15
+ std=[58.395, 57.12, 57.375],
16
+ to_rgb=True),
17
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
18
+ dict(type='DefaultFormatBundle'),
19
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
20
+ ]
21
+ test_pipeline = [
22
+ dict(type='LoadImageFromFile'),
23
+ dict(
24
+ type='MultiScaleFlipAug',
25
+ img_scale=(512, 512),
26
+ flip=False,
27
+ transforms=[
28
+ dict(type='Resize', keep_ratio=True),
29
+ dict(
30
+ type='Normalize',
31
+ mean=[123.675, 116.28, 103.53],
32
+ std=[58.395, 57.12, 57.375],
33
+ to_rgb=True),
34
+ dict(type='ImageToTensor', keys=['img']),
35
+ dict(type='Collect', keys=['img'])
36
+ ])
37
+ ]
38
+ data_root = '/ailab/user/tangwenhao/data/seg/brats2021/mmseg_data'
39
+ data = dict(
40
+ samples_per_gpu=8,
41
+ workers_per_gpu=4,
42
+ train=dict(
43
+ type='BraTS2021Dataset',
44
+ data_root='/ailab/user/tangwenhao/data/seg/brats2021/mmseg_data',
45
+ img_dir='img_dir/train',
46
+ ann_dir='ann_dir/train',
47
+ pipeline=[
48
+ dict(type='LoadImageFromFile'),
49
+ dict(type='LoadAnnotations', reduce_zero_label=True),
50
+ dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
51
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
52
+ dict(type='RandomFlip', prob=0.5),
53
+ dict(type='PhotoMetricDistortion'),
54
+ dict(
55
+ type='Normalize',
56
+ mean=[123.675, 116.28, 103.53],
57
+ std=[58.395, 57.12, 57.375],
58
+ to_rgb=True),
59
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
60
+ dict(type='DefaultFormatBundle'),
61
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
62
+ ]),
63
+ val=dict(
64
+ type='BraTS2021Dataset',
65
+ data_root='/ailab/user/tangwenhao/data/seg/brats2021/mmseg_data',
66
+ img_dir='img_dir/val',
67
+ ann_dir='ann_dir/val',
68
+ pipeline=[
69
+ dict(type='LoadImageFromFile'),
70
+ dict(
71
+ type='MultiScaleFlipAug',
72
+ img_scale=(512, 512),
73
+ flip=False,
74
+ transforms=[
75
+ dict(type='Resize', keep_ratio=True),
76
+ dict(
77
+ type='Normalize',
78
+ mean=[123.675, 116.28, 103.53],
79
+ std=[58.395, 57.12, 57.375],
80
+ to_rgb=True),
81
+ dict(type='ImageToTensor', keys=['img']),
82
+ dict(type='Collect', keys=['img'])
83
+ ])
84
+ ]),
85
+ test=dict(
86
+ type='BraTS2021Dataset',
87
+ data_root='/ailab/user/tangwenhao/data/seg/brats2021/mmseg_data',
88
+ img_dir='img_dir/test',
89
+ ann_dir='ann_dir/test',
90
+ pipeline=[
91
+ dict(type='LoadImageFromFile'),
92
+ dict(
93
+ type='MultiScaleFlipAug',
94
+ img_scale=(512, 512),
95
+ flip=False,
96
+ transforms=[
97
+ dict(type='Resize', keep_ratio=True),
98
+ dict(
99
+ type='Normalize',
100
+ mean=[123.675, 116.28, 103.53],
101
+ std=[58.395, 57.12, 57.375],
102
+ to_rgb=True),
103
+ dict(type='ImageToTensor', keys=['img']),
104
+ dict(type='Collect', keys=['img'])
105
+ ])
106
+ ]))
107
+ log_config = dict(
108
+ interval=50,
109
+ hooks=[
110
+ dict(
111
+ type='MMSegWandbHook',
112
+ init_kwargs=dict(
113
+ project='ITAP_SEG',
114
+ name='upernet_internvit_adp_30e_brats_512'),
115
+ interval=1,
116
+ num_eval_images=0)
117
+ ])
118
+ dist_params = dict(backend='nccl')
119
+ log_level = 'INFO'
120
+ load_from = None
121
+ resume_from = None
122
+ workflow = [('train', 1)]
123
+ cudnn_benchmark = True
124
+ bs = 8
125
+ pretrained = 'pretrained/ViTP_ViT_L_300M_med.safetensors'
126
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
127
+ model = dict(
128
+ type='EncoderDecoder',
129
+ pretrained=None,
130
+ backbone=dict(
131
+ type='InternViTAdapter',
132
+ pretrain_size=448,
133
+ img_size=512,
134
+ patch_size=16,
135
+ embed_dim=1024,
136
+ depth=24,
137
+ num_heads=16,
138
+ mlp_ratio=4.0,
139
+ drop_path_rate=0.1,
140
+ init_values=0.1,
141
+ with_cp=True,
142
+ use_flash_attn=True,
143
+ qk_normalization=False,
144
+ layerscale_force_fp32=False,
145
+ with_fpn=False,
146
+ freeze_vit=False,
147
+ use_final_norm=True,
148
+ interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
149
+ cffn_ratio=0.25,
150
+ deform_ratio=0.25,
151
+ qkv_bias=True,
152
+ norm_type='layer_norm',
153
+ pretrained='pretrained/ViTP_ViT_L_300M_med.safetensors',
154
+ pretrained_type='full'),
155
+ decode_head=dict(
156
+ type='UPerHead',
157
+ in_index=[0, 1, 2, 3],
158
+ pool_scales=(1, 2, 3, 6),
159
+ dropout_ratio=0.1,
160
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
161
+ align_corners=False,
162
+ num_classes=3,
163
+ channels=1024,
164
+ in_channels=[1024, 1024, 1024, 1024],
165
+ loss_decode=dict(
166
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
167
+ auxiliary_head=dict(
168
+ type='FCNHead',
169
+ in_channels=1024,
170
+ in_index=2,
171
+ channels=1024,
172
+ num_convs=1,
173
+ concat_input=False,
174
+ dropout_ratio=0.1,
175
+ num_classes=3,
176
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
177
+ align_corners=False,
178
+ loss_decode=dict(
179
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
180
+ test_cfg=dict(mode='whole'))
181
+ optimizer = dict(type='AdamW', lr=5e-06, betas=(0.9, 0.999), weight_decay=0.05)
182
+ optimizer_config = dict()
183
+ lr_config = dict(
184
+ policy='CosineAnnealing',
185
+ warmup='linear',
186
+ warmup_iters=375,
187
+ warmup_ratio=1e-08,
188
+ min_lr=0,
189
+ by_epoch=False)
190
+ runner = dict(type='EpochBasedRunner', max_epochs=30)
191
+ checkpoint_config = dict(interval=3, max_keep_ckpts=1)
192
+ evaluation = dict(interval=3, metric=['mIoU', 'mDice'], save_best='mDice')
193
+ fp16 = dict(loss_scale=dict(init_scale=512))
194
+ randomness = dict(seed=3407)
195
+ vis_backends = [
196
+ dict(type='LocalVisBackend'),
197
+ dict(type='TensorboardVisBackend')
198
+ ]
199
+ work_dir = './work_dirs/vitp_brats_upernet'
200
+ gpu_ids = range(0, 8)
201
+ auto_resume = False
ckpts/vitp_convid_upernet_9155/20250902_103001.log ADDED
The diff for this file is too large to render. See raw diff
 
ckpts/vitp_convid_upernet_9155/best_mDice.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c81fb347d19a94011bd171fe4f1343c981bc786dbf2a7e60e4bb87b3b04d481a
3
+ size 1809310421
ckpts/vitp_convid_upernet_9155/eval_single_scale_20250902_233031.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": "./configs/internvit_new/upernet_internvit_adp_300e_convid_quex_v100yx_512_re_5e6_new.py",
3
+ "metric": {
4
+ "aAcc": 0.9301999999999999,
5
+ "mIoU": 0.8462000000000001,
6
+ "mAcc": 0.9157,
7
+ "mDice": 0.9155,
8
+ "IoU.lung": 0.9062000274658203,
9
+ "IoU.covid": 0.7862000274658203,
10
+ "Acc.lung": 0.9505000305175781,
11
+ "Acc.covid": 0.8808000183105469,
12
+ "Dice.lung": 0.9508000183105468,
13
+ "Dice.covid": 0.8802999877929687
14
+ }
15
+ }
ckpts/vitp_convid_upernet_9155/vitp_convid_upernet.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# mmsegmentation (mmcv 1.x style) training config: UPerNet + FCN auxiliary
# head on an InternViT-Adapter backbone, fine-tuned on the ConvidQuex CT
# segmentation dataset (2 classes). Reconstructed from a markup-mangled
# diff rendering; all configuration values are unchanged, duplicated
# literals are expressed through the variables declared at the top.
dataset_type = 'ConvidQuexDataset'
data_root = '/root/data-fs/twh/dataset/convid_quex/seg_data'
# ImageNet mean/std; to_rgb converts the loaded BGR image before normalising.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (512, 512)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', reduce_zero_label=True),
    dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(512, 512),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='image/train',
        ann_dir='anno/train',
        # NOTE(review): the dumped train pipeline below omits
        # PhotoMetricDistortion, unlike `train_pipeline` above.
        # Kept exactly as dumped — confirm which variant was intended.
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations', reduce_zero_label=True),
            dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
            dict(type='RandomCrop', crop_size=crop_size, cat_max_ratio=0.75),
            dict(type='RandomFlip', prob=0.5),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size=crop_size, pad_val=0, seg_pad_val=255),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
        ]),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='image/val',
        ann_dir='anno/val',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        img_dir='image/test',
        ann_dir='anno/test',
        pipeline=test_pipeline))
log_config = dict(
    interval=50,
    hooks=[
        dict(
            type='MMSegWandbHook',
            init_kwargs=dict(
                project='ITAP_SEG',
                name='upernet_internvit_adp_300e_convid_quex_re'),
            interval=30,
            num_eval_images=0)
    ])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
bs = 2
pretrained = 'pretrained/ViTP_ViT_L_300M_med.safetensors'
norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    pretrained=None,  # backbone weights are loaded via backbone.pretrained
    backbone=dict(
        type='InternViTAdapter',
        pretrain_size=448,
        img_size=512,
        patch_size=16,
        embed_dim=1024,
        depth=24,
        num_heads=16,
        mlp_ratio=4.0,
        drop_path_rate=0.1,
        init_values=0.1,
        with_cp=True,  # activation checkpointing to reduce memory
        use_flash_attn=True,
        qk_normalization=False,
        layerscale_force_fp32=False,
        with_fpn=False,
        freeze_vit=False,
        use_final_norm=True,
        interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
        cffn_ratio=0.25,
        deform_ratio=0.25,
        qkv_bias=True,
        norm_type='layer_norm',
        pretrained=pretrained,
        pretrained_type='full'),
    decode_head=dict(
        type='UPerHead',
        in_index=[0, 1, 2, 3],
        pool_scales=(1, 2, 3, 6),
        dropout_ratio=0.1,
        norm_cfg=norm_cfg,
        align_corners=False,
        num_classes=2,
        channels=1024,
        in_channels=[1024, 1024, 1024, 1024],
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
    auxiliary_head=dict(
        type='FCNHead',
        in_channels=1024,
        in_index=2,
        channels=1024,
        num_convs=1,
        concat_input=False,
        dropout_ratio=0.1,
        num_classes=2,
        norm_cfg=norm_cfg,
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
    test_cfg=dict(mode='whole'))
optimizer = dict(
    type='AdamW',
    lr=5e-06,
    betas=(0.9, 0.999),
    weight_decay=0.05,
    constructor='InternViTAdapterLayerDecayOptimizerConstructor',
    paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.9))
optimizer_config = dict()
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=1500,
    warmup_ratio=1e-06,
    min_lr=0.0,
    by_epoch=False)
runner = dict(type='EpochBasedRunner', max_epochs=300)
checkpoint_config = dict(interval=30, max_keep_ckpts=1)
evaluation = dict(interval=30, metric=['mIoU', 'mDice'], save_best='mDice')
fp16 = dict(loss_scale=dict(init_scale=512))
randomness = dict(seed=3407)
vis_backends = [
    dict(type='LocalVisBackend'),
    dict(type='TensorboardVisBackend'),
    dict(type='WandbVisBackend')
]
work_dir = './work_dirs/vitp_convid_upernet'
gpu_ids = range(0, 8)
auto_resume = False
ckpts/vitp_dior_cascade_rcnn_7960/20250730_223238.log ADDED
The diff for this file is too large to render. See raw diff
 
ckpts/vitp_dior_cascade_rcnn_7960/20250730_223238.log.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"mmdet_version": "2.25.16fc0c4e", "CLASSES": ["airplane", "airport", "baseballfield", "basketballcourt", "bridge", "chimney", "dam", "Expressway-Service-area", "Expressway-toll-station", "golffield", "groundtrackfield", "harbor", "overpass", "ship", "stadium", "storagetank", "tenniscourt", "trainstation", "vehicle", "windmill"], "env_info": "sys.platform: linux\nPython: 3.10.16 (main, Dec 11 2024, 16:24:50) [GCC 11.2.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: NVIDIA GeForce RTX 3090\nCUDA_HOME: /mnt/petrelfs/share_data/liqingyun/cuda/cuda-12.4/\nGCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0\nPyTorch: 1.12.0\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2024.0-Product Build 20231011 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.3\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37\n - CuDNN 8.3.2 (built against CUDA 11.5)\n - Magma 2.5.2\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds 
-Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.12.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=OFF, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n\nTorchVision: 0.13.0\nOpenCV: 4.11.0\nMMCV: 1.6.1\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.4\nMMRotate: 0.3.4+6fc0c4e", "config": "dataset_type = 'DIORDataset'\ndata_root = '/defaultShare/pubdata/remote_sensing/DIOR/'\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\nimg_size = 800\nangle_version = 'le90'\ntrain_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', with_bbox=True),\n dict(type='Resize', img_scale=(800, 800), keep_ratio=False),\n dict(type='RandomFlip', flip_ratio=0.5),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(800, 800)),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\ntest_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(800, 800),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=False),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(800, 800)),\n dict(type='ImageToTensor', keys=['img']),\n 
dict(type='Collect', keys=['img'])\n ])\n]\ndata = dict(\n samples_per_gpu=1,\n workers_per_gpu=4,\n train=dict(\n type='DIORDataset',\n ann_file=\n '/defaultShare/pubdata/remote_sensing/DIOR/Annotations/train_val.json',\n img_prefix=\n '/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/trainval/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', with_bbox=True),\n dict(type='Resize', img_scale=(800, 800), keep_ratio=False),\n dict(type='RandomFlip', flip_ratio=0.5),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(800, 800)),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n ]),\n val=dict(\n type='DIORDataset',\n ann_file=\n '/defaultShare/pubdata/remote_sensing/DIOR/Annotations/test.json',\n img_prefix='/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/test/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(800, 800),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=False),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(800, 800)),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]),\n test=dict(\n type='DIORDataset',\n ann_file=\n '/defaultShare/pubdata/remote_sensing/DIOR/Annotations/test.json',\n img_prefix='/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/test/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(800, 800),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=False),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(800, 800)),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', 
keys=['img'])\n ])\n ]))\nevaluation = dict(interval=4, metric='mAP', classwise=True)\noptimizer = dict(\n type='AdamW',\n lr=2e-05,\n betas=(0.9, 0.999),\n weight_decay=0.05,\n constructor='InternViTAdapterLayerDecayOptimizerConstructor',\n paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.9))\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n policy='step',\n warmup='linear',\n warmup_iters=500,\n warmup_ratio=0.3333333333333333,\n step=[8, 11])\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\ncheckpoint_config = dict(interval=1, max_keep_ckpts=1)\nlog_config = dict(interval=500, hooks=[dict(type='TextLoggerHook')])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\nopencv_num_threads = 0\nmp_start_method = 'fork'\npretrained = 'pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors'\ngpu_number = 8\nnorm_cfg = dict(type='LN', requires_grad=True)\nnum_classes = 20\nmodel = dict(\n type='CascadeRCNN',\n backbone=dict(\n type='InternViTAdapter',\n pretrain_size=448,\n img_size=800,\n patch_size=16,\n embed_dim=1024,\n depth=24,\n num_heads=16,\n mlp_ratio=4.0,\n drop_path_rate=0.1,\n init_values=0.1,\n with_cp=True,\n use_flash_attn=True,\n qk_normalization=False,\n layerscale_force_fp32=False,\n with_fpn=False,\n freeze_vit=False,\n use_final_norm=True,\n interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],\n cffn_ratio=0.25,\n deform_ratio=0.25,\n qkv_bias=True,\n norm_type='layer_norm',\n pretrained=\n 'pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors',\n pretrained_type='full',\n only_feat_out=True),\n neck=dict(\n type='SimpleFPN',\n in_channels=[1024, 1024, 1024, 1024],\n out_channels=256,\n norm_cfg=dict(type='LN', requires_grad=True),\n use_residual=False,\n num_outs=5),\n rpn_head=dict(\n type='RPNHead',\n in_channels=256,\n feat_channels=256,\n anchor_generator=dict(\n 
type='AnchorGenerator',\n scales=[8],\n ratios=[0.5, 1.0, 2.0],\n strides=[4, 8, 16, 32, 64]),\n bbox_coder=dict(\n type='DeltaXYWHBBoxCoder',\n target_means=[0.0, 0.0, 0.0, 0.0],\n target_stds=[1.0, 1.0, 1.0, 1.0]),\n loss_cls=dict(\n type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n loss_bbox=dict(\n type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)),\n roi_head=dict(\n type='CascadeRoIHead',\n num_stages=3,\n stage_loss_weights=[1, 0.5, 0.25],\n bbox_roi_extractor=dict(\n type='SingleRoIExtractor',\n roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n out_channels=256,\n featmap_strides=[4, 8, 16, 32]),\n bbox_head=[\n dict(\n type='Shared2FCBBoxHead',\n in_channels=256,\n fc_out_channels=1024,\n roi_feat_size=7,\n num_classes=20,\n bbox_coder=dict(\n type='DeltaXYWHBBoxCoder',\n target_means=[0.0, 0.0, 0.0, 0.0],\n target_stds=[0.1, 0.1, 0.2, 0.2]),\n reg_class_agnostic=True,\n loss_cls=dict(\n type='CrossEntropyLoss',\n use_sigmoid=False,\n loss_weight=1.0),\n loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n loss_weight=1.0)),\n dict(\n type='Shared2FCBBoxHead',\n in_channels=256,\n fc_out_channels=1024,\n roi_feat_size=7,\n num_classes=20,\n bbox_coder=dict(\n type='DeltaXYWHBBoxCoder',\n target_means=[0.0, 0.0, 0.0, 0.0],\n target_stds=[0.05, 0.05, 0.1, 0.1]),\n reg_class_agnostic=True,\n loss_cls=dict(\n type='CrossEntropyLoss',\n use_sigmoid=False,\n loss_weight=1.0),\n loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n loss_weight=1.0)),\n dict(\n type='Shared2FCBBoxHead',\n in_channels=256,\n fc_out_channels=1024,\n roi_feat_size=7,\n num_classes=20,\n bbox_coder=dict(\n type='DeltaXYWHBBoxCoder',\n target_means=[0.0, 0.0, 0.0, 0.0],\n target_stds=[0.033, 0.033, 0.067, 0.067]),\n reg_class_agnostic=True,\n loss_cls=dict(\n type='CrossEntropyLoss',\n use_sigmoid=False,\n loss_weight=1.0),\n loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))\n ]),\n train_cfg=dict(\n rpn=dict(\n assigner=dict(\n 
type='MaxIoUAssigner',\n pos_iou_thr=0.7,\n neg_iou_thr=0.3,\n min_pos_iou=0.3,\n match_low_quality=True,\n ignore_iof_thr=-1),\n sampler=dict(\n type='RandomSampler',\n num=256,\n pos_fraction=0.5,\n neg_pos_ub=-1,\n add_gt_as_proposals=False),\n allowed_border=0,\n pos_weight=-1,\n debug=False),\n rpn_proposal=dict(\n nms_pre=2000,\n max_per_img=2000,\n nms=dict(type='nms', iou_threshold=0.7),\n min_bbox_size=0),\n rcnn=[\n dict(\n assigner=dict(\n type='MaxIoUAssigner',\n pos_iou_thr=0.5,\n neg_iou_thr=0.5,\n min_pos_iou=0.5,\n match_low_quality=False,\n ignore_iof_thr=-1),\n sampler=dict(\n type='RandomSampler',\n num=512,\n pos_fraction=0.25,\n neg_pos_ub=-1,\n add_gt_as_proposals=True),\n pos_weight=-1,\n debug=False),\n dict(\n assigner=dict(\n type='MaxIoUAssigner',\n pos_iou_thr=0.6,\n neg_iou_thr=0.6,\n min_pos_iou=0.6,\n match_low_quality=False,\n ignore_iof_thr=-1),\n sampler=dict(\n type='RandomSampler',\n num=512,\n pos_fraction=0.25,\n neg_pos_ub=-1,\n add_gt_as_proposals=True),\n pos_weight=-1,\n debug=False),\n dict(\n assigner=dict(\n type='MaxIoUAssigner',\n pos_iou_thr=0.7,\n neg_iou_thr=0.7,\n min_pos_iou=0.7,\n match_low_quality=False,\n ignore_iof_thr=-1),\n sampler=dict(\n type='RandomSampler',\n num=512,\n pos_fraction=0.25,\n neg_pos_ub=-1,\n add_gt_as_proposals=True),\n pos_weight=-1,\n debug=False)\n ]),\n test_cfg=dict(\n rpn=dict(\n nms_pre=1000,\n max_per_img=1000,\n nms=dict(type='nms', iou_threshold=0.7),\n min_bbox_size=0),\n rcnn=dict(\n score_thr=0.05,\n nms=dict(type='nms', iou_threshold=0.5),\n max_per_img=100)))\nfp16 = dict(loss_scale=dict(init_scale=512))\nwork_dir = './work_dirs/dior_inst_tun_TMAug75_8k'\nauto_resume = False\ngpu_ids = range(0, 8)\ndevice = 'cuda'\n", "seed": 0, "exp_name": "dior_inst_tun_TMAug75_8k.py", "fp16": {"loss_scaler": {"scale": 2048.0, "growth_factor": 2.0, "backoff_factor": 0.5, "growth_interval": 2000, "_growth_tracker": 1864}}, "epoch": 4, "iter": 5864, "mmcv_version": "1.6.1", "time": "Wed Jul 
30 18:25:23 2025", "hook_msgs": {"last_ckpt": "/nfs/liyuxuan/zhangyicheng/mmrotate/work_dirs/dior_inst_tun_TMAug75_8k/epoch_3.pth"}}
2
+ {"mode": "train", "epoch": 5, "iter": 500, "lr": 0.0, "memory": 12677, "data_time": 0.01045, "loss_rpn_cls": 0.00582, "loss_rpn_bbox": 0.00764, "s0.loss_cls": 0.05271, "s0.acc": 97.94438, "s0.loss_bbox": 0.03948, "s1.loss_cls": 0.02568, "s1.acc": 98.02153, "s1.loss_bbox": 0.0621, "s2.loss_cls": 0.01527, "s2.acc": 97.57696, "s2.loss_bbox": 0.05219, "loss": 0.26089, "grad_norm": Infinity, "time": 0.77073}
3
+ {"mode": "train", "epoch": 5, "iter": 1000, "lr": 0.0, "memory": 12677, "data_time": 0.00323, "loss_rpn_cls": 0.0059, "loss_rpn_bbox": 0.00788, "s0.loss_cls": 0.05248, "s0.acc": 97.94033, "s0.loss_bbox": 0.03961, "s1.loss_cls": 0.02571, "s1.acc": 98.00724, "s1.loss_bbox": 0.06156, "s2.loss_cls": 0.01496, "s2.acc": 97.60852, "s2.loss_bbox": 0.05159, "loss": 0.25968, "grad_norm": 9.70959, "time": 0.75422}
4
+ {"mode": "train", "epoch": 6, "iter": 500, "lr": 0.0, "memory": 12680, "data_time": 0.00791, "loss_rpn_cls": 0.00518, "loss_rpn_bbox": 0.00718, "s0.loss_cls": 0.04721, "s0.acc": 98.17241, "s0.loss_bbox": 0.03518, "s1.loss_cls": 0.02243, "s1.acc": 98.26773, "s1.loss_bbox": 0.05572, "s2.loss_cls": 0.01297, "s2.acc": 97.95815, "s2.loss_bbox": 0.04768, "loss": 0.23354, "grad_norm": 9.22684, "time": 0.75987}
5
+ {"mode": "train", "epoch": 6, "iter": 1000, "lr": 0.0, "memory": 12680, "data_time": 0.00303, "loss_rpn_cls": 0.00488, "loss_rpn_bbox": 0.00711, "s0.loss_cls": 0.0468, "s0.acc": 98.16084, "s0.loss_bbox": 0.03546, "s1.loss_cls": 0.02189, "s1.acc": 98.29857, "s1.loss_bbox": 0.05547, "s2.loss_cls": 0.01293, "s2.acc": 97.95175, "s2.loss_bbox": 0.04742, "loss": 0.23196, "grad_norm": Infinity, "time": 0.75293}
6
+ {"mode": "train", "epoch": 7, "iter": 500, "lr": 0.0, "memory": 12680, "data_time": 0.00782, "loss_rpn_cls": 0.00395, "loss_rpn_bbox": 0.00645, "s0.loss_cls": 0.04222, "s0.acc": 98.33569, "s0.loss_bbox": 0.03149, "s1.loss_cls": 0.01936, "s1.acc": 98.49516, "s1.loss_bbox": 0.0514, "s2.loss_cls": 0.01139, "s2.acc": 98.21969, "s2.loss_bbox": 0.04539, "loss": 0.21164, "grad_norm": 9.5548, "time": 0.75796}
7
+ {"mode": "train", "epoch": 7, "iter": 1000, "lr": 0.0, "memory": 12680, "data_time": 0.00291, "loss_rpn_cls": 0.00449, "loss_rpn_bbox": 0.00649, "s0.loss_cls": 0.04345, "s0.acc": 98.29019, "s0.loss_bbox": 0.03286, "s1.loss_cls": 0.02, "s1.acc": 98.45613, "s1.loss_bbox": 0.0523, "s2.loss_cls": 0.01178, "s2.acc": 98.15007, "s2.loss_bbox": 0.04496, "loss": 0.21633, "grad_norm": 9.5179, "time": 0.75234}
8
+ {"mode": "train", "epoch": 8, "iter": 500, "lr": 0.0, "memory": 12680, "data_time": 0.00785, "loss_rpn_cls": 0.00372, "loss_rpn_bbox": 0.00585, "s0.loss_cls": 0.03913, "s0.acc": 98.46299, "s0.loss_bbox": 0.02886, "s1.loss_cls": 0.01705, "s1.acc": 98.68055, "s1.loss_bbox": 0.04656, "s2.loss_cls": 0.00996, "s2.acc": 98.43402, "s2.loss_bbox": 0.04085, "loss": 0.19199, "grad_norm": 8.43056, "time": 0.75836}
9
+ {"mode": "train", "epoch": 8, "iter": 1000, "lr": 0.0, "memory": 12680, "data_time": 0.00285, "loss_rpn_cls": 0.00367, "loss_rpn_bbox": 0.0061, "s0.loss_cls": 0.0383, "s0.acc": 98.48643, "s0.loss_bbox": 0.02924, "s1.loss_cls": 0.01657, "s1.acc": 98.71499, "s1.loss_bbox": 0.04741, "s2.loss_cls": 0.00973, "s2.acc": 98.48204, "s2.loss_bbox": 0.04206, "loss": 0.19309, "grad_norm": 8.75413, "time": 0.75288}
10
+ {"mode": "val", "epoch": 8, "iter": 1468, "lr": 0.0, "bbox_mAP": 0.552, "bbox_mAP_50": 0.797, "bbox_mAP_75": 0.601, "bbox_mAP_s": 0.171, "bbox_mAP_m": 0.464, "bbox_mAP_l": 0.738, "bbox_mAP_copypaste": "0.552 0.797 0.601 0.171 0.464 0.738"}
11
+ {"mode": "train", "epoch": 9, "iter": 500, "lr": 0.0, "memory": 12680, "data_time": 0.00814, "loss_rpn_cls": 0.00287, "loss_rpn_bbox": 0.00487, "s0.loss_cls": 0.03275, "s0.acc": 98.70068, "s0.loss_bbox": 0.02321, "s1.loss_cls": 0.01318, "s1.acc": 98.98104, "s1.loss_bbox": 0.03895, "s2.loss_cls": 0.00756, "s2.acc": 98.83655, "s2.loss_bbox": 0.03562, "loss": 0.159, "grad_norm": 7.53158, "time": 0.76159}
12
+ {"mode": "train", "epoch": 9, "iter": 1000, "lr": 0.0, "memory": 12680, "data_time": 0.003, "loss_rpn_cls": 0.00298, "loss_rpn_bbox": 0.00475, "s0.loss_cls": 0.03326, "s0.acc": 98.70029, "s0.loss_bbox": 0.02426, "s1.loss_cls": 0.01377, "s1.acc": 98.94669, "s1.loss_bbox": 0.04039, "s2.loss_cls": 0.00799, "s2.acc": 98.7764, "s2.loss_bbox": 0.0367, "loss": 0.16409, "grad_norm": 7.60728, "time": 0.75893}
13
+ {"mode": "train", "epoch": 10, "iter": 500, "lr": 0.0, "memory": 12680, "data_time": 0.0081, "loss_rpn_cls": 0.00275, "loss_rpn_bbox": 0.00454, "s0.loss_cls": 0.03061, "s0.acc": 98.79683, "s0.loss_bbox": 0.02215, "s1.loss_cls": 0.01231, "s1.acc": 99.03177, "s1.loss_bbox": 0.03735, "s2.loss_cls": 0.00687, "s2.acc": 98.9413, "s2.loss_bbox": 0.03433, "loss": 0.15092, "grad_norm": 7.38586, "time": 0.76107}
14
+ {"mode": "train", "epoch": 10, "iter": 1000, "lr": 0.0, "memory": 12680, "data_time": 0.00312, "loss_rpn_cls": 0.00261, "loss_rpn_bbox": 0.00469, "s0.loss_cls": 0.03044, "s0.acc": 98.81895, "s0.loss_bbox": 0.02166, "s1.loss_cls": 0.01218, "s1.acc": 99.06392, "s1.loss_bbox": 0.03619, "s2.loss_cls": 0.00683, "s2.acc": 98.95856, "s2.loss_bbox": 0.03315, "loss": 0.14775, "grad_norm": 6.83723, "time": 0.75739}
15
+ {"mode": "train", "epoch": 11, "iter": 500, "lr": 0.0, "memory": 12680, "data_time": 0.00774, "loss_rpn_cls": 0.00255, "loss_rpn_bbox": 0.00446, "s0.loss_cls": 0.02944, "s0.acc": 98.8356, "s0.loss_bbox": 0.02092, "s1.loss_cls": 0.01161, "s1.acc": 99.09921, "s1.loss_bbox": 0.03464, "s2.loss_cls": 0.00648, "s2.acc": 98.98859, "s2.loss_bbox": 0.03173, "loss": 0.14183, "grad_norm": 6.81652, "time": 0.7619}
16
+ {"mode": "train", "epoch": 11, "iter": 1000, "lr": 0.0, "memory": 12683, "data_time": 0.00296, "loss_rpn_cls": 0.00247, "loss_rpn_bbox": 0.00473, "s0.loss_cls": 0.02998, "s0.acc": 98.82202, "s0.loss_bbox": 0.02142, "s1.loss_cls": 0.01177, "s1.acc": 99.08769, "s1.loss_bbox": 0.03574, "s2.loss_cls": 0.00655, "s2.acc": 98.99902, "s2.loss_bbox": 0.03296, "loss": 0.14561, "grad_norm": 7.11457, "time": 0.7583}
17
+ {"mode": "train", "epoch": 12, "iter": 500, "lr": 0.0, "memory": 12683, "data_time": 0.00791, "loss_rpn_cls": 0.00221, "loss_rpn_bbox": 0.00392, "s0.loss_cls": 0.02771, "s0.acc": 98.91255, "s0.loss_bbox": 0.01946, "s1.loss_cls": 0.01073, "s1.acc": 99.16979, "s1.loss_bbox": 0.0328, "s2.loss_cls": 0.00588, "s2.acc": 99.10159, "s2.loss_bbox": 0.03082, "loss": 0.13353, "grad_norm": 6.75006, "time": 0.76188}
18
+ {"mode": "train", "epoch": 12, "iter": 1000, "lr": 0.0, "memory": 12683, "data_time": 0.00304, "loss_rpn_cls": 0.00255, "loss_rpn_bbox": 0.00466, "s0.loss_cls": 0.02867, "s0.acc": 98.86948, "s0.loss_bbox": 0.02018, "s1.loss_cls": 0.01103, "s1.acc": 99.1422, "s1.loss_bbox": 0.0331, "s2.loss_cls": 0.00617, "s2.acc": 99.05892, "s2.loss_bbox": 0.03062, "loss": 0.13698, "grad_norm": Infinity, "time": 0.7584}
19
+ {"mode": "val", "epoch": 12, "iter": 1468, "lr": 0.0, "bbox_mAP": 0.557, "bbox_mAP_50": 0.796, "bbox_mAP_75": 0.61, "bbox_mAP_s": 0.175, "bbox_mAP_m": 0.474, "bbox_mAP_l": 0.747, "bbox_mAP_copypaste": "0.557 0.796 0.610 0.175 0.474 0.747"}
ckpts/vitp_dior_cascade_rcnn_7960/epoch_12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb6fd409f17f1ac0c98c7ee418c5c126c482c8753d4db467cfacc65e5fddfd61
3
+ size 1478110605
ckpts/vitp_dior_cascade_rcnn_7960/vitp_dior_cascade_rcnn.py ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# mmdetection (mmcv 1.x style) training config: Cascade R-CNN with an
# InternViT-Adapter backbone and SimpleFPN neck, fine-tuned on the DIOR
# remote-sensing dataset (20 classes, COCO-style json annotations).
# Reconstructed from a markup-mangled diff rendering; all configuration
# values are unchanged. The three cascade stages differ only in their
# bbox-coder stds / IoU thresholds, so they are generated in comprehensions
# over exactly those varying values.
dataset_type = 'DIORDataset'
data_root = '/defaultShare/pubdata/remote_sensing/DIOR/'
# ImageNet mean/std; to_rgb converts the loaded BGR image before normalising.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
img_size = 800
angle_version = 'le90'

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(800, 800), keep_ratio=False),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size=(800, 800)),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(800, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=False),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size=(800, 800)),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=1,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'Annotations/train_val.json',
        img_prefix=data_root + 'JPEGImages/trainval/',
        pipeline=train_pipeline),
    # val and test both evaluate on the held-out test split, as dumped.
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'Annotations/test.json',
        img_prefix=data_root + 'JPEGImages/test/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'Annotations/test.json',
        img_prefix=data_root + 'JPEGImages/test/',
        pipeline=test_pipeline))
evaluation = dict(interval=4, metric='bbox', classwise=True)
optimizer = dict(
    type='AdamW',
    lr=2e-05,
    betas=(0.9, 0.999),
    weight_decay=0.05,
    constructor='InternViTAdapterLayerDecayOptimizerConstructor',
    paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.9))
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.3333333333333333,
    step=[8, 11])
runner = dict(type='EpochBasedRunner', max_epochs=12)
checkpoint_config = dict(interval=1, max_keep_ckpts=1)
log_config = dict(interval=500, hooks=[dict(type='TextLoggerHook')])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
pretrained = 'pretrained/ViTP_ViT_L_300M_rs.safetensors'
gpu_number = 8
norm_cfg = dict(type='LN', requires_grad=True)
num_classes = 20
model = dict(
    type='CascadeRCNN',
    backbone=dict(
        type='InternViTAdapter',
        pretrain_size=448,
        img_size=800,
        patch_size=16,
        embed_dim=1024,
        depth=24,
        num_heads=16,
        mlp_ratio=4.0,
        drop_path_rate=0.1,
        init_values=0.1,
        with_cp=True,  # activation checkpointing to reduce memory
        use_flash_attn=True,
        qk_normalization=False,
        layerscale_force_fp32=False,
        with_fpn=False,
        freeze_vit=False,
        use_final_norm=True,
        interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
        cffn_ratio=0.25,
        deform_ratio=0.25,
        qkv_bias=True,
        norm_type='layer_norm',
        pretrained=pretrained,
        pretrained_type='full',
        only_feat_out=True),
    neck=dict(
        type='SimpleFPN',
        in_channels=[1024, 1024, 1024, 1024],
        out_channels=256,
        norm_cfg=norm_cfg,
        use_residual=False,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[0.0, 0.0, 0.0, 0.0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(
            type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)),
    roi_head=dict(
        type='CascadeRoIHead',
        num_stages=3,
        stage_loss_weights=[1, 0.5, 0.25],
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        # The three stages are identical except for progressively tighter
        # regression target stds.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=num_classes,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0.0, 0.0, 0.0, 0.0],
                    target_stds=stds),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
            for stds in ([0.1, 0.1, 0.2, 0.2],
                         [0.05, 0.05, 0.1, 0.1],
                         [0.033, 0.033, 0.067, 0.067])
        ]),
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=0,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            max_per_img=2000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        # Per-stage RCNN sampling differs only in the IoU threshold
        # (0.5 -> 0.6 -> 0.7).
        rcnn=[
            dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=iou_thr,
                    neg_iou_thr=iou_thr,
                    min_pos_iou=iou_thr,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                pos_weight=-1,
                debug=False) for iou_thr in (0.5, 0.6, 0.7)
        ]),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100)))
fp16 = dict(loss_scale=dict(init_scale=512))
work_dir = './work_dirs/dior_inst_tun_TMAug75_8k'
auto_resume = True
gpu_ids = range(0, 8)
device = 'cuda'
ckpts/vitp_diorr_orcnn_7508/20250918_082138.log ADDED
The diff for this file is too large to render. See raw diff
 
ckpts/vitp_diorr_orcnn_7508/epoch_12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efb778e73bb38524df174933dcb8ec40e778ed101f571810556942946b917148
3
+ size 1373279149
ckpts/vitp_diorr_orcnn_7508/vitp_diorr_orcnn.py ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset_type = 'DIORRDataset'
2
+ data_root = '/defaultShare/pubdata/remote_sensing/DIOR/'
3
+ angle_version = 'le90'
4
+ img_norm_cfg = dict(
5
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6
+ train_pipeline = [
7
+ dict(type='LoadImageFromFile'),
8
+ dict(type='LoadAnnotations', with_bbox=True),
9
+ dict(type='RResize', img_scale=(1024, 1024)),
10
+ dict(
11
+ type='RRandomFlip',
12
+ flip_ratio=[0.25, 0.25, 0.25],
13
+ direction=['horizontal', 'vertical', 'diagonal'],
14
+ version='le90'),
15
+ dict(
16
+ type='PolyRandomRotate',
17
+ rotate_ratio=0.5,
18
+ angles_range=180,
19
+ auto_bound=False,
20
+ rect_classes=[5, 15, 19],
21
+ version='le90'),
22
+ dict(
23
+ type='Normalize',
24
+ mean=[123.675, 116.28, 103.53],
25
+ std=[58.395, 57.12, 57.375],
26
+ to_rgb=True),
27
+ dict(type='Pad', size_divisor=32),
28
+ dict(type='DefaultFormatBundle'),
29
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
30
+ ]
31
+ test_pipeline = [
32
+ dict(type='LoadImageFromFile'),
33
+ dict(
34
+ type='MultiScaleFlipAug',
35
+ img_scale=(1024, 1024),
36
+ flip=False,
37
+ transforms=[
38
+ dict(type='RResize'),
39
+ dict(
40
+ type='Normalize',
41
+ mean=[123.675, 116.28, 103.53],
42
+ std=[58.395, 57.12, 57.375],
43
+ to_rgb=True),
44
+ dict(type='Pad', size_divisor=32),
45
+ dict(type='DefaultFormatBundle'),
46
+ dict(type='Collect', keys=['img'])
47
+ ])
48
+ ]
49
+ data = dict(
50
+ samples_per_gpu=1,
51
+ workers_per_gpu=4,
52
+ train=dict(
53
+ type='DIORRDataset',
54
+ ann_file=[
55
+ '/defaultShare/pubdata/remote_sensing/DIOR/ImageSets/train.txt',
56
+ '/defaultShare/pubdata/remote_sensing/DIOR/ImageSets/val.txt'
57
+ ],
58
+ ann_subdir=
59
+ '/defaultShare/pubdata/remote_sensing/DIOR/Annotations/Oriented Bounding Boxes/',
60
+ img_subdir=
61
+ '/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/trainval/',
62
+ img_prefix=
63
+ '/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/trainval/',
64
+ pipeline=[
65
+ dict(type='LoadImageFromFile'),
66
+ dict(type='LoadAnnotations', with_bbox=True),
67
+ dict(type='RResize', img_scale=(1024, 1024)),
68
+ dict(
69
+ type='RRandomFlip',
70
+ flip_ratio=[0.25, 0.25, 0.25],
71
+ direction=['horizontal', 'vertical', 'diagonal'],
72
+ version='le90'),
73
+ dict(
74
+ type='PolyRandomRotate',
75
+ rotate_ratio=0.5,
76
+ angles_range=180,
77
+ auto_bound=False,
78
+ rect_classes=[5, 15, 19],
79
+ version='le90'),
80
+ dict(
81
+ type='Normalize',
82
+ mean=[123.675, 116.28, 103.53],
83
+ std=[58.395, 57.12, 57.375],
84
+ to_rgb=True),
85
+ dict(type='Pad', size_divisor=32),
86
+ dict(type='DefaultFormatBundle'),
87
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
88
+ ],
89
+ version='le90'),
90
+ val=dict(
91
+ type='DIORRDataset',
92
+ ann_file='/defaultShare/pubdata/remote_sensing/DIOR/ImageSets/test.txt',
93
+ ann_subdir=
94
+ '/defaultShare/pubdata/remote_sensing/DIOR/Annotations/Oriented Bounding Boxes/',
95
+ img_subdir='/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/test/',
96
+ img_prefix='/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/test/',
97
+ pipeline=[
98
+ dict(type='LoadImageFromFile'),
99
+ dict(
100
+ type='MultiScaleFlipAug',
101
+ img_scale=(1024, 1024),
102
+ flip=False,
103
+ transforms=[
104
+ dict(type='RResize'),
105
+ dict(
106
+ type='Normalize',
107
+ mean=[123.675, 116.28, 103.53],
108
+ std=[58.395, 57.12, 57.375],
109
+ to_rgb=True),
110
+ dict(type='Pad', size_divisor=32),
111
+ dict(type='DefaultFormatBundle'),
112
+ dict(type='Collect', keys=['img'])
113
+ ])
114
+ ],
115
+ version='le90'),
116
+ test=dict(
117
+ type='DIORRDataset',
118
+ ann_file='/defaultShare/pubdata/remote_sensing/DIOR/ImageSets/test.txt',
119
+ ann_subdir=
120
+ '/defaultShare/pubdata/remote_sensing/DIOR/Annotations/Oriented Bounding Boxes/',
121
+ img_subdir='/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/test/',
122
+ img_prefix='/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/test/',
123
+ pipeline=[
124
+ dict(type='LoadImageFromFile'),
125
+ dict(
126
+ type='MultiScaleFlipAug',
127
+ img_scale=(1024, 1024),
128
+ flip=False,
129
+ transforms=[
130
+ dict(type='RResize'),
131
+ dict(
132
+ type='Normalize',
133
+ mean=[123.675, 116.28, 103.53],
134
+ std=[58.395, 57.12, 57.375],
135
+ to_rgb=True),
136
+ dict(type='Pad', size_divisor=32),
137
+ dict(type='DefaultFormatBundle'),
138
+ dict(type='Collect', keys=['img'])
139
+ ])
140
+ ],
141
+ version='le90'))
142
+ evaluation = dict(interval=1, metric='mAP')
143
+ optimizer = dict(
144
+ type='AdamW',
145
+ lr=2.5e-05,
146
+ betas=(0.9, 0.999),
147
+ weight_decay=0.05,
148
+ constructor='InternViTAdapterLayerDecayOptimizerConstructor',
149
+ paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.95))
150
+ optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
151
+ lr_config = dict(
152
+ policy='step',
153
+ warmup='linear',
154
+ warmup_iters=500,
155
+ warmup_ratio=0.3333333333333333,
156
+ step=[8, 11])
157
+ runner = dict(type='EpochBasedRunner', max_epochs=12)
158
+ checkpoint_config = dict(interval=1, max_keep_ckpts=1)
159
+ log_config = dict(interval=500, hooks=[dict(type='TextLoggerHook')])
160
+ dist_params = dict(backend='nccl')
161
+ log_level = 'INFO'
162
+ load_from = None
163
+ resume_from = None
164
+ workflow = [('train', 1)]
165
+ opencv_num_threads = 0
166
+ mp_start_method = 'fork'
167
+ pretrained = 'pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors'
168
+ norm_cfg = dict(type='LN', requires_grad=True)
169
+ model = dict(
170
+ type='OrientedRCNN',
171
+ backbone=dict(
172
+ type='InternViTAdapter',
173
+ pretrain_size=448,
174
+ img_size=1024,
175
+ patch_size=16,
176
+ embed_dim=1024,
177
+ depth=24,
178
+ num_heads=16,
179
+ mlp_ratio=4.0,
180
+ drop_path_rate=0.1,
181
+ init_values=0.1,
182
+ with_cp=True,
183
+ use_flash_attn=True,
184
+ qk_normalization=False,
185
+ layerscale_force_fp32=False,
186
+ with_fpn=False,
187
+ freeze_vit=False,
188
+ use_final_norm=True,
189
+ interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
190
+ cffn_ratio=0.25,
191
+ deform_ratio=0.25,
192
+ qkv_bias=True,
193
+ norm_type='layer_norm',
194
+ pretrained=
195
+ 'pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors',
196
+ pretrained_type='full',
197
+ only_feat_out=True),
198
+ neck=dict(
199
+ type='SimpleFPN',
200
+ in_channels=[1024, 1024, 1024, 1024],
201
+ out_channels=256,
202
+ norm_cfg=dict(type='LN', requires_grad=True),
203
+ use_residual=False,
204
+ num_outs=5),
205
+ rpn_head=dict(
206
+ type='OrientedRPNHead',
207
+ in_channels=256,
208
+ feat_channels=256,
209
+ version='le90',
210
+ anchor_generator=dict(
211
+ type='AnchorGenerator',
212
+ scales=[8],
213
+ ratios=[0.5, 1.0, 2.0],
214
+ strides=[4, 8, 16, 32, 64]),
215
+ bbox_coder=dict(
216
+ type='MidpointOffsetCoder',
217
+ angle_range='le90',
218
+ target_means=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
219
+ target_stds=[1.0, 1.0, 1.0, 1.0, 0.5, 0.5]),
220
+ loss_cls=dict(
221
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
222
+ loss_bbox=dict(
223
+ type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)),
224
+ roi_head=dict(
225
+ type='OrientedStandardRoIHead',
226
+ bbox_roi_extractor=dict(
227
+ type='RotatedSingleRoIExtractor',
228
+ roi_layer=dict(
229
+ type='RoIAlignRotated',
230
+ out_size=7,
231
+ sample_num=2,
232
+ clockwise=True),
233
+ out_channels=256,
234
+ featmap_strides=[4, 8, 16, 32]),
235
+ bbox_head=dict(
236
+ type='RotatedShared2FCBBoxHead',
237
+ in_channels=256,
238
+ fc_out_channels=1024,
239
+ roi_feat_size=7,
240
+ num_classes=20,
241
+ bbox_coder=dict(
242
+ type='DeltaXYWHAOBBoxCoder',
243
+ angle_range='le90',
244
+ norm_factor=None,
245
+ edge_swap=True,
246
+ proj_xy=True,
247
+ target_means=(0.0, 0.0, 0.0, 0.0, 0.0),
248
+ target_stds=(0.1, 0.1, 0.2, 0.2, 0.1)),
249
+ reg_class_agnostic=True,
250
+ loss_cls=dict(
251
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
252
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),
253
+ train_cfg=dict(
254
+ rpn=dict(
255
+ assigner=dict(
256
+ type='MaxIoUAssigner',
257
+ pos_iou_thr=0.7,
258
+ neg_iou_thr=0.3,
259
+ min_pos_iou=0.3,
260
+ match_low_quality=True,
261
+ gpu_assign_thr=800,
262
+ ignore_iof_thr=-1),
263
+ sampler=dict(
264
+ type='RandomSampler',
265
+ num=256,
266
+ pos_fraction=0.5,
267
+ neg_pos_ub=-1,
268
+ add_gt_as_proposals=False),
269
+ allowed_border=0,
270
+ pos_weight=-1,
271
+ debug=False),
272
+ rpn_proposal=dict(
273
+ nms_pre=2000,
274
+ max_per_img=2000,
275
+ nms=dict(type='nms', iou_threshold=0.8),
276
+ min_bbox_size=0),
277
+ rcnn=dict(
278
+ assigner=dict(
279
+ type='MaxIoUAssigner',
280
+ pos_iou_thr=0.5,
281
+ neg_iou_thr=0.5,
282
+ min_pos_iou=0.5,
283
+ match_low_quality=False,
284
+ gpu_assign_thr=800,
285
+ iou_calculator=dict(type='RBboxOverlaps2D'),
286
+ ignore_iof_thr=-1),
287
+ sampler=dict(
288
+ type='RRandomSampler',
289
+ num=512,
290
+ pos_fraction=0.25,
291
+ neg_pos_ub=-1,
292
+ add_gt_as_proposals=True),
293
+ pos_weight=-1,
294
+ debug=False)),
295
+ test_cfg=dict(
296
+ rpn=dict(
297
+ nms_pre=2000,
298
+ max_per_img=2000,
299
+ nms=dict(type='nms', iou_threshold=0.8),
300
+ min_bbox_size=0),
301
+ rcnn=dict(
302
+ nms_pre=2000,
303
+ min_bbox_size=0,
304
+ score_thr=0.05,
305
+ nms=dict(iou_thr=0.1),
306
+ max_per_img=2000)))
307
+ fp16 = dict(loss_scale=dict(init_scale=512))
308
+ work_dir = './work_dirs/diorr_inst_tun_TMAug75_8k'
309
+ auto_resume = False
310
+ gpu_ids = range(0, 8)
311
+ device = 'cuda'
ckpts/vitp_dotav2_orcnn_6073/20250726_012424.log ADDED
The diff for this file is too large to render. See raw diff
 
ckpts/vitp_dotav2_orcnn_6073/20250726_012424.log.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"env_info": "sys.platform: linux\nPython: 3.10.16 (main, Dec 11 2024, 16:24:50) [GCC 11.2.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: NVIDIA GeForce RTX 3090\nCUDA_HOME: /usr/local/cuda-11\nNVCC: Cuda compilation tools, release 11.4, V11.4.120\nGCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0\nPyTorch: 1.12.0\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2024.0-Product Build 20231011 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.3\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37\n - CuDNN 8.3.2 (built against CUDA 11.5)\n - Magma 2.5.2\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always 
-faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.12.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=OFF, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n\nTorchVision: 0.13.0\nOpenCV: 4.11.0\nMMCV: 1.6.1\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.4\nMMRotate: 0.3.4+6fc0c4e", "config": "dataset_type = 'DOTAv2Dataset'\ndata_root = '/defaultShare/pubdata/remote_sensing/dota_v2/'\nangle_version = 'le90'\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', with_bbox=True),\n dict(type='RResize', img_scale=(1024, 1024)),\n dict(\n type='RRandomFlip',\n flip_ratio=[0.25, 0.25, 0.25],\n direction=['horizontal', 'vertical', 'diagonal'],\n version='le90'),\n dict(\n type='PolyRandomRotate',\n rotate_ratio=0.5,\n angles_range=180,\n auto_bound=False,\n rect_classes=[9, 11, 16],\n version='le90'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\ntest_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(1024, 1024),\n flip=False,\n transforms=[\n dict(type='RResize'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img'])\n ])\n]\ndata = dict(\n samples_per_gpu=1,\n workers_per_gpu=4,\n train=dict(\n type='DOTAv2Dataset',\n ann_file=\n 
'/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/trainval/annfiles/',\n img_prefix=\n '/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/trainval/images/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', with_bbox=True),\n dict(type='RResize', img_scale=(1024, 1024)),\n dict(\n type='RRandomFlip',\n flip_ratio=[0.25, 0.25, 0.25],\n direction=['horizontal', 'vertical', 'diagonal'],\n version='le90'),\n dict(\n type='PolyRandomRotate',\n rotate_ratio=0.5,\n angles_range=180,\n auto_bound=False,\n rect_classes=[9, 11, 16],\n version='le90'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n ],\n version='le90'),\n val=dict(\n type='DOTAv2Dataset',\n ann_file=\n '/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/val/annfiles/',\n img_prefix=\n '/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/val/images/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(1024, 1024),\n flip=False,\n transforms=[\n dict(type='RResize'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img'])\n ])\n ],\n version='le90'),\n test=dict(\n type='DOTAv2Dataset',\n ann_file=\n '/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/test/images/',\n img_prefix=\n '/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/test/images/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(1024, 1024),\n flip=False,\n transforms=[\n dict(type='RResize'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', 
size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img'])\n ])\n ],\n version='le90'))\nevaluation = dict(interval=1, metric='mAP')\noptimizer = dict(\n type='AdamW',\n lr=2.5e-05,\n betas=(0.9, 0.999),\n weight_decay=0.05,\n constructor='InternViTAdapterLayerDecayOptimizerConstructor',\n paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.95))\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n policy='step',\n warmup='linear',\n warmup_iters=500,\n warmup_ratio=0.3333333333333333,\n step=[8, 11])\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\ncheckpoint_config = dict(interval=1, max_keep_ckpts=1)\nlog_config = dict(interval=500, hooks=[dict(type='TextLoggerHook')])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\nopencv_num_threads = 0\nmp_start_method = 'fork'\npretrained = 'pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors'\nnorm_cfg = dict(type='LN', requires_grad=True)\nmodel = dict(\n type='OrientedRCNN',\n backbone=dict(\n type='InternViTAdapter',\n pretrain_size=448,\n img_size=1024,\n patch_size=16,\n embed_dim=1024,\n depth=24,\n num_heads=16,\n mlp_ratio=4.0,\n drop_path_rate=0.1,\n init_values=0.1,\n with_cp=True,\n use_flash_attn=True,\n qk_normalization=False,\n layerscale_force_fp32=False,\n with_fpn=False,\n freeze_vit=False,\n use_final_norm=True,\n interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],\n cffn_ratio=0.25,\n deform_ratio=0.25,\n qkv_bias=True,\n norm_type='layer_norm',\n pretrained=\n 'pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors',\n pretrained_type='full',\n only_feat_out=True),\n neck=dict(\n type='SimpleFPN',\n in_channels=[1024, 1024, 1024, 1024],\n out_channels=256,\n norm_cfg=dict(type='LN', requires_grad=True),\n use_residual=False,\n num_outs=5),\n rpn_head=dict(\n type='OrientedRPNHead',\n in_channels=256,\n 
feat_channels=256,\n version='le90',\n anchor_generator=dict(\n type='AnchorGenerator',\n scales=[8],\n ratios=[0.5, 1.0, 2.0],\n strides=[4, 8, 16, 32, 64]),\n bbox_coder=dict(\n type='MidpointOffsetCoder',\n angle_range='le90',\n target_means=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0],\n target_stds=[1.0, 1.0, 1.0, 1.0, 0.5, 0.5]),\n loss_cls=dict(\n type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n loss_bbox=dict(\n type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)),\n roi_head=dict(\n type='OrientedStandardRoIHead',\n bbox_roi_extractor=dict(\n type='RotatedSingleRoIExtractor',\n roi_layer=dict(\n type='RoIAlignRotated',\n out_size=7,\n sample_num=2,\n clockwise=True),\n out_channels=256,\n featmap_strides=[4, 8, 16, 32]),\n bbox_head=dict(\n type='RotatedShared2FCBBoxHead',\n in_channels=256,\n fc_out_channels=1024,\n roi_feat_size=7,\n num_classes=18,\n bbox_coder=dict(\n type='DeltaXYWHAOBBoxCoder',\n angle_range='le90',\n norm_factor=None,\n edge_swap=True,\n proj_xy=True,\n target_means=(0.0, 0.0, 0.0, 0.0, 0.0),\n target_stds=(0.1, 0.1, 0.2, 0.2, 0.1)),\n reg_class_agnostic=True,\n loss_cls=dict(\n type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),\n train_cfg=dict(\n rpn=dict(\n assigner=dict(\n type='MaxIoUAssigner',\n pos_iou_thr=0.7,\n neg_iou_thr=0.3,\n min_pos_iou=0.3,\n match_low_quality=True,\n gpu_assign_thr=1000,\n ignore_iof_thr=-1),\n sampler=dict(\n type='RandomSampler',\n num=256,\n pos_fraction=0.5,\n neg_pos_ub=-1,\n add_gt_as_proposals=False),\n allowed_border=0,\n pos_weight=-1,\n debug=False),\n rpn_proposal=dict(\n nms_pre=2000,\n max_per_img=2000,\n nms=dict(type='nms', iou_threshold=0.8),\n min_bbox_size=0),\n rcnn=dict(\n assigner=dict(\n type='MaxIoUAssigner',\n pos_iou_thr=0.5,\n neg_iou_thr=0.5,\n min_pos_iou=0.5,\n match_low_quality=False,\n gpu_assign_thr=1000,\n iou_calculator=dict(type='RBboxOverlaps2D'),\n ignore_iof_thr=-1),\n 
sampler=dict(\n type='RRandomSampler',\n num=512,\n pos_fraction=0.25,\n neg_pos_ub=-1,\n add_gt_as_proposals=True),\n pos_weight=-1,\n debug=False)),\n test_cfg=dict(\n rpn=dict(\n nms_pre=2000,\n max_per_img=2000,\n nms=dict(type='nms', iou_threshold=0.8),\n min_bbox_size=0),\n rcnn=dict(\n nms_pre=2000,\n min_bbox_size=0,\n score_thr=0.05,\n nms=dict(iou_thr=0.1),\n max_per_img=2000)))\nfp16 = dict(loss_scale=dict(init_scale=512))\nwork_dir = './work_dirs/dotav2_ss_inst_tun_TMAug75_orcnn_8k_submit_fixed'\nauto_resume = False\ngpu_ids = range(0, 8)\ndevice = 'cuda'\n", "seed": 0, "exp_name": "dotav2_ss_inst_tun_TMAug75_orcnn_8k_submit_fixed.py"}
2
+ {"mode": "train", "epoch": 1, "iter": 500, "lr": 1e-05, "memory": 17876, "data_time": 0.01321, "loss_rpn_cls": 0.22962, "loss_rpn_bbox": 0.23782, "loss_cls": 0.2992, "acc": 91.19155, "loss_bbox": 0.36975, "loss": 1.13638, "grad_norm": 6.84031, "time": 0.89729}
3
+ {"mode": "train", "epoch": 1, "iter": 1000, "lr": 1e-05, "memory": 18434, "data_time": 0.00449, "loss_rpn_cls": 0.06197, "loss_rpn_bbox": 0.13979, "loss_cls": 0.27737, "acc": 89.68506, "loss_bbox": 0.32214, "loss": 0.80126, "grad_norm": 8.01017, "time": 0.92367}
4
+ {"mode": "train", "epoch": 1, "iter": 1500, "lr": 1e-05, "memory": 18674, "data_time": 0.00473, "loss_rpn_cls": 0.04664, "loss_rpn_bbox": 0.10544, "loss_cls": 0.25047, "acc": 90.28662, "loss_bbox": 0.28368, "loss": 0.68623, "grad_norm": 7.65981, "time": 0.93148}
5
+ {"mode": "train", "epoch": 1, "iter": 2000, "lr": 1e-05, "memory": 18674, "data_time": 0.0046, "loss_rpn_cls": 0.04112, "loss_rpn_bbox": 0.09466, "loss_cls": 0.23303, "acc": 90.85869, "loss_bbox": 0.2537, "loss": 0.62252, "grad_norm": 7.42095, "time": 0.88799}
6
+ {"mode": "val", "epoch": 1, "iter": 507, "lr": 1e-05, "mAP": 0.55798}
7
+ {"mode": "train", "epoch": 2, "iter": 500, "lr": 1e-05, "memory": 18674, "data_time": 0.01054, "loss_rpn_cls": 0.03452, "loss_rpn_bbox": 0.08228, "loss_cls": 0.21041, "acc": 91.63521, "loss_bbox": 0.22821, "loss": 0.55542, "grad_norm": 6.93511, "time": 0.86113}
8
+ {"mode": "train", "epoch": 2, "iter": 1000, "lr": 1e-05, "memory": 18674, "data_time": 0.00488, "loss_rpn_cls": 0.0323, "loss_rpn_bbox": 0.07856, "loss_cls": 0.20609, "acc": 91.78198, "loss_bbox": 0.21681, "loss": 0.53376, "grad_norm": 6.61931, "time": 0.96748}
9
+ {"mode": "train", "epoch": 2, "iter": 1500, "lr": 1e-05, "memory": 18674, "data_time": 0.00467, "loss_rpn_cls": 0.03153, "loss_rpn_bbox": 0.07904, "loss_cls": 0.19671, "acc": 92.2127, "loss_bbox": 0.20355, "loss": 0.51084, "grad_norm": 6.5069, "time": 0.88819}
10
+ {"mode": "train", "epoch": 2, "iter": 2000, "lr": 1e-05, "memory": 18674, "data_time": 0.0045, "loss_rpn_cls": 0.02838, "loss_rpn_bbox": 0.07284, "loss_cls": 0.18933, "acc": 92.44087, "loss_bbox": 0.20238, "loss": 0.49292, "grad_norm": 6.43785, "time": 0.88837}
11
+ {"mode": "val", "epoch": 2, "iter": 507, "lr": 1e-05, "mAP": 0.67134}
12
+ {"mode": "train", "epoch": 3, "iter": 500, "lr": 1e-05, "memory": 18674, "data_time": 0.01028, "loss_rpn_cls": 0.02596, "loss_rpn_bbox": 0.06967, "loss_cls": 0.17774, "acc": 92.91035, "loss_bbox": 0.19083, "loss": 0.46419, "grad_norm": 6.16322, "time": 0.93415}
13
+ {"mode": "train", "epoch": 3, "iter": 1000, "lr": 1e-05, "memory": 18674, "data_time": 0.0049, "loss_rpn_cls": 0.0243, "loss_rpn_bbox": 0.07143, "loss_cls": 0.17614, "acc": 92.94189, "loss_bbox": 0.18816, "loss": 0.46003, "grad_norm": 6.08296, "time": 0.85458}
14
+ {"mode": "train", "epoch": 3, "iter": 1500, "lr": 1e-05, "memory": 18674, "data_time": 0.00555, "loss_rpn_cls": 0.02326, "loss_rpn_bbox": 0.07019, "loss_cls": 0.17277, "acc": 93.06279, "loss_bbox": 0.18304, "loss": 0.44925, "grad_norm": Infinity, "time": 0.93356}
15
+ {"mode": "train", "epoch": 3, "iter": 2000, "lr": 1e-05, "memory": 18674, "data_time": 0.00419, "loss_rpn_cls": 0.02175, "loss_rpn_bbox": 0.06569, "loss_cls": 0.16608, "acc": 93.34463, "loss_bbox": 0.17497, "loss": 0.42848, "grad_norm": 5.86902, "time": 0.93196}
16
+ {"mode": "val", "epoch": 3, "iter": 507, "lr": 1e-05, "mAP": 0.73974}
17
+ {"mode": "train", "epoch": 4, "iter": 500, "lr": 1e-05, "memory": 18674, "data_time": 0.01021, "loss_rpn_cls": 0.0207, "loss_rpn_bbox": 0.06478, "loss_cls": 0.1601, "acc": 93.57822, "loss_bbox": 0.1722, "loss": 0.41777, "grad_norm": 5.68575, "time": 0.83728}
18
+ {"mode": "train", "epoch": 4, "iter": 1000, "lr": 1e-05, "memory": 18674, "data_time": 0.00434, "loss_rpn_cls": 0.01994, "loss_rpn_bbox": 0.06668, "loss_cls": 0.15932, "acc": 93.60884, "loss_bbox": 0.16885, "loss": 0.41479, "grad_norm": 5.81053, "time": 0.88715}
19
+ {"mode": "train", "epoch": 4, "iter": 1500, "lr": 1e-05, "memory": 18674, "data_time": 0.00427, "loss_rpn_cls": 0.01936, "loss_rpn_bbox": 0.06142, "loss_cls": 0.15656, "acc": 93.70991, "loss_bbox": 0.16712, "loss": 0.40445, "grad_norm": 5.80572, "time": 0.92355}
20
+ {"mode": "train", "epoch": 4, "iter": 2000, "lr": 1e-05, "memory": 18674, "data_time": 0.0045, "loss_rpn_cls": 0.01946, "loss_rpn_bbox": 0.06579, "loss_cls": 0.15377, "acc": 93.8209, "loss_bbox": 0.16848, "loss": 0.4075, "grad_norm": 5.65134, "time": 0.91218}
21
+ {"mode": "val", "epoch": 4, "iter": 507, "lr": 1e-05, "mAP": 0.77918}
22
+ {"mode": "train", "epoch": 5, "iter": 500, "lr": 1e-05, "memory": 18674, "data_time": 0.01137, "loss_rpn_cls": 0.01661, "loss_rpn_bbox": 0.06183, "loss_cls": 0.14519, "acc": 94.13652, "loss_bbox": 0.16133, "loss": 0.38495, "grad_norm": 5.58271, "time": 0.83983}
23
+ {"mode": "train", "epoch": 5, "iter": 1000, "lr": 1e-05, "memory": 18674, "data_time": 0.00575, "loss_rpn_cls": 0.01661, "loss_rpn_bbox": 0.05994, "loss_cls": 0.14637, "acc": 94.13779, "loss_bbox": 0.15861, "loss": 0.38153, "grad_norm": 5.50105, "time": 0.90623}
24
+ {"mode": "train", "epoch": 5, "iter": 1500, "lr": 1e-05, "memory": 18684, "data_time": 0.00576, "loss_rpn_cls": 0.01698, "loss_rpn_bbox": 0.05849, "loss_cls": 0.1446, "acc": 94.17349, "loss_bbox": 0.1583, "loss": 0.37836, "grad_norm": 5.34356, "time": 0.88473}
25
+ {"mode": "train", "epoch": 5, "iter": 2000, "lr": 1e-05, "memory": 18684, "data_time": 0.00543, "loss_rpn_cls": 0.01642, "loss_rpn_bbox": 0.05949, "loss_cls": 0.14323, "acc": 94.2292, "loss_bbox": 0.1564, "loss": 0.37555, "grad_norm": 5.26925, "time": 0.93525}
26
+ {"mode": "val", "epoch": 5, "iter": 507, "lr": 1e-05, "mAP": 0.77819}
27
+ {"mode": "train", "epoch": 6, "iter": 500, "lr": 1e-05, "memory": 18684, "data_time": 0.01102, "loss_rpn_cls": 0.01521, "loss_rpn_bbox": 0.05975, "loss_cls": 0.14136, "acc": 94.25312, "loss_bbox": 0.15807, "loss": 0.37439, "grad_norm": 5.27262, "time": 0.84039}
28
+ {"mode": "train", "epoch": 6, "iter": 1000, "lr": 1e-05, "memory": 18684, "data_time": 0.00541, "loss_rpn_cls": 0.01487, "loss_rpn_bbox": 0.05635, "loss_cls": 0.13433, "acc": 94.55645, "loss_bbox": 0.14917, "loss": 0.35473, "grad_norm": Infinity, "time": 0.94292}
29
+ {"mode": "train", "epoch": 6, "iter": 1500, "lr": 1e-05, "memory": 18684, "data_time": 0.00517, "loss_rpn_cls": 0.01424, "loss_rpn_bbox": 0.05669, "loss_cls": 0.13325, "acc": 94.6062, "loss_bbox": 0.14822, "loss": 0.35239, "grad_norm": 5.11383, "time": 0.94904}
30
+ {"mode": "train", "epoch": 6, "iter": 2000, "lr": 1e-05, "memory": 18684, "data_time": 0.00494, "loss_rpn_cls": 0.01532, "loss_rpn_bbox": 0.05812, "loss_cls": 0.1349, "acc": 94.57983, "loss_bbox": 0.14951, "loss": 0.35784, "grad_norm": 5.09458, "time": 0.8966}
31
+ {"mode": "val", "epoch": 6, "iter": 507, "lr": 1e-05, "mAP": 0.78846}
32
+ {"mode": "train", "epoch": 7, "iter": 500, "lr": 1e-05, "memory": 18684, "data_time": 0.0115, "loss_rpn_cls": 0.01331, "loss_rpn_bbox": 0.05755, "loss_cls": 0.12985, "acc": 94.76611, "loss_bbox": 0.14636, "loss": 0.34707, "grad_norm": 5.24194, "time": 0.86415}
33
+ {"mode": "train", "epoch": 7, "iter": 1000, "lr": 1e-05, "memory": 18684, "data_time": 0.00517, "loss_rpn_cls": 0.01366, "loss_rpn_bbox": 0.05277, "loss_cls": 0.12833, "acc": 94.75073, "loss_bbox": 0.14497, "loss": 0.33974, "grad_norm": 5.13276, "time": 0.88887}
34
+ {"mode": "train", "epoch": 7, "iter": 1500, "lr": 1e-05, "memory": 18684, "data_time": 0.00541, "loss_rpn_cls": 0.01313, "loss_rpn_bbox": 0.05564, "loss_cls": 0.12753, "acc": 94.81455, "loss_bbox": 0.14402, "loss": 0.34032, "grad_norm": Infinity, "time": 0.93935}
35
+ {"mode": "train", "epoch": 7, "iter": 2000, "lr": 1e-05, "memory": 18684, "data_time": 0.00511, "loss_rpn_cls": 0.01303, "loss_rpn_bbox": 0.05475, "loss_cls": 0.13166, "acc": 94.66523, "loss_bbox": 0.14399, "loss": 0.34343, "grad_norm": 5.21322, "time": 0.8627}
36
+ {"mode": "val", "epoch": 7, "iter": 507, "lr": 1e-05, "mAP": 0.81797}
37
+ {"mode": "train", "epoch": 8, "iter": 500, "lr": 1e-05, "memory": 18684, "data_time": 0.01038, "loss_rpn_cls": 0.01266, "loss_rpn_bbox": 0.05059, "loss_cls": 0.12186, "acc": 95.03833, "loss_bbox": 0.14016, "loss": 0.32527, "grad_norm": 4.9427, "time": 0.9485}
38
+ {"mode": "train", "epoch": 8, "iter": 1000, "lr": 1e-05, "memory": 18684, "data_time": 0.005, "loss_rpn_cls": 0.01225, "loss_rpn_bbox": 0.05314, "loss_cls": 0.1225, "acc": 94.99727, "loss_bbox": 0.1376, "loss": 0.32549, "grad_norm": 4.87507, "time": 0.90573}
39
+ {"mode": "train", "epoch": 8, "iter": 1500, "lr": 1e-05, "memory": 18684, "data_time": 0.00499, "loss_rpn_cls": 0.01257, "loss_rpn_bbox": 0.052, "loss_cls": 0.12513, "acc": 94.90845, "loss_bbox": 0.1408, "loss": 0.3305, "grad_norm": 4.96171, "time": 0.86173}
40
+ {"mode": "train", "epoch": 8, "iter": 2000, "lr": 1e-05, "memory": 18684, "data_time": 0.00506, "loss_rpn_cls": 0.01301, "loss_rpn_bbox": 0.05485, "loss_cls": 0.12459, "acc": 94.92754, "loss_bbox": 0.14242, "loss": 0.33486, "grad_norm": 4.92717, "time": 0.95854}
41
+ {"mode": "val", "epoch": 8, "iter": 507, "lr": 1e-05, "mAP": 0.82038}
42
+ {"mode": "train", "epoch": 9, "iter": 500, "lr": 0.0, "memory": 18684, "data_time": 0.01025, "loss_rpn_cls": 0.01003, "loss_rpn_bbox": 0.04765, "loss_cls": 0.11069, "acc": 95.49946, "loss_bbox": 0.12562, "loss": 0.29398, "grad_norm": 4.3049, "time": 0.91349}
43
+ {"mode": "train", "epoch": 9, "iter": 1000, "lr": 0.0, "memory": 18684, "data_time": 0.00495, "loss_rpn_cls": 0.01021, "loss_rpn_bbox": 0.04601, "loss_cls": 0.10919, "acc": 95.51021, "loss_bbox": 0.12339, "loss": 0.2888, "grad_norm": 4.20204, "time": 0.8631}
44
+ {"mode": "train", "epoch": 9, "iter": 1500, "lr": 0.0, "memory": 18684, "data_time": 0.00576, "loss_rpn_cls": 0.00961, "loss_rpn_bbox": 0.04675, "loss_cls": 0.10608, "acc": 95.65625, "loss_bbox": 0.1228, "loss": 0.28525, "grad_norm": 4.11367, "time": 0.89445}
45
+ {"mode": "train", "epoch": 9, "iter": 2000, "lr": 0.0, "memory": 18684, "data_time": 0.00502, "loss_rpn_cls": 0.00968, "loss_rpn_bbox": 0.04612, "loss_cls": 0.10711, "acc": 95.61128, "loss_bbox": 0.12204, "loss": 0.28495, "grad_norm": 4.12107, "time": 0.93494}
46
+ {"mode": "val", "epoch": 9, "iter": 507, "lr": 0.0, "mAP": 0.83246}
47
+ {"mode": "train", "epoch": 10, "iter": 500, "lr": 0.0, "memory": 18722, "data_time": 0.0103, "loss_rpn_cls": 0.009, "loss_rpn_bbox": 0.04503, "loss_cls": 0.1037, "acc": 95.71821, "loss_bbox": 0.11915, "loss": 0.27688, "grad_norm": NaN, "time": 0.86487}
48
+ {"mode": "train", "epoch": 10, "iter": 1000, "lr": 0.0, "memory": 18722, "data_time": 0.00489, "loss_rpn_cls": 0.00899, "loss_rpn_bbox": 0.04443, "loss_cls": 0.10421, "acc": 95.70728, "loss_bbox": 0.12051, "loss": 0.27814, "grad_norm": 4.01737, "time": 0.92096}
49
+ {"mode": "train", "epoch": 10, "iter": 1500, "lr": 0.0, "memory": 18722, "data_time": 0.00489, "loss_rpn_cls": 0.00916, "loss_rpn_bbox": 0.04545, "loss_cls": 0.10351, "acc": 95.72437, "loss_bbox": 0.11887, "loss": 0.27699, "grad_norm": 4.11703, "time": 0.93527}
50
+ {"mode": "train", "epoch": 10, "iter": 2000, "lr": 0.0, "memory": 18722, "data_time": 0.00452, "loss_rpn_cls": 0.00918, "loss_rpn_bbox": 0.04553, "loss_cls": 0.10346, "acc": 95.7481, "loss_bbox": 0.11914, "loss": 0.27731, "grad_norm": 4.26776, "time": 0.9228}
51
+ {"mode": "val", "epoch": 10, "iter": 507, "lr": 0.0, "mAP": 0.83925}
52
+ {"mode": "train", "epoch": 11, "iter": 500, "lr": 0.0, "memory": 18722, "data_time": 0.0105, "loss_rpn_cls": 0.00882, "loss_rpn_bbox": 0.04445, "loss_cls": 0.09972, "acc": 95.8978, "loss_bbox": 0.11679, "loss": 0.26977, "grad_norm": 4.12779, "time": 0.93442}
53
+ {"mode": "train", "epoch": 11, "iter": 1000, "lr": 0.0, "memory": 18722, "data_time": 0.0052, "loss_rpn_cls": 0.00876, "loss_rpn_bbox": 0.04516, "loss_cls": 0.10168, "acc": 95.81157, "loss_bbox": 0.11813, "loss": 0.27373, "grad_norm": 4.12723, "time": 0.89153}
54
+ {"mode": "train", "epoch": 11, "iter": 1500, "lr": 0.0, "memory": 18722, "data_time": 0.00459, "loss_rpn_cls": 0.00864, "loss_rpn_bbox": 0.04431, "loss_cls": 0.09949, "acc": 95.87734, "loss_bbox": 0.11514, "loss": 0.26758, "grad_norm": 4.13996, "time": 0.90387}
55
+ {"mode": "train", "epoch": 11, "iter": 2000, "lr": 0.0, "memory": 18722, "data_time": 0.00435, "loss_rpn_cls": 0.00881, "loss_rpn_bbox": 0.04572, "loss_cls": 0.1016, "acc": 95.80464, "loss_bbox": 0.11718, "loss": 0.27331, "grad_norm": 4.13149, "time": 0.86303}
56
+ {"mode": "val", "epoch": 11, "iter": 507, "lr": 0.0, "mAP": 0.84054}
57
+ {"mode": "train", "epoch": 12, "iter": 500, "lr": 0.0, "memory": 18722, "data_time": 0.01022, "loss_rpn_cls": 0.00828, "loss_rpn_bbox": 0.0456, "loss_cls": 0.09908, "acc": 95.91694, "loss_bbox": 0.11592, "loss": 0.26888, "grad_norm": 4.12893, "time": 0.93153}
58
+ {"mode": "train", "epoch": 12, "iter": 1000, "lr": 0.0, "memory": 18722, "data_time": 0.00509, "loss_rpn_cls": 0.00866, "loss_rpn_bbox": 0.04128, "loss_cls": 0.09904, "acc": 95.91704, "loss_bbox": 0.11454, "loss": 0.26352, "grad_norm": Infinity, "time": 0.90641}
59
+ {"mode": "train", "epoch": 12, "iter": 1500, "lr": 0.0, "memory": 18722, "data_time": 0.0044, "loss_rpn_cls": 0.00815, "loss_rpn_bbox": 0.0453, "loss_cls": 0.09877, "acc": 95.94561, "loss_bbox": 0.11316, "loss": 0.26537, "grad_norm": 3.98564, "time": 0.88759}
60
+ {"mode": "train", "epoch": 12, "iter": 2000, "lr": 0.0, "memory": 18722, "data_time": 0.00443, "loss_rpn_cls": 0.0083, "loss_rpn_bbox": 0.04348, "loss_cls": 0.09853, "acc": 95.96401, "loss_bbox": 0.11658, "loss": 0.26689, "grad_norm": 4.07181, "time": 0.88259}
61
+ {"mode": "val", "epoch": 12, "iter": 507, "lr": 0.0, "mAP": 0.83731}
ckpts/vitp_dotav2_orcnn_6073/epoch_12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8aa7c0b74b0b90c9e1560a679407b8fdaa6e35449c65d00ac99151d7c62ba075
3
+ size 1373270509
ckpts/vitp_dotav2_orcnn_6073/vitp_dotav2_orcnn.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset_type = 'DOTAv2Dataset'
2
+ data_root = '/defaultShare/pubdata/remote_sensing/dota_v2/'
3
+ angle_version = 'le90'
4
+ img_norm_cfg = dict(
5
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6
+ train_pipeline = [
7
+ dict(type='LoadImageFromFile'),
8
+ dict(type='LoadAnnotations', with_bbox=True),
9
+ dict(type='RResize', img_scale=(1024, 1024)),
10
+ dict(
11
+ type='RRandomFlip',
12
+ flip_ratio=[0.25, 0.25, 0.25],
13
+ direction=['horizontal', 'vertical', 'diagonal'],
14
+ version='le90'),
15
+ dict(
16
+ type='PolyRandomRotate',
17
+ rotate_ratio=0.5,
18
+ angles_range=180,
19
+ auto_bound=False,
20
+ rect_classes=[9, 11, 16],
21
+ version='le90'),
22
+ dict(
23
+ type='Normalize',
24
+ mean=[123.675, 116.28, 103.53],
25
+ std=[58.395, 57.12, 57.375],
26
+ to_rgb=True),
27
+ dict(type='Pad', size_divisor=32),
28
+ dict(type='DefaultFormatBundle'),
29
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
30
+ ]
31
+ test_pipeline = [
32
+ dict(type='LoadImageFromFile'),
33
+ dict(
34
+ type='MultiScaleFlipAug',
35
+ img_scale=(1024, 1024),
36
+ flip=False,
37
+ transforms=[
38
+ dict(type='RResize'),
39
+ dict(
40
+ type='Normalize',
41
+ mean=[123.675, 116.28, 103.53],
42
+ std=[58.395, 57.12, 57.375],
43
+ to_rgb=True),
44
+ dict(type='Pad', size_divisor=32),
45
+ dict(type='DefaultFormatBundle'),
46
+ dict(type='Collect', keys=['img'])
47
+ ])
48
+ ]
49
+ data = dict(
50
+ samples_per_gpu=1,
51
+ workers_per_gpu=4,
52
+ train=dict(
53
+ type='DOTAv2Dataset',
54
+ ann_file=
55
+ '/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/trainval/annfiles/',
56
+ img_prefix=
57
+ '/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/trainval/images/',
58
+ pipeline=[
59
+ dict(type='LoadImageFromFile'),
60
+ dict(type='LoadAnnotations', with_bbox=True),
61
+ dict(type='RResize', img_scale=(1024, 1024)),
62
+ dict(
63
+ type='RRandomFlip',
64
+ flip_ratio=[0.25, 0.25, 0.25],
65
+ direction=['horizontal', 'vertical', 'diagonal'],
66
+ version='le90'),
67
+ dict(
68
+ type='PolyRandomRotate',
69
+ rotate_ratio=0.5,
70
+ angles_range=180,
71
+ auto_bound=False,
72
+ rect_classes=[9, 11, 16],
73
+ version='le90'),
74
+ dict(
75
+ type='Normalize',
76
+ mean=[123.675, 116.28, 103.53],
77
+ std=[58.395, 57.12, 57.375],
78
+ to_rgb=True),
79
+ dict(type='Pad', size_divisor=32),
80
+ dict(type='DefaultFormatBundle'),
81
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
82
+ ],
83
+ version='le90'),
84
+ val=dict(
85
+ type='DOTAv2Dataset',
86
+ ann_file=
87
+ '/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/val/annfiles/',
88
+ img_prefix=
89
+ '/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/val/images/',
90
+ pipeline=[
91
+ dict(type='LoadImageFromFile'),
92
+ dict(
93
+ type='MultiScaleFlipAug',
94
+ img_scale=(1024, 1024),
95
+ flip=False,
96
+ transforms=[
97
+ dict(type='RResize'),
98
+ dict(
99
+ type='Normalize',
100
+ mean=[123.675, 116.28, 103.53],
101
+ std=[58.395, 57.12, 57.375],
102
+ to_rgb=True),
103
+ dict(type='Pad', size_divisor=32),
104
+ dict(type='DefaultFormatBundle'),
105
+ dict(type='Collect', keys=['img'])
106
+ ])
107
+ ],
108
+ version='le90'),
109
+ test=dict(
110
+ type='DOTAv2Dataset',
111
+ ann_file=
112
+ '/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/test/images/',
113
+ img_prefix=
114
+ '/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/test/images/',
115
+ pipeline=[
116
+ dict(type='LoadImageFromFile'),
117
+ dict(
118
+ type='MultiScaleFlipAug',
119
+ img_scale=(1024, 1024),
120
+ flip=False,
121
+ transforms=[
122
+ dict(type='RResize'),
123
+ dict(
124
+ type='Normalize',
125
+ mean=[123.675, 116.28, 103.53],
126
+ std=[58.395, 57.12, 57.375],
127
+ to_rgb=True),
128
+ dict(type='Pad', size_divisor=32),
129
+ dict(type='DefaultFormatBundle'),
130
+ dict(type='Collect', keys=['img'])
131
+ ])
132
+ ],
133
+ version='le90'))
134
+ evaluation = dict(interval=1, metric='mAP')
135
+ optimizer = dict(
136
+ type='AdamW',
137
+ lr=2.5e-05,
138
+ betas=(0.9, 0.999),
139
+ weight_decay=0.05,
140
+ constructor='InternViTAdapterLayerDecayOptimizerConstructor',
141
+ paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.95))
142
+ optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
143
+ lr_config = dict(
144
+ policy='step',
145
+ warmup='linear',
146
+ warmup_iters=500,
147
+ warmup_ratio=0.3333333333333333,
148
+ step=[8, 11])
149
+ runner = dict(type='EpochBasedRunner', max_epochs=12)
150
+ checkpoint_config = dict(interval=1, max_keep_ckpts=1)
151
+ log_config = dict(interval=500, hooks=[dict(type='TextLoggerHook')])
152
+ dist_params = dict(backend='nccl')
153
+ log_level = 'INFO'
154
+ load_from = None
155
+ resume_from = None
156
+ workflow = [('train', 1)]
157
+ opencv_num_threads = 0
158
+ mp_start_method = 'fork'
159
+ pretrained = 'pretrained/ViTP_ViT_L_300M_rs.safetensors'
160
+ norm_cfg = dict(type='LN', requires_grad=True)
161
+ model = dict(
162
+ type='OrientedRCNN',
163
+ backbone=dict(
164
+ type='InternViTAdapter',
165
+ pretrain_size=448,
166
+ img_size=1024,
167
+ patch_size=16,
168
+ embed_dim=1024,
169
+ depth=24,
170
+ num_heads=16,
171
+ mlp_ratio=4.0,
172
+ drop_path_rate=0.1,
173
+ init_values=0.1,
174
+ with_cp=True,
175
+ use_flash_attn=True,
176
+ qk_normalization=False,
177
+ layerscale_force_fp32=False,
178
+ with_fpn=False,
179
+ freeze_vit=False,
180
+ use_final_norm=True,
181
+ interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
182
+ cffn_ratio=0.25,
183
+ deform_ratio=0.25,
184
+ qkv_bias=True,
185
+ norm_type='layer_norm',
186
+ pretrained='pretrained/ViTP_ViT_L_300M_rs.safetensors',
187
+ pretrained_type='full',
188
+ only_feat_out=True),
189
+ neck=dict(
190
+ type='SimpleFPN',
191
+ in_channels=[1024, 1024, 1024, 1024],
192
+ out_channels=256,
193
+ norm_cfg=dict(type='LN', requires_grad=True),
194
+ use_residual=False,
195
+ num_outs=5),
196
+ rpn_head=dict(
197
+ type='OrientedRPNHead',
198
+ in_channels=256,
199
+ feat_channels=256,
200
+ version='le90',
201
+ anchor_generator=dict(
202
+ type='AnchorGenerator',
203
+ scales=[8],
204
+ ratios=[0.5, 1.0, 2.0],
205
+ strides=[4, 8, 16, 32, 64]),
206
+ bbox_coder=dict(
207
+ type='MidpointOffsetCoder',
208
+ angle_range='le90',
209
+ target_means=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
210
+ target_stds=[1.0, 1.0, 1.0, 1.0, 0.5, 0.5]),
211
+ loss_cls=dict(
212
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
213
+ loss_bbox=dict(
214
+ type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)),
215
+ roi_head=dict(
216
+ type='OrientedStandardRoIHead',
217
+ bbox_roi_extractor=dict(
218
+ type='RotatedSingleRoIExtractor',
219
+ roi_layer=dict(
220
+ type='RoIAlignRotated',
221
+ out_size=7,
222
+ sample_num=2,
223
+ clockwise=True),
224
+ out_channels=256,
225
+ featmap_strides=[4, 8, 16, 32]),
226
+ bbox_head=dict(
227
+ type='RotatedShared2FCBBoxHead',
228
+ in_channels=256,
229
+ fc_out_channels=1024,
230
+ roi_feat_size=7,
231
+ num_classes=18,
232
+ bbox_coder=dict(
233
+ type='DeltaXYWHAOBBoxCoder',
234
+ angle_range='le90',
235
+ norm_factor=None,
236
+ edge_swap=True,
237
+ proj_xy=True,
238
+ target_means=(0.0, 0.0, 0.0, 0.0, 0.0),
239
+ target_stds=(0.1, 0.1, 0.2, 0.2, 0.1)),
240
+ reg_class_agnostic=True,
241
+ loss_cls=dict(
242
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
243
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),
244
+ train_cfg=dict(
245
+ rpn=dict(
246
+ assigner=dict(
247
+ type='MaxIoUAssigner',
248
+ pos_iou_thr=0.7,
249
+ neg_iou_thr=0.3,
250
+ min_pos_iou=0.3,
251
+ match_low_quality=True,
252
+ gpu_assign_thr=1000,
253
+ ignore_iof_thr=-1),
254
+ sampler=dict(
255
+ type='RandomSampler',
256
+ num=256,
257
+ pos_fraction=0.5,
258
+ neg_pos_ub=-1,
259
+ add_gt_as_proposals=False),
260
+ allowed_border=0,
261
+ pos_weight=-1,
262
+ debug=False),
263
+ rpn_proposal=dict(
264
+ nms_pre=2000,
265
+ max_per_img=2000,
266
+ nms=dict(type='nms', iou_threshold=0.8),
267
+ min_bbox_size=0),
268
+ rcnn=dict(
269
+ assigner=dict(
270
+ type='MaxIoUAssigner',
271
+ pos_iou_thr=0.5,
272
+ neg_iou_thr=0.5,
273
+ min_pos_iou=0.5,
274
+ match_low_quality=False,
275
+ gpu_assign_thr=1000,
276
+ iou_calculator=dict(type='RBboxOverlaps2D'),
277
+ ignore_iof_thr=-1),
278
+ sampler=dict(
279
+ type='RRandomSampler',
280
+ num=512,
281
+ pos_fraction=0.25,
282
+ neg_pos_ub=-1,
283
+ add_gt_as_proposals=True),
284
+ pos_weight=-1,
285
+ debug=False)),
286
+ test_cfg=dict(
287
+ rpn=dict(
288
+ nms_pre=2000,
289
+ max_per_img=2000,
290
+ nms=dict(type='nms', iou_threshold=0.8),
291
+ min_bbox_size=0),
292
+ rcnn=dict(
293
+ nms_pre=2000,
294
+ min_bbox_size=0,
295
+ score_thr=0.05,
296
+ nms=dict(iou_thr=0.1),
297
+ max_per_img=2000)))
298
+ fp16 = dict(loss_scale=dict(init_scale=512))
299
+ work_dir = './work_dirs/vitp_dotav2_orcnn'
300
+ auto_resume = False
301
+ gpu_ids = range(0, 8)
302
+ device = 'cuda'
ckpts/vitp_isaid_upernet_7114/20250803_154801.log ADDED
The diff for this file is too large to render. See raw diff
 
ckpts/vitp_isaid_upernet_7114/20250803_154801.log.json ADDED
The diff for this file is too large to render. See raw diff
 
ckpts/vitp_isaid_upernet_7114/ViTP_isaid_upernet.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset_type = 'iSAIDDataset'
2
+ data_root = '/defaultShare/pubdata/remote_sensing/iSAID'
3
+ img_norm_cfg = dict(
4
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
5
+ crop_size = (896, 896)
6
+ data = dict(
7
+ samples_per_gpu=2,
8
+ workers_per_gpu=4,
9
+ train=dict(
10
+ type='iSAIDDataset',
11
+ data_root='/defaultShare/pubdata/remote_sensing/iSAID',
12
+ img_dir='img_dir/train',
13
+ ann_dir='ann_dir_old/train',
14
+ pipeline=[
15
+ dict(type='LoadImageFromFile'),
16
+ dict(type='LoadAnnotations', reduce_zero_label=False),
17
+ dict(
18
+ type='Resize',
19
+ img_scale=(896, 896),
20
+ ratio_range=None,
21
+ keep_ratio=True),
22
+ dict(type='RandomCrop', crop_size=(896, 896)),
23
+ dict(type='RandomFlip', prob=0.5),
24
+ dict(type='PhotoMetricDistortion'),
25
+ dict(
26
+ type='Normalize',
27
+ mean=[123.675, 116.28, 103.53],
28
+ std=[58.395, 57.12, 57.375],
29
+ to_rgb=True),
30
+ dict(type='Pad', size=(896, 896), pad_val=0, seg_pad_val=255),
31
+ dict(type='DefaultFormatBundle'),
32
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
33
+ ]),
34
+ val=dict(
35
+ type='iSAIDDataset',
36
+ data_root='/defaultShare/pubdata/remote_sensing/iSAID',
37
+ img_dir='img_dir/val',
38
+ ann_dir='ann_dir_old/val',
39
+ pipeline=[
40
+ dict(type='LoadImageFromFile'),
41
+ dict(
42
+ type='MultiScaleFlipAug',
43
+ img_scale=(896, 896),
44
+ flip=True,
45
+ img_ratios=[0.75,1.0,1.5],
46
+ transforms=[
47
+ dict(type='Resize', keep_ratio=True),
48
+ dict(type='RandomFlip', prob=1.0),
49
+ dict(
50
+ type='Pad',
51
+ size=(896, 896),
52
+ pad_val=0,
53
+ seg_pad_val=255),
54
+ dict(
55
+ type='Normalize',
56
+ mean=[123.675, 116.28, 103.53],
57
+ std=[58.395, 57.12, 57.375],
58
+ to_rgb=True),
59
+ dict(type='ImageToTensor', keys=['img']),
60
+ dict(type='Collect', keys=['img'])
61
+ ])
62
+ ]),
63
+ test=dict(
64
+ type='iSAIDDataset',
65
+ data_root='/defaultShare/pubdata/remote_sensing/iSAID',
66
+ img_dir='img_dir/val',
67
+ ann_dir='ann_dir_old/val',
68
+ pipeline=[
69
+ dict(type='LoadImageFromFile'),
70
+ dict(
71
+ type='MultiScaleFlipAug',
72
+ img_scale=(896, 896),
73
+ flip=True,
74
+ img_ratios=[0.75,1.0,1.5],
75
+ transforms=[
76
+ dict(type='Resize', keep_ratio=True),
77
+ dict(type='RandomFlip', prob=1.0),
78
+ dict(
79
+ type='Pad',
80
+ size=(896, 896),
81
+ pad_val=0,
82
+ seg_pad_val=255),
83
+ dict(
84
+ type='Normalize',
85
+ mean=[123.675, 116.28, 103.53],
86
+ std=[58.395, 57.12, 57.375],
87
+ to_rgb=True),
88
+ dict(type='ImageToTensor', keys=['img']),
89
+ dict(type='Collect', keys=['img'])
90
+ ])
91
+ ]))
92
+ log_config = dict(
93
+ interval=50,
94
+ hooks=[
95
+ dict(type='TextLoggerHook', by_epoch=False),
96
+ dict(type='TensorboardLoggerHook')
97
+ ])
98
+ dist_params = dict(backend='nccl')
99
+ log_level = 'INFO'
100
+ load_from = None
101
+ resume_from = None
102
+ workflow = [('train', 1)]
103
+ cudnn_benchmark = True
104
+ optimizer = dict(
105
+ type='AdamW',
106
+ lr=1.5e-05,
107
+ betas=(0.9, 0.999),
108
+ weight_decay=0.05,
109
+ constructor='InternViTAdapterLayerDecayOptimizerConstructor',
110
+ paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.9))
111
+ optimizer_config = dict()
112
+ lr_config = dict(
113
+ policy='CosineAnnealing',
114
+ warmup='linear',
115
+ warmup_iters=1500,
116
+ warmup_ratio=1e-06,
117
+ min_lr=0.0)
118
+ runner = dict(type='IterBasedRunner', max_iters=80000)
119
+ checkpoint_config = dict(by_epoch=False, interval=8000, max_keep_ckpts=3)
120
+ evaluation = dict(interval=4000, metric='mIoU', pre_eval=True, metrics='mIoU')
121
+ val_pipeline = [
122
+ dict(type='LoadImageFromFile'),
123
+ dict(
124
+ type='MultiScaleFlipAug',
125
+ img_scale=(896, 896),
126
+ flip=True,
127
+ transforms=[
128
+ dict(type='Resize', keep_ratio=True),
129
+ dict(type='RandomFlip', prob=1.0),
130
+ dict(type='Pad', size=(896, 896), pad_val=0, seg_pad_val=255),
131
+ dict(
132
+ type='Normalize',
133
+ mean=[123.675, 116.28, 103.53],
134
+ std=[58.395, 57.12, 57.375],
135
+ to_rgb=True),
136
+ dict(type='ImageToTensor', keys=['img']),
137
+ dict(type='Collect', keys=['img'])
138
+ ])
139
+ ]
140
+ default_hooks = dict(
141
+ timer=dict(type='IterTimerHook'),
142
+ logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
143
+ sampler_seed=dict(type='DistSamplerSeedHook'))
144
+ pretrained = 'pretrained/ViTP_ViT_L_300M_rs.safetensors'
145
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
146
+ model = dict(
147
+ type='EncoderDecoder',
148
+ backbone=dict(
149
+ type='InternViTAdapter',
150
+ pretrain_size=448,
151
+ img_size=896,
152
+ patch_size=16,
153
+ embed_dim=1024,
154
+ depth=24,
155
+ num_heads=16,
156
+ mlp_ratio=4.0,
157
+ drop_path_rate=0.1,
158
+ init_values=0.1,
159
+ with_cp=True,
160
+ use_flash_attn=True,
161
+ qk_normalization=False,
162
+ layerscale_force_fp32=False,
163
+ with_fpn=False,
164
+ freeze_vit=False,
165
+ use_final_norm=True,
166
+ interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
167
+ cffn_ratio=0.25,
168
+ deform_ratio=0.25,
169
+ qkv_bias=True,
170
+ norm_type='layer_norm',
171
+ pretrained='pretrained/ViTP_ViT_L_300M_rs.safetensors',
172
+ pretrained_type='full'),
173
+ decode_head=dict(
174
+ type='UPerHead',
175
+ in_channels=[1024, 1024, 1024, 1024],
176
+ num_classes=16,
177
+ ignore_index=255,
178
+ in_index=[0, 1, 2, 3],
179
+ pool_scales=(1, 2, 3, 6),
180
+ channels=512,
181
+ dropout_ratio=0.1,
182
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
183
+ align_corners=False,
184
+ loss_decode=dict(
185
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
186
+ train_cfg=dict(),
187
+ test_cfg=dict(mode='slide', crop_size=crop_size, stride=(crop_size[0]//2, crop_size[1]//2)))
188
+ fp16 = dict(loss_scale=dict(init_scale=512))
189
+ randomness = dict(seed=3407)
190
+ work_dir = './work_dirs/ViTP_isaid_upernet'
191
+ gpu_ids = range(0, 8)
192
+ auto_resume = False
ckpts/vitp_isaid_upernet_7114/eval_20250921_141413.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": "./configs/internvit/upernet_internvit_adp_80k_isaid_cos_ldr90.py",
3
+ "metric": {
4
+ "aAcc": 0.9912000000000001,
5
+ "mIoU": 0.7114,
6
+ "mAcc": 0.7913,
7
+ "IoU.background": 0.9919000244140626,
8
+ "IoU.ship": 0.7715000152587891,
9
+ "IoU.store_tank": 0.7601000213623047,
10
+ "IoU.baseball_diamond": 0.8194000244140625,
11
+ "IoU.tennis_court": 0.9094000244140625,
12
+ "IoU.basketball_court": 0.7026000213623047,
13
+ "IoU.Ground_Track_Field": 0.65,
14
+ "IoU.Bridge": 0.494900016784668,
15
+ "IoU.Large_Vehicle": 0.7023999786376953,
16
+ "IoU.Small_Vehicle": 0.560099983215332,
17
+ "IoU.Helicopter": 0.44459999084472657,
18
+ "IoU.Swimming_pool": 0.5159999847412109,
19
+ "IoU.Roundabout": 0.7868000030517578,
20
+ "IoU.Soccer_ball_field": 0.7841999816894532,
21
+ "IoU.plane": 0.8708999633789063,
22
+ "IoU.Harbor": 0.6168999862670899,
23
+ "Acc.background": 0.9976000213623046,
24
+ "Acc.ship": 0.856500015258789,
25
+ "Acc.store_tank": 0.8390000152587891,
26
+ "Acc.baseball_diamond": 0.8686000061035156,
27
+ "Acc.tennis_court": 0.9333000183105469,
28
+ "Acc.basketball_court": 0.8120999908447266,
29
+ "Acc.Ground_Track_Field": 0.7269999694824218,
30
+ "Acc.Bridge": 0.5695000076293946,
31
+ "Acc.Large_Vehicle": 0.7880999755859375,
32
+ "Acc.Small_Vehicle": 0.6397000122070312,
33
+ "Acc.Helicopter": 0.7390000152587891,
34
+ "Acc.Swimming_pool": 0.5818000030517578,
35
+ "Acc.Roundabout": 0.8454000091552735,
36
+ "Acc.Soccer_ball_field": 0.8494999694824219,
37
+ "Acc.plane": 0.9266000366210938,
38
+ "Acc.Harbor": 0.6873999786376953
39
+ }
40
+ }
ckpts/vitp_isaid_upernet_7114/iter_80000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71e964b20d73596832f96920fa676404294b52981ecc04824546f0522120e82d
3
+ size 1435132133
ckpts/vitp_levir_upernet_7268/20250919_030132/20250919_030132.log ADDED
The diff for this file is too large to render. See raw diff
 
ckpts/vitp_levir_upernet_7268/20250919_030132/20250921_105914.log ADDED
@@ -0,0 +1,485 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025/09/21 10:59:18 - mmengine - INFO -
2
+ ------------------------------------------------------------
3
+ System environment:
4
+ sys.platform: linux
5
+ Python: 3.10.16 (main, Dec 11 2024, 16:24:50) [GCC 11.2.0]
6
+ CUDA available: True
7
+ MUSA available: False
8
+ numpy_random_seed: 908216666
9
+ GPU 0,1,2,3,4,5,6,7: NVIDIA GeForce RTX 3090
10
+ CUDA_HOME: /mnt/petrelfs/share_data/liqingyun/cuda/cuda-12.4/
11
+ GCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0
12
+ PyTorch: 1.12.0
13
+ PyTorch compiling details: PyTorch built with:
14
+ - GCC 9.3
15
+ - C++ Version: 201402
16
+ - Intel(R) oneAPI Math Kernel Library Version 2024.0-Product Build 20231011 for Intel(R) 64 architecture applications
17
+ - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
18
+ - OpenMP 201511 (a.k.a. OpenMP 4.5)
19
+ - LAPACK is enabled (usually provided by MKL)
20
+ - NNPACK is enabled
21
+ - CPU capability usage: AVX2
22
+ - CUDA Runtime 11.3
23
+ - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37
24
+ - CuDNN 8.3.2 (built against CUDA 11.5)
25
+ - Magma 2.5.2
26
+ - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.12.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=OFF, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF,
27
+
28
+ TorchVision: 0.13.0
29
+ OpenCV: 4.11.0
30
+ MMEngine: 0.10.7
31
+
32
+ Runtime environment:
33
+ cudnn_benchmark: True
34
+ mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0}
35
+ dist_cfg: {'backend': 'nccl'}
36
+ seed: 908216666
37
+ Distributed launcher: pytorch
38
+ Distributed training: True
39
+ GPU number: 8
40
+ ------------------------------------------------------------
41
+
42
+ 2025/09/21 10:59:19 - mmengine - INFO - Config:
43
+ crop_size = (
44
+ 256,
45
+ 256,
46
+ )
47
+ data_preprocessor = dict(
48
+ bgr_to_rgb=True,
49
+ mean=[
50
+ 123.675,
51
+ 116.28,
52
+ 103.53,
53
+ 123.675,
54
+ 116.28,
55
+ 103.53,
56
+ ],
57
+ pad_val=0,
58
+ seg_pad_val=255,
59
+ size_divisor=32,
60
+ std=[
61
+ 58.395,
62
+ 57.12,
63
+ 57.375,
64
+ 58.395,
65
+ 57.12,
66
+ 57.375,
67
+ ],
68
+ test_cfg=dict(size_divisor=32),
69
+ type='DualInputSegDataPreProcessor')
70
+ data_root = '/defaultShare/pubdata/remote_sensing/LEVIR-CD-256'
71
+ dataset_type = 'LEVIR_CD_Dataset'
72
+ default_hooks = dict(
73
+ checkpoint=dict(by_epoch=False, interval=8000, type='CheckpointHook'),
74
+ logger=dict(interval=500, log_metric_by_epoch=False, type='LoggerHook'),
75
+ param_scheduler=dict(type='ParamSchedulerHook'),
76
+ sampler_seed=dict(type='DistSamplerSeedHook'),
77
+ timer=dict(type='IterTimerHook'),
78
+ visualization=dict(interval=1, type='CDVisualizationHook'))
79
+ default_scope = 'opencd'
80
+ env_cfg = dict(
81
+ cudnn_benchmark=True,
82
+ dist_cfg=dict(backend='nccl'),
83
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
84
+ fp16 = dict(loss_scale=dict(init_scale=512))
85
+ img_ratios = [
86
+ 0.75,
87
+ 1.0,
88
+ 1.25,
89
+ ]
90
+ launcher = 'pytorch'
91
+ load_from = './work_dirs/upernet_internvit_adp_levir/iter_80000.pth'
92
+ log_level = 'INFO'
93
+ log_processor = dict(by_epoch=False)
94
+ model = dict(
95
+ backbone=dict(
96
+ cffn_ratio=0.25,
97
+ deform_ratio=0.25,
98
+ depth=24,
99
+ drop_path_rate=0.1,
100
+ embed_dim=1024,
101
+ freeze_vit=False,
102
+ img_size=256,
103
+ init_values=0.1,
104
+ interaction_indexes=[
105
+ [
106
+ 0,
107
+ 7,
108
+ ],
109
+ [
110
+ 8,
111
+ 11,
112
+ ],
113
+ [
114
+ 12,
115
+ 15,
116
+ ],
117
+ [
118
+ 16,
119
+ 23,
120
+ ],
121
+ ],
122
+ layerscale_force_fp32=False,
123
+ mlp_ratio=4.0,
124
+ norm_type='layer_norm',
125
+ num_heads=16,
126
+ patch_size=16,
127
+ pretrain_size=448,
128
+ pretrained=
129
+ '/nfs/liyuxuan/zhangyicheng/mmrotate/pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors',
130
+ pretrained_type='full',
131
+ qk_normalization=False,
132
+ qkv_bias=True,
133
+ type='InternViTAdapter',
134
+ use_final_norm=True,
135
+ use_flash_attn=False,
136
+ with_cp=True,
137
+ with_fpn=False),
138
+ data_preprocessor=dict(
139
+ bgr_to_rgb=True,
140
+ mean=[
141
+ 123.675,
142
+ 116.28,
143
+ 103.53,
144
+ 123.675,
145
+ 116.28,
146
+ 103.53,
147
+ ],
148
+ pad_val=0,
149
+ seg_pad_val=255,
150
+ size_divisor=32,
151
+ std=[
152
+ 58.395,
153
+ 57.12,
154
+ 57.375,
155
+ 58.395,
156
+ 57.12,
157
+ 57.375,
158
+ ],
159
+ test_cfg=dict(size_divisor=32),
160
+ type='DualInputSegDataPreProcessor'),
161
+ decode_head=dict(
162
+ align_corners=False,
163
+ channels=1024,
164
+ dropout_ratio=0.1,
165
+ in_channels=[
166
+ 2048,
167
+ 2048,
168
+ 2048,
169
+ 2048,
170
+ ],
171
+ in_index=[
172
+ 0,
173
+ 1,
174
+ 2,
175
+ 3,
176
+ ],
177
+ loss_decode=dict(
178
+ loss_weight=1.0, type='mmseg.CrossEntropyLoss', use_sigmoid=False),
179
+ norm_cfg=dict(requires_grad=True, type='SyncBN'),
180
+ num_classes=2,
181
+ pool_scales=(
182
+ 1,
183
+ 2,
184
+ 3,
185
+ 6,
186
+ ),
187
+ type='mmseg.UPerHead'),
188
+ neck=dict(policy='concat', type='FeatureFusionNeck'),
189
+ test_cfg=dict(crop_size=(
190
+ 256,
191
+ 256,
192
+ ), mode='slide', stride=(
193
+ 128,
194
+ 128,
195
+ )),
196
+ train_cfg=dict(),
197
+ type='SiamEncoderDecoder')
198
+ norm_cfg = dict(requires_grad=True, type='SyncBN')
199
+ optim_wrapper = dict(
200
+ clip_grad=None,
201
+ constructor='InternViTAdapterLayerDecayOptimizerConstructor',
202
+ optimizer=dict(
203
+ betas=(
204
+ 0.9,
205
+ 0.999,
206
+ ), lr=0.0001, type='AdamW', weight_decay=0.05),
207
+ paramwise_cfg=dict(layer_decay_rate=0.9, num_layers=24),
208
+ type='OptimWrapper')
209
+ optimizer = dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005)
210
+ param_scheduler = [
211
+ dict(
212
+ begin=0, by_epoch=False, end=1500, start_factor=1e-06,
213
+ type='LinearLR'),
214
+ dict(
215
+ T_max=78500,
216
+ begin=1500,
217
+ by_epoch=False,
218
+ end=80000,
219
+ eta_min=0.0,
220
+ type='CosineAnnealingLR'),
221
+ ]
222
+ pretrained = '/nfs/liyuxuan/zhangyicheng/mmrotate/pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors'
223
+ resume = False
224
+ test_cfg = dict(type='TestLoop')
225
+ test_dataloader = dict(
226
+ batch_size=1,
227
+ dataset=dict(
228
+ data_prefix=dict(
229
+ img_path_from='A', img_path_to='B', seg_map_path='label'),
230
+ data_root='/defaultShare/pubdata/remote_sensing/LEVIR_CD/test',
231
+ pipeline=[
232
+ dict(type='MultiImgLoadImageFromFile'),
233
+ dict(type='MultiImgLoadAnnotations'),
234
+ dict(type='MultiImgPackSegInputs'),
235
+ ],
236
+ type='LEVIR_CD_Dataset'),
237
+ num_workers=8,
238
+ persistent_workers=True,
239
+ sampler=dict(shuffle=False, type='DefaultSampler'))
240
+ test_evaluator = dict(
241
+ iou_metrics=[
242
+ 'mFscore',
243
+ 'mIoU',
244
+ ], type='mmseg.IoUMetric')
245
+ test_pipeline = [
246
+ dict(type='MultiImgLoadImageFromFile'),
247
+ dict(type='MultiImgLoadAnnotations'),
248
+ dict(type='MultiImgPackSegInputs'),
249
+ ]
250
+ train_cfg = dict(max_iters=80000, type='IterBasedTrainLoop', val_interval=8000)
251
+ train_dataloader = dict(
252
+ batch_size=4,
253
+ dataset=dict(
254
+ ann_file='list/train.txt',
255
+ data_prefix=dict(
256
+ img_path_from='A', img_path_to='B', seg_map_path='label'),
257
+ data_root='/defaultShare/pubdata/remote_sensing/LEVIR-CD-256',
258
+ pipeline=[
259
+ dict(type='MultiImgLoadImageFromFile'),
260
+ dict(type='MultiImgLoadAnnotations'),
261
+ dict(degree=180, prob=0.5, type='MultiImgRandomRotate'),
262
+ dict(
263
+ cat_max_ratio=0.75,
264
+ crop_size=(
265
+ 256,
266
+ 256,
267
+ ),
268
+ type='MultiImgRandomCrop'),
269
+ dict(direction='horizontal', prob=0.5, type='MultiImgRandomFlip'),
270
+ dict(direction='vertical', prob=0.5, type='MultiImgRandomFlip'),
271
+ dict(prob=0.5, type='MultiImgExchangeTime'),
272
+ dict(
273
+ brightness_delta=10,
274
+ contrast_range=(
275
+ 0.8,
276
+ 1.2,
277
+ ),
278
+ hue_delta=10,
279
+ saturation_range=(
280
+ 0.8,
281
+ 1.2,
282
+ ),
283
+ type='MultiImgPhotoMetricDistortion'),
284
+ dict(type='MultiImgPackSegInputs'),
285
+ ],
286
+ type='LEVIR_CD_Dataset'),
287
+ num_workers=8,
288
+ persistent_workers=True,
289
+ sampler=dict(shuffle=True, type='DefaultSampler'))
290
+ train_pipeline = [
291
+ dict(type='MultiImgLoadImageFromFile'),
292
+ dict(type='MultiImgLoadAnnotations'),
293
+ dict(degree=180, prob=0.5, type='MultiImgRandomRotate'),
294
+ dict(
295
+ cat_max_ratio=0.75, crop_size=(
296
+ 256,
297
+ 256,
298
+ ), type='MultiImgRandomCrop'),
299
+ dict(direction='horizontal', prob=0.5, type='MultiImgRandomFlip'),
300
+ dict(direction='vertical', prob=0.5, type='MultiImgRandomFlip'),
301
+ dict(prob=0.5, type='MultiImgExchangeTime'),
302
+ dict(
303
+ brightness_delta=10,
304
+ contrast_range=(
305
+ 0.8,
306
+ 1.2,
307
+ ),
308
+ hue_delta=10,
309
+ saturation_range=(
310
+ 0.8,
311
+ 1.2,
312
+ ),
313
+ type='MultiImgPhotoMetricDistortion'),
314
+ dict(type='MultiImgPackSegInputs'),
315
+ ]
316
+ tta_model = dict(type='mmseg.SegTTAModel')
317
+ tta_pipeline = [
318
+ dict(backend_args=None, type='MultiImgLoadImageFromFile'),
319
+ dict(
320
+ transforms=[
321
+ [
322
+ dict(
323
+ keep_ratio=True, scale_factor=0.75, type='MultiImgResize'),
324
+ dict(keep_ratio=True, scale_factor=1.0, type='MultiImgResize'),
325
+ dict(
326
+ keep_ratio=True, scale_factor=1.25, type='MultiImgResize'),
327
+ ],
328
+ [
329
+ dict(
330
+ direction='horizontal',
331
+ prob=0.0,
332
+ type='MultiImgRandomFlip'),
333
+ dict(
334
+ direction='horizontal',
335
+ prob=1.0,
336
+ type='MultiImgRandomFlip'),
337
+ ],
338
+ [
339
+ dict(type='MultiImgLoadAnnotations'),
340
+ ],
341
+ [
342
+ dict(type='MultiImgPackSegInputs'),
343
+ ],
344
+ ],
345
+ type='TestTimeAug'),
346
+ ]
347
+ val_cfg = dict(type='ValLoop')
348
+ val_dataloader = dict(
349
+ batch_size=1,
350
+ dataset=dict(
351
+ ann_file='list/test.txt',
352
+ data_prefix=dict(
353
+ img_path_from='A', img_path_to='B', seg_map_path='label'),
354
+ data_root='/defaultShare/pubdata/remote_sensing/LEVIR-CD-256',
355
+ pipeline=[
356
+ dict(type='MultiImgLoadImageFromFile'),
357
+ dict(type='MultiImgLoadAnnotations'),
358
+ dict(type='MultiImgPackSegInputs'),
359
+ ],
360
+ type='LEVIR_CD_Dataset'),
361
+ num_workers=8,
362
+ persistent_workers=True,
363
+ sampler=dict(shuffle=False, type='DefaultSampler'))
364
+ val_evaluator = dict(
365
+ iou_metrics=[
366
+ 'mFscore',
367
+ 'mIoU',
368
+ ], type='mmseg.IoUMetric')
369
+ val_pipeline = [
370
+ dict(type='MultiImgLoadImageFromFile'),
371
+ dict(keep_ratio=True, scale=(
372
+ 256,
373
+ 256,
374
+ ), type='MultiImgResize'),
375
+ dict(type='MultiImgLoadAnnotations'),
376
+ dict(type='MultiImgPackSegInputs'),
377
+ ]
378
+ vis_backends = [
379
+ dict(type='CDLocalVisBackend'),
380
+ ]
381
+ visualizer = dict(
382
+ alpha=1.0,
383
+ name='visualizer',
384
+ type='CDLocalVisualizer',
385
+ vis_backends=[
386
+ dict(type='CDLocalVisBackend'),
387
+ ])
388
+ work_dir = './work_dirs/upernet_internvit_adp_levir'
389
+
390
+ 2025/09/21 10:59:27 - mmengine - INFO - Hooks will be executed in the following order:
391
+ before_run:
392
+ (VERY_HIGH ) RuntimeInfoHook
393
+ (BELOW_NORMAL) LoggerHook
394
+ --------------------
395
+ before_train:
396
+ (VERY_HIGH ) RuntimeInfoHook
397
+ (NORMAL ) IterTimerHook
398
+ (VERY_LOW ) CheckpointHook
399
+ --------------------
400
+ before_train_epoch:
401
+ (VERY_HIGH ) RuntimeInfoHook
402
+ (NORMAL ) IterTimerHook
403
+ (NORMAL ) DistSamplerSeedHook
404
+ --------------------
405
+ before_train_iter:
406
+ (VERY_HIGH ) RuntimeInfoHook
407
+ (NORMAL ) IterTimerHook
408
+ --------------------
409
+ after_train_iter:
410
+ (VERY_HIGH ) RuntimeInfoHook
411
+ (NORMAL ) IterTimerHook
412
+ (NORMAL ) CDVisualizationHook
413
+ (BELOW_NORMAL) LoggerHook
414
+ (LOW ) ParamSchedulerHook
415
+ (VERY_LOW ) CheckpointHook
416
+ --------------------
417
+ after_train_epoch:
418
+ (NORMAL ) IterTimerHook
419
+ (LOW ) ParamSchedulerHook
420
+ (VERY_LOW ) CheckpointHook
421
+ --------------------
422
+ before_val:
423
+ (VERY_HIGH ) RuntimeInfoHook
424
+ --------------------
425
+ before_val_epoch:
426
+ (NORMAL ) IterTimerHook
427
+ --------------------
428
+ before_val_iter:
429
+ (NORMAL ) IterTimerHook
430
+ --------------------
431
+ after_val_iter:
432
+ (NORMAL ) IterTimerHook
433
+ (NORMAL ) CDVisualizationHook
434
+ (BELOW_NORMAL) LoggerHook
435
+ --------------------
436
+ after_val_epoch:
437
+ (VERY_HIGH ) RuntimeInfoHook
438
+ (NORMAL ) IterTimerHook
439
+ (BELOW_NORMAL) LoggerHook
440
+ (LOW ) ParamSchedulerHook
441
+ (VERY_LOW ) CheckpointHook
442
+ --------------------
443
+ after_val:
444
+ (VERY_HIGH ) RuntimeInfoHook
445
+ --------------------
446
+ after_train:
447
+ (VERY_HIGH ) RuntimeInfoHook
448
+ (VERY_LOW ) CheckpointHook
449
+ --------------------
450
+ before_test:
451
+ (VERY_HIGH ) RuntimeInfoHook
452
+ --------------------
453
+ before_test_epoch:
454
+ (NORMAL ) IterTimerHook
455
+ --------------------
456
+ before_test_iter:
457
+ (NORMAL ) IterTimerHook
458
+ --------------------
459
+ after_test_iter:
460
+ (NORMAL ) IterTimerHook
461
+ (NORMAL ) CDVisualizationHook
462
+ (BELOW_NORMAL) LoggerHook
463
+ --------------------
464
+ after_test_epoch:
465
+ (VERY_HIGH ) RuntimeInfoHook
466
+ (NORMAL ) IterTimerHook
467
+ (BELOW_NORMAL) LoggerHook
468
+ --------------------
469
+ after_test:
470
+ (VERY_HIGH ) RuntimeInfoHook
471
+ --------------------
472
+ after_run:
473
+ (BELOW_NORMAL) LoggerHook
474
+ --------------------
475
+ 2025/09/21 10:59:27 - mmengine - WARNING - The prefix is not set in metric class IoUMetric.
476
+ 2025/09/21 10:59:34 - mmengine - INFO - Load checkpoint from ./work_dirs/upernet_internvit_adp_levir/iter_80000.pth
477
+ 2025/09/21 11:00:38 - mmengine - INFO - per class results:
478
+ 2025/09/21 11:00:38 - mmengine - INFO -
479
+ +-----------+--------+-----------+--------+-------+-------+
480
+ | Class | Fscore | Precision | Recall | IoU | Acc |
481
+ +-----------+--------+-----------+--------+-------+-------+
482
+ | unchanged | 99.61 | 99.54 | 99.68 | 99.23 | 99.68 |
483
+ | changed | 92.67 | 93.92 | 91.45 | 86.34 | 91.45 |
484
+ +-----------+--------+-----------+--------+-------+-------+
485
+ 2025/09/21 11:00:38 - mmengine - INFO - Iter(test) [16/16] aAcc: 99.2500 mFscore: 96.0900 mPrecision: 96.7300 mRecall: 95.4700 mIoU: 92.7000 mAcc: 95.4700 data_time: 0.0690 time: 3.9244
ckpts/vitp_levir_upernet_7268/iter_80000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54ac1c9895902db02909169fc1aeb19b7c8732b777a42ad2a3d16876fc53da31
3
+ size 4569761364
ckpts/vitp_levir_upernet_7268/upernet_internvit_adp_levir.py ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ crop_size = (
2
+ 256,
3
+ 256,
4
+ )
5
+ data_preprocessor = dict(
6
+ bgr_to_rgb=True,
7
+ mean=[
8
+ 123.675,
9
+ 116.28,
10
+ 103.53,
11
+ 123.675,
12
+ 116.28,
13
+ 103.53,
14
+ ],
15
+ pad_val=0,
16
+ seg_pad_val=255,
17
+ size_divisor=32,
18
+ std=[
19
+ 58.395,
20
+ 57.12,
21
+ 57.375,
22
+ 58.395,
23
+ 57.12,
24
+ 57.375,
25
+ ],
26
+ test_cfg=dict(size_divisor=32),
27
+ type='DualInputSegDataPreProcessor')
28
+ data_root = '/defaultShare/pubdata/remote_sensing/LEVIR-CD-256'
29
+ dataset_type = 'LEVIR_CD_Dataset'
30
+ default_hooks = dict(
31
+ checkpoint=dict(by_epoch=False, interval=8000, type='CheckpointHook'),
32
+ logger=dict(interval=500, log_metric_by_epoch=False, type='LoggerHook'),
33
+ param_scheduler=dict(type='ParamSchedulerHook'),
34
+ sampler_seed=dict(type='DistSamplerSeedHook'),
35
+ timer=dict(type='IterTimerHook'),
36
+ visualization=dict(interval=1, type='CDVisualizationHook'))
37
+ default_scope = 'opencd'
38
+ env_cfg = dict(
39
+ cudnn_benchmark=True,
40
+ dist_cfg=dict(backend='nccl'),
41
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
42
+ fp16 = dict(loss_scale=dict(init_scale=512))
43
+ img_ratios = [
44
+ 0.75,
45
+ 1.0,
46
+ 1.25,
47
+ ]
48
+ launcher = 'pytorch'
49
+ load_from = './work_dirs/upernet_internvit_adp_levir_cos/iter_80000.pth'
50
+ log_level = 'INFO'
51
+ log_processor = dict(by_epoch=False)
52
+ model = dict(
53
+ backbone=dict(
54
+ cffn_ratio=0.25,
55
+ deform_ratio=0.25,
56
+ depth=24,
57
+ drop_path_rate=0.1,
58
+ embed_dim=1024,
59
+ freeze_vit=False,
60
+ img_size=256,
61
+ init_values=0.1,
62
+ interaction_indexes=[
63
+ [
64
+ 0,
65
+ 7,
66
+ ],
67
+ [
68
+ 8,
69
+ 11,
70
+ ],
71
+ [
72
+ 12,
73
+ 15,
74
+ ],
75
+ [
76
+ 16,
77
+ 23,
78
+ ],
79
+ ],
80
+ layerscale_force_fp32=False,
81
+ mlp_ratio=4.0,
82
+ norm_type='layer_norm',
83
+ num_heads=16,
84
+ patch_size=16,
85
+ pretrain_size=448,
86
+ pretrained='pretrained/ViTP_ViT_L_300M_rs.safetensors',
87
+ pretrained_type='full',
88
+ qk_normalization=False,
89
+ qkv_bias=True,
90
+ type='InternViTAdapter',
91
+ use_final_norm=True,
92
+ use_flash_attn=False,
93
+ with_cp=True,
94
+ with_fpn=False),
95
+ data_preprocessor=dict(
96
+ bgr_to_rgb=True,
97
+ mean=[
98
+ 123.675,
99
+ 116.28,
100
+ 103.53,
101
+ 123.675,
102
+ 116.28,
103
+ 103.53,
104
+ ],
105
+ pad_val=0,
106
+ seg_pad_val=255,
107
+ size_divisor=32,
108
+ std=[
109
+ 58.395,
110
+ 57.12,
111
+ 57.375,
112
+ 58.395,
113
+ 57.12,
114
+ 57.375,
115
+ ],
116
+ test_cfg=dict(size_divisor=32),
117
+ type='DualInputSegDataPreProcessor'),
118
+ decode_head=dict(
119
+ align_corners=False,
120
+ channels=1024,
121
+ dropout_ratio=0.1,
122
+ in_channels=[
123
+ 2048,
124
+ 2048,
125
+ 2048,
126
+ 2048,
127
+ ],
128
+ in_index=[
129
+ 0,
130
+ 1,
131
+ 2,
132
+ 3,
133
+ ],
134
+ loss_decode=dict(
135
+ loss_weight=1.0, type='mmseg.CrossEntropyLoss', use_sigmoid=False),
136
+ norm_cfg=dict(requires_grad=True, type='SyncBN'),
137
+ num_classes=2,
138
+ pool_scales=(
139
+ 1,
140
+ 2,
141
+ 3,
142
+ 6,
143
+ ),
144
+ type='mmseg.UPerHead'),
145
+ neck=dict(policy='concat', type='FeatureFusionNeck'),
146
+ test_cfg=dict(crop_size=(
147
+ 256,
148
+ 256,
149
+ ), mode='slide', stride=(
150
+ 128,
151
+ 128,
152
+ )),
153
+ train_cfg=dict(),
154
+ type='SiamEncoderDecoder')
155
+ norm_cfg = dict(requires_grad=True, type='SyncBN')
156
+ optim_wrapper = dict(
157
+ clip_grad=None,
158
+ constructor='InternViTAdapterLayerDecayOptimizerConstructor',
159
+ optimizer=dict(
160
+ betas=(
161
+ 0.9,
162
+ 0.999,
163
+ ), lr=0.0001, type='AdamW', weight_decay=0.05),
164
+ paramwise_cfg=dict(layer_decay_rate=0.9, num_layers=24),
165
+ type='OptimWrapper')
166
+ optimizer = dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005)
167
+ param_scheduler = [
168
+ dict(
169
+ begin=0, by_epoch=False, end=1500, start_factor=1e-06,
170
+ type='LinearLR'),
171
+ dict(
172
+ T_max=78500,
173
+ begin=1500,
174
+ by_epoch=False,
175
+ end=80000,
176
+ eta_min=0.0,
177
+ type='CosineAnnealingLR'),
178
+ ]
179
+ resume = False
180
+ test_cfg = dict(type='TestLoop')
181
+ test_dataloader = dict(
182
+ batch_size=1,
183
+ dataset=dict(
184
+ data_prefix=dict(
185
+ img_path_from='A', img_path_to='B', seg_map_path='label'),
186
+ data_root='/defaultShare/pubdata/remote_sensing/LEVIR_CD/test',
187
+ pipeline=[
188
+ dict(type='MultiImgLoadImageFromFile'),
189
+ dict(type='MultiImgLoadAnnotations'),
190
+ dict(type='MultiImgPackSegInputs'),
191
+ ],
192
+ type='LEVIR_CD_Dataset'),
193
+ num_workers=8,
194
+ persistent_workers=True,
195
+ sampler=dict(shuffle=False, type='DefaultSampler'))
196
+ test_evaluator = dict(
197
+ iou_metrics=[
198
+ 'mFscore',
199
+ 'mIoU',
200
+ ], type='mmseg.IoUMetric')
201
+ test_pipeline = [
202
+ dict(type='MultiImgLoadImageFromFile'),
203
+ dict(type='MultiImgLoadAnnotations'),
204
+ dict(type='MultiImgPackSegInputs'),
205
+ ]
206
+ train_cfg = dict(max_iters=80000, type='IterBasedTrainLoop', val_interval=8000)
207
+ train_dataloader = dict(
208
+ batch_size=4,
209
+ dataset=dict(
210
+ ann_file='list/train.txt',
211
+ data_prefix=dict(
212
+ img_path_from='A', img_path_to='B', seg_map_path='label'),
213
+ data_root='/defaultShare/pubdata/remote_sensing/LEVIR-CD-256',
214
+ pipeline=[
215
+ dict(type='MultiImgLoadImageFromFile'),
216
+ dict(type='MultiImgLoadAnnotations'),
217
+ dict(degree=180, prob=0.5, type='MultiImgRandomRotate'),
218
+ dict(
219
+ cat_max_ratio=0.75,
220
+ crop_size=(
221
+ 256,
222
+ 256,
223
+ ),
224
+ type='MultiImgRandomCrop'),
225
+ dict(direction='horizontal', prob=0.5, type='MultiImgRandomFlip'),
226
+ dict(direction='vertical', prob=0.5, type='MultiImgRandomFlip'),
227
+ dict(prob=0.5, type='MultiImgExchangeTime'),
228
+ dict(
229
+ brightness_delta=10,
230
+ contrast_range=(
231
+ 0.8,
232
+ 1.2,
233
+ ),
234
+ hue_delta=10,
235
+ saturation_range=(
236
+ 0.8,
237
+ 1.2,
238
+ ),
239
+ type='MultiImgPhotoMetricDistortion'),
240
+ dict(type='MultiImgPackSegInputs'),
241
+ ],
242
+ type='LEVIR_CD_Dataset'),
243
+ num_workers=8,
244
+ persistent_workers=True,
245
+ sampler=dict(shuffle=True, type='DefaultSampler'))
246
+ train_pipeline = [
247
+ dict(type='MultiImgLoadImageFromFile'),
248
+ dict(type='MultiImgLoadAnnotations'),
249
+ dict(degree=180, prob=0.5, type='MultiImgRandomRotate'),
250
+ dict(
251
+ cat_max_ratio=0.75, crop_size=(
252
+ 256,
253
+ 256,
254
+ ), type='MultiImgRandomCrop'),
255
+ dict(direction='horizontal', prob=0.5, type='MultiImgRandomFlip'),
256
+ dict(direction='vertical', prob=0.5, type='MultiImgRandomFlip'),
257
+ dict(prob=0.5, type='MultiImgExchangeTime'),
258
+ dict(
259
+ brightness_delta=10,
260
+ contrast_range=(
261
+ 0.8,
262
+ 1.2,
263
+ ),
264
+ hue_delta=10,
265
+ saturation_range=(
266
+ 0.8,
267
+ 1.2,
268
+ ),
269
+ type='MultiImgPhotoMetricDistortion'),
270
+ dict(type='MultiImgPackSegInputs'),
271
+ ]
272
+ tta_model = dict(type='mmseg.SegTTAModel')
273
+ tta_pipeline = [
274
+ dict(backend_args=None, type='MultiImgLoadImageFromFile'),
275
+ dict(
276
+ transforms=[
277
+ [
278
+ dict(
279
+ keep_ratio=True, scale_factor=0.75, type='MultiImgResize'),
280
+ dict(keep_ratio=True, scale_factor=1.0, type='MultiImgResize'),
281
+ dict(
282
+ keep_ratio=True, scale_factor=1.25, type='MultiImgResize'),
283
+ ],
284
+ [
285
+ dict(
286
+ direction='horizontal',
287
+ prob=0.0,
288
+ type='MultiImgRandomFlip'),
289
+ dict(
290
+ direction='horizontal',
291
+ prob=1.0,
292
+ type='MultiImgRandomFlip'),
293
+ ],
294
+ [
295
+ dict(type='MultiImgLoadAnnotations'),
296
+ ],
297
+ [
298
+ dict(type='MultiImgPackSegInputs'),
299
+ ],
300
+ ],
301
+ type='TestTimeAug'),
302
+ ]
303
+ val_cfg = dict(type='ValLoop')
304
+ val_dataloader = dict(
305
+ batch_size=1,
306
+ dataset=dict(
307
+ ann_file='list/test.txt',
308
+ data_prefix=dict(
309
+ img_path_from='A', img_path_to='B', seg_map_path='label'),
310
+ data_root='/defaultShare/pubdata/remote_sensing/LEVIR-CD-256',
311
+ pipeline=[
312
+ dict(type='MultiImgLoadImageFromFile'),
313
+ dict(type='MultiImgLoadAnnotations'),
314
+ dict(type='MultiImgPackSegInputs'),
315
+ ],
316
+ type='LEVIR_CD_Dataset'),
317
+ num_workers=8,
318
+ persistent_workers=True,
319
+ sampler=dict(shuffle=False, type='DefaultSampler'))
320
+ val_evaluator = dict(
321
+ iou_metrics=[
322
+ 'mFscore',
323
+ 'mIoU',
324
+ ], type='mmseg.IoUMetric')
325
+ val_pipeline = [
326
+ dict(type='MultiImgLoadImageFromFile'),
327
+ dict(keep_ratio=True, scale=(
328
+ 256,
329
+ 256,
330
+ ), type='MultiImgResize'),
331
+ dict(type='MultiImgLoadAnnotations'),
332
+ dict(type='MultiImgPackSegInputs'),
333
+ ]
334
+ vis_backends = [
335
+ dict(type='CDLocalVisBackend'),
336
+ ]
337
+ visualizer = dict(
338
+ alpha=1.0,
339
+ name='visualizer',
340
+ type='CDLocalVisualizer',
341
+ vis_backends=[
342
+ dict(type='CDLocalVisBackend'),
343
+ ])
344
+ work_dir = './work_dirs/upernet_internvit_adp_levir'
ckpts/vitp_loveda_upernet_5428/20250807_180314.log ADDED
The diff for this file is too large to render. See raw diff
 
ckpts/vitp_loveda_upernet_5428/20250807_180314.log.json ADDED
The diff for this file is too large to render. See raw diff
 
ckpts/vitp_loveda_upernet_5428/iter_80000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e6abbbb371c6c42da1d1ba6809404454a8a3bdbf14e27390f66ed040cfd5648
3
+ size 1426461989
ckpts/vitp_loveda_upernet_5428/vitp_loveda_upernet.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset_type = 'LoveDADataset'
2
+ data_root = '/defaultShare/pubdata/remote_sensing/loveda_dataset'
3
+ img_norm_cfg = dict(
4
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
5
+ crop_size = (512, 512)
6
+ train_pipeline = [
7
+ dict(type='LoadImageFromFile'),
8
+ dict(type='LoadAnnotations', reduce_zero_label=True),
9
+ dict(
10
+ type='Resize',
11
+ img_scale=(512, 512),
12
+ ratio_range=(0.5, 2.0),
13
+ keep_ratio=True),
14
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
15
+ dict(type='RandomFlip', prob=0.5),
16
+ dict(type='PhotoMetricDistortion'),
17
+ dict(
18
+ type='Normalize',
19
+ mean=[123.675, 116.28, 103.53],
20
+ std=[58.395, 57.12, 57.375],
21
+ to_rgb=True),
22
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
23
+ dict(type='DefaultFormatBundle'),
24
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
25
+ ]
26
+ test_pipeline = [
27
+ dict(type='LoadImageFromFile'),
28
+ dict(
29
+ type='MultiScaleFlipAug',
30
+ img_scale=(512, 512),
31
+ flip=False,
32
+ transforms=[
33
+ dict(type='Resize', keep_ratio=True),
34
+ dict(type='RandomFlip'),
35
+ dict(
36
+ type='Normalize',
37
+ mean=[123.675, 116.28, 103.53],
38
+ std=[58.395, 57.12, 57.375],
39
+ to_rgb=True),
40
+ dict(type='ImageToTensor', keys=['img']),
41
+ dict(type='Collect', keys=['img'])
42
+ ])
43
+ ]
44
+ data = dict(
45
+ samples_per_gpu=1,
46
+ workers_per_gpu=4,
47
+ train=dict(
48
+ type='LoveDADataset',
49
+ data_root='/defaultShare/pubdata/remote_sensing/loveda_dataset',
50
+ img_dir='trainval/images',
51
+ ann_dir='trainval/labels',
52
+ pipeline=[
53
+ dict(type='LoadImageFromFile'),
54
+ dict(type='LoadAnnotations', reduce_zero_label=True),
55
+ dict(
56
+ type='Resize',
57
+ img_scale=(512, 512),
58
+ ratio_range=(0.5, 2.0),
59
+ keep_ratio=True),
60
+ dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
61
+ dict(type='RandomFlip', prob=0.5),
62
+ dict(type='PhotoMetricDistortion'),
63
+ dict(
64
+ type='Normalize',
65
+ mean=[123.675, 116.28, 103.53],
66
+ std=[58.395, 57.12, 57.375],
67
+ to_rgb=True),
68
+ dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
69
+ dict(type='DefaultFormatBundle'),
70
+ dict(type='Collect', keys=['img', 'gt_semantic_seg'])
71
+ ]),
72
+ val=dict(
73
+ type='LoveDADataset',
74
+ data_root='/defaultShare/pubdata/remote_sensing/loveda_dataset',
75
+ img_dir='val/images',
76
+ ann_dir='val/labels',
77
+ pipeline=[
78
+ dict(type='LoadImageFromFile'),
79
+ dict(
80
+ type='MultiScaleFlipAug',
81
+ img_scale=(512, 512),
82
+ flip=False,
83
+ transforms=[
84
+ dict(type='Resize', keep_ratio=True),
85
+ dict(type='RandomFlip'),
86
+ dict(
87
+ type='Normalize',
88
+ mean=[123.675, 116.28, 103.53],
89
+ std=[58.395, 57.12, 57.375],
90
+ to_rgb=True),
91
+ dict(type='ImageToTensor', keys=['img']),
92
+ dict(type='Collect', keys=['img'])
93
+ ])
94
+ ]),
95
+ test=dict(
96
+ type='LoveDADataset',
97
+ data_root='/defaultShare/pubdata/remote_sensing/loveda_dataset',
98
+ img_dir='test/images',
99
+ ann_dir='test/labels',
100
+ pipeline=[
101
+ dict(type='LoadImageFromFile'),
102
+ dict(
103
+ type='MultiScaleFlipAug',
104
+ img_scale=(512, 512),
105
+ flip=False,
106
+ transforms=[
107
+ dict(type='Resize', keep_ratio=True),
108
+ dict(type='RandomFlip'),
109
+ dict(
110
+ type='Normalize',
111
+ mean=[123.675, 116.28, 103.53],
112
+ std=[58.395, 57.12, 57.375],
113
+ to_rgb=True),
114
+ dict(type='ImageToTensor', keys=['img']),
115
+ dict(type='Collect', keys=['img'])
116
+ ])
117
+ ]))
118
+ log_config = dict(
119
+ interval=50,
120
+ hooks=[
121
+ dict(type='TextLoggerHook', by_epoch=False),
122
+ dict(type='TensorboardLoggerHook')
123
+ ])
124
+ dist_params = dict(backend='nccl')
125
+ log_level = 'INFO'
126
+ load_from = None
127
+ resume_from = None
128
+ workflow = [('train', 1)]
129
+ cudnn_benchmark = True
130
+ optimizer = dict(
131
+ type='AdamW',
132
+ lr=1e-05,
133
+ betas=(0.9, 0.999),
134
+ weight_decay=0.1,
135
+ constructor='InternViTAdapterLayerDecayOptimizerConstructor',
136
+ paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.9))
137
+ optimizer_config = dict()
138
+ lr_config = dict(
139
+ policy='CosineAnnealing',
140
+ warmup='linear',
141
+ warmup_iters=1500,
142
+ warmup_ratio=1e-06,
143
+ min_lr=0.0)
144
+ runner = dict(type='IterBasedRunner', max_iters=80000)
145
+ checkpoint_config = dict(by_epoch=False, interval=4000, max_keep_ckpts=10)
146
+ evaluation = dict(interval=4000, metric='mIoU', pre_eval=True, metrics='mIoU')
147
+ default_hooks = dict(
148
+ timer=dict(type='IterTimerHook'),
149
+ logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
150
+ sampler_seed=dict(type='DistSamplerSeedHook'),
151
+ visualization=dict(type='SegVisualizationHook', draw=True, interval=1000))
152
+ pretrained = 'pretrained/ViTP_ViT_L_300M_rs.safetensors'
153
+ norm_cfg = dict(type='SyncBN', requires_grad=True)
154
+ data_preprocessor = dict(
155
+ type='SegDataPreProcessor',
156
+ size=(512, 512),
157
+ mean=[123.675, 116.28, 103.53],
158
+ std=[58.395, 57.12, 57.375],
159
+ bgr_to_rgb=True,
160
+ pad_val=0,
161
+ seg_pad_val=255)
162
+ model = dict(
163
+ type='EncoderDecoder',
164
+ backbone=dict(
165
+ type='InternViTAdapter',
166
+ pretrain_size=448,
167
+ img_size=512,
168
+ patch_size=16,
169
+ embed_dim=1024,
170
+ depth=24,
171
+ num_heads=16,
172
+ mlp_ratio=4.0,
173
+ drop_path_rate=0.1,
174
+ init_values=0.1,
175
+ with_cp=True,
176
+ use_flash_attn=True,
177
+ qk_normalization=False,
178
+ layerscale_force_fp32=False,
179
+ with_fpn=False,
180
+ freeze_vit=False,
181
+ use_final_norm=True,
182
+ interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
183
+ cffn_ratio=0.25,
184
+ deform_ratio=0.25,
185
+ qkv_bias=True,
186
+ norm_type='layer_norm',
187
+ pretrained='pretrained/ViTP_ViT_L_300M_rs.safetensors',
188
+ pretrained_type='full'),
189
+ decode_head=dict(
190
+ type='UPerHead',
191
+ in_channels=[1024, 1024, 1024, 1024],
192
+ num_classes=7,
193
+ ignore_index=255,
194
+ in_index=[0, 1, 2, 3],
195
+ pool_scales=(1, 2, 3, 6),
196
+ channels=512,
197
+ dropout_ratio=0.1,
198
+ norm_cfg=dict(type='SyncBN', requires_grad=True),
199
+ align_corners=False,
200
+ loss_decode=dict(
201
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
202
+ train_cfg=dict(),
203
+ test_cfg=dict(mode='slide', stride=(384, 384), crop_size=(512, 512)))
204
+ fp16 = dict(loss_scale=dict(init_scale=512))
205
+ randomness = dict(seed=3407)
206
+ work_dir = './work_dirs/vitp_loveda_upernet'
207
+ gpu_ids = range(0, 8)
208
+ auto_resume = False
ckpts/vitp_rsar_orcnn_7231/20250716_042910.log ADDED
The diff for this file is too large to render. See raw diff
 
ckpts/vitp_rsar_orcnn_7231/20250716_042910.log.json ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"env_info": "sys.platform: linux\nPython: 3.10.16 (main, Dec 11 2024, 16:24:50) [GCC 11.2.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: NVIDIA GeForce RTX 3090\nCUDA_HOME: /mnt/petrelfs/share_data/liqingyun/cuda/cuda-12.4/\nGCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0\nPyTorch: 1.12.0\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2024.0-Product Build 20231011 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.3\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37\n - CuDNN 8.3.2 (built against CUDA 11.5)\n - Magma 2.5.2\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new 
-Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.12.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=OFF, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n\nTorchVision: 0.13.0\nOpenCV: 4.11.0\nMMCV: 1.6.1\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.4\nMMRotate: 0.3.4+6fc0c4e", "config": "dataset_type = 'RSARDataset'\ndata_root = '/liyuxuan/DATA/RSAR/'\nangle_version = 'le90'\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', with_bbox=True),\n dict(type='RResize', img_scale=(800, 800), keep_ratio=False),\n dict(\n type='RRandomFlip',\n flip_ratio=[0.25, 0.25, 0.25],\n direction=['horizontal', 'vertical', 'diagonal'],\n version='le90'),\n dict(\n type='PolyRandomRotate',\n rotate_ratio=0.5,\n angles_range=180,\n auto_bound=False,\n rect_classes=[3],\n version='le90'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\ntest_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(800, 800),\n flip=False,\n transforms=[\n dict(type='RResize', img_scale=(800, 800), keep_ratio=False),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img'])\n ])\n]\ndata = dict(\n samples_per_gpu=1,\n workers_per_gpu=4,\n train=dict(\n type='RSARDataset',\n 
ann_file='/liyuxuan/DATA/RSAR/train/annfiles/',\n img_prefix='/liyuxuan/DATA/RSAR/train/images/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', with_bbox=True),\n dict(type='RResize', img_scale=(800, 800), keep_ratio=False),\n dict(\n type='RRandomFlip',\n flip_ratio=[0.25, 0.25, 0.25],\n direction=['horizontal', 'vertical', 'diagonal'],\n version='le90'),\n dict(\n type='PolyRandomRotate',\n rotate_ratio=0.5,\n angles_range=180,\n auto_bound=False,\n rect_classes=[3],\n version='le90'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n ],\n version='le90'),\n val=dict(\n type='RSARDataset',\n ann_file='/liyuxuan/DATA/RSAR/test/annfiles/',\n img_prefix='/liyuxuan/DATA/RSAR/test/images/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(800, 800),\n flip=False,\n transforms=[\n dict(\n type='RResize', img_scale=(800, 800),\n keep_ratio=False),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img'])\n ])\n ],\n version='le90'),\n test=dict(\n type='RSARDataset',\n ann_file='/liyuxuan/DATA/RSAR/test/images/',\n img_prefix='/liyuxuan/DATA/RSAR/test/images/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(800, 800),\n flip=False,\n transforms=[\n dict(\n type='RResize', img_scale=(800, 800),\n keep_ratio=False),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img'])\n ])\n ],\n version='le90'))\nevaluation = dict(interval=1, 
metric='mAP')\noptimizer = dict(\n type='AdamW',\n lr=2.5e-05,\n betas=(0.9, 0.999),\n weight_decay=0.05,\n constructor='LayerDecayOptimizerConstructor',\n paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.75))\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n policy='step',\n warmup='linear',\n warmup_iters=500,\n warmup_ratio=0.3333333333333333,\n step=[8, 11])\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\ncheckpoint_config = dict(interval=1, max_keep_ckpts=1)\nlog_config = dict(interval=500, hooks=[dict(type='TextLoggerHook')])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\nopencv_num_threads = 0\nmp_start_method = 'fork'\npretrained = 'pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors'\nnorm_cfg = dict(type='LN', requires_grad=True)\nmodel = dict(\n type='OrientedRCNN',\n backbone=dict(\n type='InternViTAdapter',\n pretrain_size=448,\n img_size=800,\n patch_size=16,\n embed_dim=1024,\n depth=24,\n num_heads=16,\n mlp_ratio=4.0,\n drop_path_rate=0.1,\n init_values=0.1,\n with_cp=True,\n use_flash_attn=True,\n qk_normalization=False,\n layerscale_force_fp32=False,\n with_fpn=False,\n freeze_vit=False,\n use_final_norm=True,\n interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],\n cffn_ratio=0.25,\n deform_ratio=0.25,\n qkv_bias=True,\n norm_type='layer_norm',\n pretrained=\n 'pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors',\n pretrained_type='full',\n only_feat_out=True),\n neck=dict(\n type='SimpleFPN',\n in_channels=[1024, 1024, 1024, 1024],\n out_channels=256,\n norm_cfg=dict(type='LN', requires_grad=True),\n use_residual=False,\n num_outs=5),\n rpn_head=dict(\n type='OrientedRPNHead',\n in_channels=256,\n feat_channels=256,\n version='le90',\n anchor_generator=dict(\n type='AnchorGenerator',\n scales=[8],\n ratios=[0.5, 1.0, 2.0],\n strides=[4, 8, 16, 32, 64]),\n 
bbox_coder=dict(\n type='MidpointOffsetCoder',\n angle_range='le90',\n target_means=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0],\n target_stds=[1.0, 1.0, 1.0, 1.0, 0.5, 0.5]),\n loss_cls=dict(\n type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n loss_bbox=dict(\n type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)),\n roi_head=dict(\n type='OrientedStandardRoIHead',\n bbox_roi_extractor=dict(\n type='RotatedSingleRoIExtractor',\n roi_layer=dict(\n type='RoIAlignRotated',\n out_size=7,\n sample_num=2,\n clockwise=True),\n out_channels=256,\n featmap_strides=[4, 8, 16, 32]),\n bbox_head=dict(\n type='RotatedShared2FCBBoxHead',\n in_channels=256,\n fc_out_channels=1024,\n roi_feat_size=7,\n num_classes=6,\n bbox_coder=dict(\n type='DeltaXYWHAOBBoxCoder',\n angle_range='le90',\n norm_factor=None,\n edge_swap=True,\n proj_xy=True,\n target_means=(0.0, 0.0, 0.0, 0.0, 0.0),\n target_stds=(0.1, 0.1, 0.2, 0.2, 0.1)),\n reg_class_agnostic=True,\n loss_cls=dict(\n type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),\n train_cfg=dict(\n rpn=dict(\n assigner=dict(\n type='MaxIoUAssigner',\n pos_iou_thr=0.7,\n neg_iou_thr=0.3,\n min_pos_iou=0.3,\n match_low_quality=True,\n gpu_assign_thr=1000,\n ignore_iof_thr=-1),\n sampler=dict(\n type='RandomSampler',\n num=256,\n pos_fraction=0.5,\n neg_pos_ub=-1,\n add_gt_as_proposals=False),\n allowed_border=0,\n pos_weight=-1,\n debug=False),\n rpn_proposal=dict(\n nms_pre=2000,\n max_per_img=2000,\n nms=dict(type='nms', iou_threshold=0.8),\n min_bbox_size=0),\n rcnn=dict(\n assigner=dict(\n type='MaxIoUAssigner',\n pos_iou_thr=0.5,\n neg_iou_thr=0.5,\n min_pos_iou=0.5,\n match_low_quality=False,\n gpu_assign_thr=1000,\n iou_calculator=dict(type='RBboxOverlaps2D'),\n ignore_iof_thr=-1),\n sampler=dict(\n type='RRandomSampler',\n num=512,\n pos_fraction=0.25,\n neg_pos_ub=-1,\n add_gt_as_proposals=True),\n pos_weight=-1,\n debug=False)),\n 
test_cfg=dict(\n rpn=dict(\n nms_pre=2000,\n max_per_img=2000,\n nms=dict(type='nms', iou_threshold=0.8),\n min_bbox_size=0),\n rcnn=dict(\n nms_pre=2000,\n min_bbox_size=0,\n score_thr=0.05,\n nms=dict(iou_thr=0.1),\n max_per_img=2000)))\nfp16 = dict(loss_scale=dict(init_scale=512))\nwork_dir = './work_dirs/rsar_8k_TMAug75_orcnn_8k'\nauto_resume = False\ngpu_ids = range(0, 8)\ndevice = 'cuda'\n", "seed": 0, "exp_name": "rsar_8k_TMAug75_orcnn_8k.py"}
2
+ {"mode": "train", "epoch": 1, "iter": 500, "lr": 2e-05, "memory": 9594, "data_time": 0.01252, "loss_rpn_cls": 0.14024, "loss_rpn_bbox": 0.04185, "loss_cls": 0.06949, "acc": 98.53896, "loss_bbox": 0.05972, "loss": 0.3113, "grad_norm": 2.83047, "time": 0.72335}
3
+ {"mode": "train", "epoch": 1, "iter": 1000, "lr": 3e-05, "memory": 9609, "data_time": 0.00319, "loss_rpn_cls": 0.03151, "loss_rpn_bbox": 0.03272, "loss_cls": 0.06674, "acc": 98.07568, "loss_bbox": 0.06939, "loss": 0.20037, "grad_norm": 3.47344, "time": 0.70378}
4
+ {"mode": "train", "epoch": 1, "iter": 1500, "lr": 3e-05, "memory": 9609, "data_time": 0.00326, "loss_rpn_cls": 0.03022, "loss_rpn_bbox": 0.03259, "loss_cls": 0.0768, "acc": 97.52959, "loss_bbox": 0.09163, "loss": 0.23124, "grad_norm": 4.1174, "time": 0.70302}
5
+ {"mode": "train", "epoch": 1, "iter": 2000, "lr": 3e-05, "memory": 9804, "data_time": 0.00329, "loss_rpn_cls": 0.02422, "loss_rpn_bbox": 0.02749, "loss_cls": 0.08185, "acc": 97.22637, "loss_bbox": 0.10107, "loss": 0.23464, "grad_norm": 4.28043, "time": 0.70249}
6
+ {"mode": "train", "epoch": 1, "iter": 2500, "lr": 3e-05, "memory": 9804, "data_time": 0.00323, "loss_rpn_cls": 0.02269, "loss_rpn_bbox": 0.02681, "loss_cls": 0.08763, "acc": 96.94111, "loss_bbox": 0.1116, "loss": 0.24873, "grad_norm": 4.45184, "time": 0.70266}
7
+ {"mode": "train", "epoch": 1, "iter": 3000, "lr": 3e-05, "memory": 9804, "data_time": 0.00318, "loss_rpn_cls": 0.02193, "loss_rpn_bbox": 0.02498, "loss_cls": 0.08141, "acc": 97.06045, "loss_bbox": 0.1079, "loss": 0.23622, "grad_norm": 4.20132, "time": 0.70263}
8
+ {"mode": "train", "epoch": 1, "iter": 3500, "lr": 3e-05, "memory": 9804, "data_time": 0.00313, "loss_rpn_cls": 0.01956, "loss_rpn_bbox": 0.02406, "loss_cls": 0.07905, "acc": 97.08457, "loss_bbox": 0.10492, "loss": 0.22758, "grad_norm": 3.9723, "time": 0.70249}
9
+ {"mode": "train", "epoch": 1, "iter": 4000, "lr": 3e-05, "memory": 9804, "data_time": 0.00306, "loss_rpn_cls": 0.01821, "loss_rpn_bbox": 0.02268, "loss_cls": 0.08242, "acc": 96.94653, "loss_bbox": 0.10654, "loss": 0.22985, "grad_norm": 4.01182, "time": 0.7028}
10
+ {"mode": "train", "epoch": 1, "iter": 4500, "lr": 3e-05, "memory": 9863, "data_time": 0.00303, "loss_rpn_cls": 0.02081, "loss_rpn_bbox": 0.02342, "loss_cls": 0.0812, "acc": 97.02617, "loss_bbox": 0.10412, "loss": 0.22954, "grad_norm": 3.8387, "time": 0.70321}
11
+ {"mode": "train", "epoch": 1, "iter": 5000, "lr": 3e-05, "memory": 9863, "data_time": 0.00311, "loss_rpn_cls": 0.01721, "loss_rpn_bbox": 0.02267, "loss_cls": 0.08074, "acc": 96.88999, "loss_bbox": 0.1063, "loss": 0.22692, "grad_norm": 3.90445, "time": 0.70261}
12
+ {"mode": "train", "epoch": 1, "iter": 5500, "lr": 3e-05, "memory": 10002, "data_time": 0.00323, "loss_rpn_cls": 0.01705, "loss_rpn_bbox": 0.02129, "loss_cls": 0.07841, "acc": 97.06235, "loss_bbox": 0.10166, "loss": 0.2184, "grad_norm": 3.74052, "time": 0.70236}
13
+ {"mode": "train", "epoch": 1, "iter": 6000, "lr": 3e-05, "memory": 10002, "data_time": 0.00317, "loss_rpn_cls": 0.01783, "loss_rpn_bbox": 0.02099, "loss_cls": 0.08222, "acc": 96.87183, "loss_bbox": 0.10329, "loss": 0.22433, "grad_norm": 3.64682, "time": 0.70259}
14
+ {"mode": "train", "epoch": 1, "iter": 6500, "lr": 3e-05, "memory": 10002, "data_time": 0.0032, "loss_rpn_cls": 0.01543, "loss_rpn_bbox": 0.01982, "loss_cls": 0.07824, "acc": 96.98604, "loss_bbox": 0.10209, "loss": 0.21558, "grad_norm": 3.73653, "time": 0.70237}
15
+ {"mode": "train", "epoch": 1, "iter": 7000, "lr": 3e-05, "memory": 10002, "data_time": 0.00315, "loss_rpn_cls": 0.01463, "loss_rpn_bbox": 0.01899, "loss_cls": 0.08044, "acc": 96.89829, "loss_bbox": 0.10384, "loss": 0.2179, "grad_norm": 3.7265, "time": 0.70222}
16
+ {"mode": "train", "epoch": 1, "iter": 7500, "lr": 3e-05, "memory": 10002, "data_time": 0.00314, "loss_rpn_cls": 0.01677, "loss_rpn_bbox": 0.01934, "loss_cls": 0.07842, "acc": 96.9874, "loss_bbox": 0.10063, "loss": 0.21516, "grad_norm": 3.47063, "time": 0.70232}
17
+ {"mode": "train", "epoch": 1, "iter": 8000, "lr": 3e-05, "memory": 10002, "data_time": 0.00307, "loss_rpn_cls": 0.01495, "loss_rpn_bbox": 0.01959, "loss_cls": 0.07729, "acc": 97.01699, "loss_bbox": 0.10029, "loss": 0.21212, "grad_norm": 3.4604, "time": 0.70215}
18
+ {"mode": "train", "epoch": 1, "iter": 8500, "lr": 3e-05, "memory": 10002, "data_time": 0.00308, "loss_rpn_cls": 0.01268, "loss_rpn_bbox": 0.01807, "loss_cls": 0.0763, "acc": 97.00757, "loss_bbox": 0.09835, "loss": 0.20541, "grad_norm": 3.35526, "time": 0.70215}
19
+ {"mode": "train", "epoch": 1, "iter": 9000, "lr": 3e-05, "memory": 10002, "data_time": 0.00307, "loss_rpn_cls": 0.01364, "loss_rpn_bbox": 0.01792, "loss_cls": 0.07642, "acc": 96.99141, "loss_bbox": 0.09923, "loss": 0.20722, "grad_norm": Infinity, "time": 0.70215}
20
+ {"mode": "train", "epoch": 1, "iter": 9500, "lr": 3e-05, "memory": 10002, "data_time": 0.00313, "loss_rpn_cls": 0.01348, "loss_rpn_bbox": 0.01866, "loss_cls": 0.07748, "acc": 96.94526, "loss_bbox": 0.10217, "loss": 0.21179, "grad_norm": 3.06029, "time": 0.70261}
21
+ {"mode": "val", "epoch": 1, "iter": 1068, "lr": 3e-05, "mAP": 0.54913}
22
+ {"mode": "train", "epoch": 2, "iter": 500, "lr": 3e-05, "memory": 10002, "data_time": 0.01001, "loss_rpn_cls": 0.01264, "loss_rpn_bbox": 0.01745, "loss_cls": 0.07466, "acc": 97.05649, "loss_bbox": 0.09693, "loss": 0.20169, "grad_norm": 3.06205, "time": 0.71073}
23
+ {"mode": "train", "epoch": 2, "iter": 1000, "lr": 3e-05, "memory": 10002, "data_time": 0.00363, "loss_rpn_cls": 0.01453, "loss_rpn_bbox": 0.01903, "loss_cls": 0.07678, "acc": 96.98926, "loss_bbox": 0.10161, "loss": 0.21195, "grad_norm": 2.81391, "time": 0.70426}
24
+ {"mode": "train", "epoch": 2, "iter": 1500, "lr": 3e-05, "memory": 10002, "data_time": 0.00342, "loss_rpn_cls": 0.01272, "loss_rpn_bbox": 0.01806, "loss_cls": 0.07582, "acc": 96.99722, "loss_bbox": 0.10491, "loss": 0.21151, "grad_norm": 2.69963, "time": 0.7033}
25
+ {"mode": "train", "epoch": 2, "iter": 2000, "lr": 3e-05, "memory": 10002, "data_time": 0.00317, "loss_rpn_cls": 0.01317, "loss_rpn_bbox": 0.01781, "loss_cls": 0.07812, "acc": 96.9375, "loss_bbox": 0.10101, "loss": 0.2101, "grad_norm": Infinity, "time": 0.70344}
26
+ {"mode": "train", "epoch": 2, "iter": 2500, "lr": 3e-05, "memory": 10002, "data_time": 0.0032, "loss_rpn_cls": 0.01183, "loss_rpn_bbox": 0.0188, "loss_cls": 0.07497, "acc": 97.01626, "loss_bbox": 0.10139, "loss": 0.20698, "grad_norm": Infinity, "time": 0.70333}
27
+ {"mode": "train", "epoch": 2, "iter": 3000, "lr": 3e-05, "memory": 10002, "data_time": 0.00309, "loss_rpn_cls": 0.01219, "loss_rpn_bbox": 0.01684, "loss_cls": 0.07539, "acc": 97.01655, "loss_bbox": 0.10069, "loss": 0.20511, "grad_norm": 2.86299, "time": 0.70291}
28
+ {"mode": "train", "epoch": 2, "iter": 3500, "lr": 3e-05, "memory": 10002, "data_time": 0.00318, "loss_rpn_cls": 0.01403, "loss_rpn_bbox": 0.01747, "loss_cls": 0.07667, "acc": 97.03076, "loss_bbox": 0.09864, "loss": 0.20681, "grad_norm": 2.91259, "time": 0.70318}
29
+ {"mode": "train", "epoch": 2, "iter": 4000, "lr": 3e-05, "memory": 10002, "data_time": 0.00318, "loss_rpn_cls": 0.01174, "loss_rpn_bbox": 0.01663, "loss_cls": 0.07314, "acc": 97.09561, "loss_bbox": 0.09782, "loss": 0.19933, "grad_norm": 2.3958, "time": 0.70322}
30
+ {"mode": "train", "epoch": 2, "iter": 4500, "lr": 3e-05, "memory": 10002, "data_time": 0.00321, "loss_rpn_cls": 0.01325, "loss_rpn_bbox": 0.01766, "loss_cls": 0.07407, "acc": 97.0645, "loss_bbox": 0.09748, "loss": 0.20247, "grad_norm": 2.62973, "time": 0.70296}
31
+ {"mode": "train", "epoch": 2, "iter": 5000, "lr": 3e-05, "memory": 10002, "data_time": 0.00325, "loss_rpn_cls": 0.01175, "loss_rpn_bbox": 0.01704, "loss_cls": 0.0751, "acc": 97.01191, "loss_bbox": 0.10018, "loss": 0.20407, "grad_norm": 2.54269, "time": 0.70272}
32
+ {"mode": "train", "epoch": 2, "iter": 5500, "lr": 3e-05, "memory": 10002, "data_time": 0.00318, "loss_rpn_cls": 0.01187, "loss_rpn_bbox": 0.01637, "loss_cls": 0.07613, "acc": 96.95884, "loss_bbox": 0.10095, "loss": 0.20532, "grad_norm": 2.58255, "time": 0.70285}
33
+ {"mode": "train", "epoch": 2, "iter": 6000, "lr": 3e-05, "memory": 10002, "data_time": 0.00318, "loss_rpn_cls": 0.01242, "loss_rpn_bbox": 0.0171, "loss_cls": 0.07773, "acc": 96.92734, "loss_bbox": 0.10004, "loss": 0.20729, "grad_norm": 2.56368, "time": 0.70258}
34
+ {"mode": "train", "epoch": 2, "iter": 6500, "lr": 3e-05, "memory": 10002, "data_time": 0.00322, "loss_rpn_cls": 0.01204, "loss_rpn_bbox": 0.01746, "loss_cls": 0.0758, "acc": 96.97783, "loss_bbox": 0.10155, "loss": 0.20685, "grad_norm": 2.38867, "time": 0.70278}
35
+ {"mode": "train", "epoch": 2, "iter": 7000, "lr": 3e-05, "memory": 10002, "data_time": 0.00322, "loss_rpn_cls": 0.01155, "loss_rpn_bbox": 0.01716, "loss_cls": 0.07403, "acc": 97.05576, "loss_bbox": 0.09605, "loss": 0.19879, "grad_norm": 2.26777, "time": 0.70241}
36
+ {"mode": "train", "epoch": 2, "iter": 7500, "lr": 3e-05, "memory": 10002, "data_time": 0.00335, "loss_rpn_cls": 0.01152, "loss_rpn_bbox": 0.01741, "loss_cls": 0.07043, "acc": 97.19365, "loss_bbox": 0.09526, "loss": 0.19462, "grad_norm": 2.26232, "time": 0.70225}
37
+ {"mode": "train", "epoch": 2, "iter": 8000, "lr": 3e-05, "memory": 10002, "data_time": 0.00329, "loss_rpn_cls": 0.01188, "loss_rpn_bbox": 0.01723, "loss_cls": 0.07551, "acc": 96.99136, "loss_bbox": 0.09941, "loss": 0.20404, "grad_norm": Infinity, "time": 0.70277}
38
+ {"mode": "train", "epoch": 2, "iter": 8500, "lr": 3e-05, "memory": 10002, "data_time": 0.00326, "loss_rpn_cls": 0.0112, "loss_rpn_bbox": 0.01575, "loss_cls": 0.07394, "acc": 97.05522, "loss_bbox": 0.09978, "loss": 0.20066, "grad_norm": 2.22689, "time": 0.7032}
39
+ {"mode": "train", "epoch": 2, "iter": 9000, "lr": 3e-05, "memory": 10002, "data_time": 0.00318, "loss_rpn_cls": 0.01149, "loss_rpn_bbox": 0.01683, "loss_cls": 0.07321, "acc": 97.07358, "loss_bbox": 0.09761, "loss": 0.19915, "grad_norm": 2.31324, "time": 0.70255}
40
+ {"mode": "train", "epoch": 2, "iter": 9500, "lr": 3e-05, "memory": 10002, "data_time": 0.00306, "loss_rpn_cls": 0.01133, "loss_rpn_bbox": 0.0162, "loss_cls": 0.07127, "acc": 97.15786, "loss_bbox": 0.09541, "loss": 0.19421, "grad_norm": 2.2412, "time": 0.70273}
41
+ {"mode": "val", "epoch": 2, "iter": 1068, "lr": 3e-05, "mAP": 0.53252}
42
+ {"mode": "train", "epoch": 3, "iter": 500, "lr": 3e-05, "memory": 10002, "data_time": 0.00981, "loss_rpn_cls": 0.00997, "loss_rpn_bbox": 0.01622, "loss_cls": 0.07148, "acc": 97.13555, "loss_bbox": 0.09721, "loss": 0.19488, "grad_norm": 2.10508, "time": 0.71277}
43
+ {"mode": "train", "epoch": 3, "iter": 1000, "lr": 3e-05, "memory": 10003, "data_time": 0.00315, "loss_rpn_cls": 0.01148, "loss_rpn_bbox": 0.01587, "loss_cls": 0.07184, "acc": 97.15791, "loss_bbox": 0.09583, "loss": 0.19502, "grad_norm": 2.16656, "time": 0.70523}
44
+ {"mode": "train", "epoch": 3, "iter": 1500, "lr": 3e-05, "memory": 10003, "data_time": 0.00307, "loss_rpn_cls": 0.01019, "loss_rpn_bbox": 0.01632, "loss_cls": 0.07246, "acc": 97.08247, "loss_bbox": 0.0952, "loss": 0.19417, "grad_norm": 2.15082, "time": 0.70447}
45
+ {"mode": "train", "epoch": 3, "iter": 2000, "lr": 3e-05, "memory": 10003, "data_time": 0.00304, "loss_rpn_cls": 0.00923, "loss_rpn_bbox": 0.01648, "loss_cls": 0.07168, "acc": 97.10303, "loss_bbox": 0.09521, "loss": 0.19261, "grad_norm": 2.20279, "time": 0.70484}
46
+ {"mode": "train", "epoch": 3, "iter": 2500, "lr": 3e-05, "memory": 10003, "data_time": 0.00311, "loss_rpn_cls": 0.01206, "loss_rpn_bbox": 0.01617, "loss_cls": 0.07087, "acc": 97.18853, "loss_bbox": 0.09401, "loss": 0.19311, "grad_norm": 2.11463, "time": 0.70431}
47
+ {"mode": "train", "epoch": 3, "iter": 3000, "lr": 3e-05, "memory": 10003, "data_time": 0.0031, "loss_rpn_cls": 0.01025, "loss_rpn_bbox": 0.01513, "loss_cls": 0.0698, "acc": 97.18262, "loss_bbox": 0.09556, "loss": 0.19074, "grad_norm": Infinity, "time": 0.70573}
48
+ {"mode": "train", "epoch": 3, "iter": 3500, "lr": 3e-05, "memory": 10003, "data_time": 0.00319, "loss_rpn_cls": 0.00986, "loss_rpn_bbox": 0.01545, "loss_cls": 0.07453, "acc": 96.99062, "loss_bbox": 0.10101, "loss": 0.20086, "grad_norm": 2.13368, "time": 0.70543}
49
+ {"mode": "train", "epoch": 3, "iter": 4000, "lr": 3e-05, "memory": 10003, "data_time": 0.00318, "loss_rpn_cls": 0.00933, "loss_rpn_bbox": 0.01608, "loss_cls": 0.07074, "acc": 97.16372, "loss_bbox": 0.0962, "loss": 0.19235, "grad_norm": 2.08251, "time": 0.70585}
50
+ {"mode": "train", "epoch": 3, "iter": 4500, "lr": 3e-05, "memory": 10003, "data_time": 0.0031, "loss_rpn_cls": 0.0093, "loss_rpn_bbox": 0.0148, "loss_cls": 0.06874, "acc": 97.23203, "loss_bbox": 0.09521, "loss": 0.18805, "grad_norm": 2.01941, "time": 0.70526}
51
+ {"mode": "train", "epoch": 3, "iter": 5000, "lr": 3e-05, "memory": 10003, "data_time": 0.00308, "loss_rpn_cls": 0.00913, "loss_rpn_bbox": 0.01516, "loss_cls": 0.06954, "acc": 97.19756, "loss_bbox": 0.09339, "loss": 0.18722, "grad_norm": Infinity, "time": 0.70571}
52
+ {"mode": "train", "epoch": 3, "iter": 5500, "lr": 3e-05, "memory": 10003, "data_time": 0.00314, "loss_rpn_cls": 0.00987, "loss_rpn_bbox": 0.01553, "loss_cls": 0.07124, "acc": 97.12363, "loss_bbox": 0.09675, "loss": 0.1934, "grad_norm": 1.8974, "time": 0.70542}
53
+ {"mode": "train", "epoch": 3, "iter": 6000, "lr": 3e-05, "memory": 10003, "data_time": 0.00312, "loss_rpn_cls": 0.00988, "loss_rpn_bbox": 0.01508, "loss_cls": 0.07057, "acc": 97.13765, "loss_bbox": 0.09445, "loss": 0.18997, "grad_norm": 1.98686, "time": 0.7051}
54
+ {"mode": "train", "epoch": 3, "iter": 6500, "lr": 3e-05, "memory": 10003, "data_time": 0.00311, "loss_rpn_cls": 0.00969, "loss_rpn_bbox": 0.0159, "loss_cls": 0.07231, "acc": 97.1084, "loss_bbox": 0.09718, "loss": 0.19508, "grad_norm": 1.90614, "time": 0.70588}
55
+ {"mode": "train", "epoch": 3, "iter": 7000, "lr": 3e-05, "memory": 10003, "data_time": 0.00299, "loss_rpn_cls": 0.00856, "loss_rpn_bbox": 0.01414, "loss_cls": 0.06858, "acc": 97.20181, "loss_bbox": 0.095, "loss": 0.18629, "grad_norm": 1.8433, "time": 0.70592}
56
+ {"mode": "train", "epoch": 3, "iter": 7500, "lr": 3e-05, "memory": 10003, "data_time": 0.00296, "loss_rpn_cls": 0.01094, "loss_rpn_bbox": 0.01469, "loss_cls": 0.07238, "acc": 97.1252, "loss_bbox": 0.0941, "loss": 0.19212, "grad_norm": Infinity, "time": 0.7066}
57
+ {"mode": "train", "epoch": 3, "iter": 8000, "lr": 3e-05, "memory": 10003, "data_time": 0.00302, "loss_rpn_cls": 0.00917, "loss_rpn_bbox": 0.01499, "loss_cls": 0.07211, "acc": 97.07046, "loss_bbox": 0.09669, "loss": 0.19296, "grad_norm": 1.8321, "time": 0.70517}
58
+ {"mode": "train", "epoch": 3, "iter": 8500, "lr": 3e-05, "memory": 10003, "data_time": 0.00296, "loss_rpn_cls": 0.0107, "loss_rpn_bbox": 0.016, "loss_cls": 0.07017, "acc": 97.21187, "loss_bbox": 0.09263, "loss": 0.1895, "grad_norm": Infinity, "time": 0.70567}
59
+ {"mode": "train", "epoch": 3, "iter": 9000, "lr": 3e-05, "memory": 10003, "data_time": 0.00304, "loss_rpn_cls": 0.00963, "loss_rpn_bbox": 0.01576, "loss_cls": 0.06813, "acc": 97.2709, "loss_bbox": 0.0945, "loss": 0.18802, "grad_norm": 1.82404, "time": 0.70469}
60
+ {"mode": "train", "epoch": 3, "iter": 9500, "lr": 3e-05, "memory": 10003, "data_time": 0.00314, "loss_rpn_cls": 0.01016, "loss_rpn_bbox": 0.01574, "loss_cls": 0.06758, "acc": 97.31445, "loss_bbox": 0.09189, "loss": 0.18537, "grad_norm": 1.73728, "time": 0.70551}
61
+ {"mode": "val", "epoch": 3, "iter": 1068, "lr": 3e-05, "mAP": 0.6086}
62
+ {"mode": "train", "epoch": 4, "iter": 500, "lr": 3e-05, "memory": 10003, "data_time": 0.01032, "loss_rpn_cls": 0.00869, "loss_rpn_bbox": 0.01404, "loss_cls": 0.07056, "acc": 97.15483, "loss_bbox": 0.09609, "loss": 0.18937, "grad_norm": 1.8767, "time": 0.71473}
63
+ {"mode": "train", "epoch": 4, "iter": 1000, "lr": 3e-05, "memory": 10003, "data_time": 0.00329, "loss_rpn_cls": 0.00907, "loss_rpn_bbox": 0.01438, "loss_cls": 0.067, "acc": 97.28423, "loss_bbox": 0.09107, "loss": 0.18152, "grad_norm": 1.75521, "time": 0.70711}
64
+ {"mode": "train", "epoch": 4, "iter": 1500, "lr": 3e-05, "memory": 10003, "data_time": 0.00318, "loss_rpn_cls": 0.00821, "loss_rpn_bbox": 0.01507, "loss_cls": 0.06811, "acc": 97.21646, "loss_bbox": 0.0941, "loss": 0.18549, "grad_norm": 1.79809, "time": 0.7077}
65
+ {"mode": "train", "epoch": 4, "iter": 2000, "lr": 3e-05, "memory": 10003, "data_time": 0.00307, "loss_rpn_cls": 0.00864, "loss_rpn_bbox": 0.0144, "loss_cls": 0.06952, "acc": 97.19336, "loss_bbox": 0.09287, "loss": 0.18542, "grad_norm": 1.81303, "time": 0.70657}
66
+ {"mode": "train", "epoch": 4, "iter": 2500, "lr": 3e-05, "memory": 10003, "data_time": 0.00309, "loss_rpn_cls": 0.0091, "loss_rpn_bbox": 0.01468, "loss_cls": 0.07162, "acc": 97.10884, "loss_bbox": 0.09516, "loss": 0.19055, "grad_norm": 1.80157, "time": 0.70669}
67
+ {"mode": "train", "epoch": 4, "iter": 3000, "lr": 3e-05, "memory": 10003, "data_time": 0.00315, "loss_rpn_cls": 0.00997, "loss_rpn_bbox": 0.01518, "loss_cls": 0.07082, "acc": 97.13716, "loss_bbox": 0.09598, "loss": 0.19195, "grad_norm": Infinity, "time": 0.70753}
68
+ {"mode": "train", "epoch": 4, "iter": 3500, "lr": 3e-05, "memory": 10003, "data_time": 0.00312, "loss_rpn_cls": 0.00887, "loss_rpn_bbox": 0.01468, "loss_cls": 0.06601, "acc": 97.33809, "loss_bbox": 0.09029, "loss": 0.17985, "grad_norm": 1.69034, "time": 0.70737}
69
+ {"mode": "train", "epoch": 4, "iter": 4000, "lr": 3e-05, "memory": 10003, "data_time": 0.00309, "loss_rpn_cls": 0.00873, "loss_rpn_bbox": 0.01396, "loss_cls": 0.06786, "acc": 97.22559, "loss_bbox": 0.09471, "loss": 0.18526, "grad_norm": 1.71019, "time": 0.70798}
70
+ {"mode": "train", "epoch": 4, "iter": 4500, "lr": 3e-05, "memory": 10003, "data_time": 0.0031, "loss_rpn_cls": 0.0089, "loss_rpn_bbox": 0.01543, "loss_cls": 0.0673, "acc": 97.28882, "loss_bbox": 0.09298, "loss": 0.1846, "grad_norm": 1.73542, "time": 0.70716}
71
+ {"mode": "train", "epoch": 4, "iter": 5000, "lr": 3e-05, "memory": 10003, "data_time": 0.00304, "loss_rpn_cls": 0.00868, "loss_rpn_bbox": 0.01478, "loss_cls": 0.06884, "acc": 97.20615, "loss_bbox": 0.09509, "loss": 0.18739, "grad_norm": Infinity, "time": 0.7072}
72
+ {"mode": "train", "epoch": 4, "iter": 5500, "lr": 3e-05, "memory": 10003, "data_time": 0.0031, "loss_rpn_cls": 0.00945, "loss_rpn_bbox": 0.01537, "loss_cls": 0.0675, "acc": 97.27456, "loss_bbox": 0.09177, "loss": 0.18408, "grad_norm": 1.70174, "time": 0.7061}
73
+ {"mode": "train", "epoch": 4, "iter": 6000, "lr": 3e-05, "memory": 10003, "data_time": 0.00302, "loss_rpn_cls": 0.0083, "loss_rpn_bbox": 0.01522, "loss_cls": 0.06722, "acc": 97.24316, "loss_bbox": 0.09413, "loss": 0.18486, "grad_norm": 1.58961, "time": 0.70672}
74
+ {"mode": "train", "epoch": 4, "iter": 6500, "lr": 3e-05, "memory": 10003, "data_time": 0.00299, "loss_rpn_cls": 0.00867, "loss_rpn_bbox": 0.01511, "loss_cls": 0.06884, "acc": 97.21025, "loss_bbox": 0.09541, "loss": 0.18803, "grad_norm": 1.74774, "time": 0.70614}
75
+ {"mode": "train", "epoch": 4, "iter": 7000, "lr": 3e-05, "memory": 10003, "data_time": 0.00298, "loss_rpn_cls": 0.00855, "loss_rpn_bbox": 0.01443, "loss_cls": 0.06406, "acc": 97.42847, "loss_bbox": 0.08737, "loss": 0.17441, "grad_norm": 1.55191, "time": 0.70692}
76
+ {"mode": "train", "epoch": 4, "iter": 7500, "lr": 3e-05, "memory": 10003, "data_time": 0.00311, "loss_rpn_cls": 0.00791, "loss_rpn_bbox": 0.01405, "loss_cls": 0.06507, "acc": 97.36416, "loss_bbox": 0.09107, "loss": 0.1781, "grad_norm": Infinity, "time": 0.70683}
77
+ {"mode": "train", "epoch": 4, "iter": 8000, "lr": 3e-05, "memory": 10003, "data_time": 0.00293, "loss_rpn_cls": 0.00856, "loss_rpn_bbox": 0.01447, "loss_cls": 0.06834, "acc": 97.2417, "loss_bbox": 0.09381, "loss": 0.18517, "grad_norm": 1.60042, "time": 0.70698}
78
+ {"mode": "train", "epoch": 4, "iter": 8500, "lr": 3e-05, "memory": 10003, "data_time": 0.003, "loss_rpn_cls": 0.00841, "loss_rpn_bbox": 0.01396, "loss_cls": 0.06623, "acc": 97.31113, "loss_bbox": 0.0903, "loss": 0.17891, "grad_norm": Infinity, "time": 0.70621}
79
+ {"mode": "train", "epoch": 4, "iter": 9000, "lr": 3e-05, "memory": 10003, "data_time": 0.00315, "loss_rpn_cls": 0.00782, "loss_rpn_bbox": 0.01505, "loss_cls": 0.06803, "acc": 97.20898, "loss_bbox": 0.09263, "loss": 0.18354, "grad_norm": 1.5997, "time": 0.70617}
80
+ {"mode": "train", "epoch": 4, "iter": 9500, "lr": 3e-05, "memory": 10003, "data_time": 0.00305, "loss_rpn_cls": 0.00786, "loss_rpn_bbox": 0.01421, "loss_cls": 0.06668, "acc": 97.28804, "loss_bbox": 0.09168, "loss": 0.18044, "grad_norm": 1.6306, "time": 0.70627}
81
+ {"mode": "val", "epoch": 4, "iter": 1068, "lr": 3e-05, "mAP": 0.60241}
82
+ {"mode": "train", "epoch": 5, "iter": 500, "lr": 3e-05, "memory": 10004, "data_time": 0.00982, "loss_rpn_cls": 0.00797, "loss_rpn_bbox": 0.0148, "loss_cls": 0.06447, "acc": 97.35972, "loss_bbox": 0.0921, "loss": 0.17934, "grad_norm": 1.54504, "time": 0.71315}
83
+ {"mode": "train", "epoch": 5, "iter": 1000, "lr": 3e-05, "memory": 10004, "data_time": 0.00318, "loss_rpn_cls": 0.0074, "loss_rpn_bbox": 0.01338, "loss_cls": 0.06703, "acc": 97.26191, "loss_bbox": 0.09351, "loss": 0.18133, "grad_norm": 1.64228, "time": 0.70567}
84
+ {"mode": "train", "epoch": 5, "iter": 1500, "lr": 3e-05, "memory": 10004, "data_time": 0.00313, "loss_rpn_cls": 0.00752, "loss_rpn_bbox": 0.01398, "loss_cls": 0.06632, "acc": 97.3188, "loss_bbox": 0.09033, "loss": 0.17815, "grad_norm": Infinity, "time": 0.70605}
85
+ {"mode": "train", "epoch": 5, "iter": 2000, "lr": 3e-05, "memory": 10004, "data_time": 0.00316, "loss_rpn_cls": 0.00822, "loss_rpn_bbox": 0.01453, "loss_cls": 0.06723, "acc": 97.26997, "loss_bbox": 0.09379, "loss": 0.18376, "grad_norm": 1.57531, "time": 0.70583}
86
+ {"mode": "train", "epoch": 5, "iter": 2500, "lr": 3e-05, "memory": 10004, "data_time": 0.00315, "loss_rpn_cls": 0.00659, "loss_rpn_bbox": 0.01368, "loss_cls": 0.06615, "acc": 97.30889, "loss_bbox": 0.09161, "loss": 0.17803, "grad_norm": 1.47328, "time": 0.70617}
87
+ {"mode": "train", "epoch": 5, "iter": 3000, "lr": 3e-05, "memory": 10004, "data_time": 0.00317, "loss_rpn_cls": 0.00753, "loss_rpn_bbox": 0.01355, "loss_cls": 0.06472, "acc": 97.35776, "loss_bbox": 0.09029, "loss": 0.1761, "grad_norm": 1.4743, "time": 0.70599}
88
+ {"mode": "train", "epoch": 5, "iter": 3500, "lr": 3e-05, "memory": 10004, "data_time": 0.00341, "loss_rpn_cls": 0.00867, "loss_rpn_bbox": 0.0137, "loss_cls": 0.06716, "acc": 97.30776, "loss_bbox": 0.09246, "loss": 0.18198, "grad_norm": 1.57557, "time": 0.7047}
89
+ {"mode": "train", "epoch": 5, "iter": 4000, "lr": 3e-05, "memory": 10004, "data_time": 0.00329, "loss_rpn_cls": 0.00725, "loss_rpn_bbox": 0.01336, "loss_cls": 0.06399, "acc": 97.36479, "loss_bbox": 0.09183, "loss": 0.17643, "grad_norm": 1.53289, "time": 0.7041}
90
+ {"mode": "train", "epoch": 5, "iter": 4500, "lr": 3e-05, "memory": 10004, "data_time": 0.0032, "loss_rpn_cls": 0.0077, "loss_rpn_bbox": 0.01343, "loss_cls": 0.06722, "acc": 97.27275, "loss_bbox": 0.0919, "loss": 0.18025, "grad_norm": 1.64129, "time": 0.70387}
91
+ {"mode": "train", "epoch": 5, "iter": 5000, "lr": 3e-05, "memory": 10004, "data_time": 0.00322, "loss_rpn_cls": 0.00719, "loss_rpn_bbox": 0.01408, "loss_cls": 0.06313, "acc": 97.4249, "loss_bbox": 0.09242, "loss": 0.17681, "grad_norm": 1.48663, "time": 0.70352}
92
+ {"mode": "train", "epoch": 5, "iter": 5500, "lr": 3e-05, "memory": 10004, "data_time": 0.0032, "loss_rpn_cls": 0.00771, "loss_rpn_bbox": 0.01382, "loss_cls": 0.06601, "acc": 97.30776, "loss_bbox": 0.09194, "loss": 0.17947, "grad_norm": 1.44187, "time": 0.70352}
93
+ {"mode": "train", "epoch": 5, "iter": 6000, "lr": 3e-05, "memory": 10004, "data_time": 0.00319, "loss_rpn_cls": 0.00798, "loss_rpn_bbox": 0.01414, "loss_cls": 0.06534, "acc": 97.36636, "loss_bbox": 0.0932, "loss": 0.18065, "grad_norm": NaN, "time": 0.70363}
94
+ {"mode": "train", "epoch": 5, "iter": 6500, "lr": 3e-05, "memory": 10004, "data_time": 0.00316, "loss_rpn_cls": 0.00667, "loss_rpn_bbox": 0.01421, "loss_cls": 0.06538, "acc": 97.32427, "loss_bbox": 0.09177, "loss": 0.17802, "grad_norm": 1.51969, "time": 0.70379}
95
+ {"mode": "train", "epoch": 5, "iter": 7000, "lr": 3e-05, "memory": 10004, "data_time": 0.00314, "loss_rpn_cls": 0.00796, "loss_rpn_bbox": 0.01494, "loss_cls": 0.06666, "acc": 97.28408, "loss_bbox": 0.09254, "loss": 0.18209, "grad_norm": 1.49924, "time": 0.70353}
96
+ {"mode": "train", "epoch": 5, "iter": 7500, "lr": 3e-05, "memory": 10004, "data_time": 0.00322, "loss_rpn_cls": 0.00761, "loss_rpn_bbox": 0.01475, "loss_cls": 0.06705, "acc": 97.26299, "loss_bbox": 0.09411, "loss": 0.18353, "grad_norm": 1.49761, "time": 0.70387}
97
+ {"mode": "train", "epoch": 5, "iter": 8000, "lr": 3e-05, "memory": 10004, "data_time": 0.00319, "loss_rpn_cls": 0.00806, "loss_rpn_bbox": 0.01437, "loss_cls": 0.06515, "acc": 97.33442, "loss_bbox": 0.09193, "loss": 0.17951, "grad_norm": 1.39995, "time": 0.70343}
98
+ {"mode": "train", "epoch": 5, "iter": 8500, "lr": 3e-05, "memory": 10004, "data_time": 0.00311, "loss_rpn_cls": 0.00788, "loss_rpn_bbox": 0.0141, "loss_cls": 0.06625, "acc": 97.28555, "loss_bbox": 0.09326, "loss": 0.18149, "grad_norm": 1.48954, "time": 0.70349}
99
+ {"mode": "train", "epoch": 5, "iter": 9000, "lr": 3e-05, "memory": 10004, "data_time": 0.00309, "loss_rpn_cls": 0.00756, "loss_rpn_bbox": 0.01379, "loss_cls": 0.06828, "acc": 97.23066, "loss_bbox": 0.09411, "loss": 0.18374, "grad_norm": 1.44517, "time": 0.704}
100
+ {"mode": "train", "epoch": 5, "iter": 9500, "lr": 3e-05, "memory": 10004, "data_time": 0.00312, "loss_rpn_cls": 0.00687, "loss_rpn_bbox": 0.01412, "loss_cls": 0.0643, "acc": 97.37168, "loss_bbox": 0.09014, "loss": 0.17544, "grad_norm": NaN, "time": 0.70304}
101
+ {"mode": "val", "epoch": 5, "iter": 1068, "lr": 3e-05, "mAP": 0.635}
102
+ {"mode": "train", "epoch": 6, "iter": 500, "lr": 3e-05, "memory": 10004, "data_time": 0.00969, "loss_rpn_cls": 0.00758, "loss_rpn_bbox": 0.01393, "loss_cls": 0.06761, "acc": 97.24829, "loss_bbox": 0.09384, "loss": 0.18295, "grad_norm": 1.52964, "time": 0.7108}
103
+ {"mode": "train", "epoch": 6, "iter": 1000, "lr": 3e-05, "memory": 10004, "data_time": 0.00317, "loss_rpn_cls": 0.00741, "loss_rpn_bbox": 0.01353, "loss_cls": 0.06772, "acc": 97.2397, "loss_bbox": 0.09614, "loss": 0.18481, "grad_norm": 1.50269, "time": 0.70345}
104
+ {"mode": "train", "epoch": 6, "iter": 1500, "lr": 3e-05, "memory": 10004, "data_time": 0.00317, "loss_rpn_cls": 0.00773, "loss_rpn_bbox": 0.01487, "loss_cls": 0.06721, "acc": 97.20547, "loss_bbox": 0.09557, "loss": 0.18539, "grad_norm": 1.4774, "time": 0.70405}
105
+ {"mode": "train", "epoch": 6, "iter": 2000, "lr": 3e-05, "memory": 10004, "data_time": 0.00314, "loss_rpn_cls": 0.00694, "loss_rpn_bbox": 0.01344, "loss_cls": 0.06499, "acc": 97.33574, "loss_bbox": 0.09222, "loss": 0.17759, "grad_norm": 1.42718, "time": 0.70324}
106
+ {"mode": "train", "epoch": 6, "iter": 2500, "lr": 3e-05, "memory": 10004, "data_time": 0.00315, "loss_rpn_cls": 0.00729, "loss_rpn_bbox": 0.01414, "loss_cls": 0.06448, "acc": 97.371, "loss_bbox": 0.09273, "loss": 0.17864, "grad_norm": 1.40054, "time": 0.7034}
107
+ {"mode": "train", "epoch": 6, "iter": 3000, "lr": 3e-05, "memory": 10004, "data_time": 0.00316, "loss_rpn_cls": 0.00631, "loss_rpn_bbox": 0.01256, "loss_cls": 0.06312, "acc": 97.39292, "loss_bbox": 0.09029, "loss": 0.17228, "grad_norm": Infinity, "time": 0.70377}
108
+ {"mode": "train", "epoch": 6, "iter": 3500, "lr": 3e-05, "memory": 10004, "data_time": 0.00306, "loss_rpn_cls": 0.00797, "loss_rpn_bbox": 0.01365, "loss_cls": 0.06694, "acc": 97.28818, "loss_bbox": 0.09198, "loss": 0.18055, "grad_norm": 1.46973, "time": 0.70329}
109
+ {"mode": "train", "epoch": 6, "iter": 4000, "lr": 3e-05, "memory": 10004, "data_time": 0.00315, "loss_rpn_cls": 0.0067, "loss_rpn_bbox": 0.01313, "loss_cls": 0.06606, "acc": 97.29551, "loss_bbox": 0.09322, "loss": 0.1791, "grad_norm": 1.4166, "time": 0.70342}
110
+ {"mode": "train", "epoch": 6, "iter": 4500, "lr": 3e-05, "memory": 10004, "data_time": 0.00317, "loss_rpn_cls": 0.00608, "loss_rpn_bbox": 0.01367, "loss_cls": 0.06253, "acc": 97.42217, "loss_bbox": 0.09013, "loss": 0.17241, "grad_norm": 1.3777, "time": 0.70328}
111
+ {"mode": "train", "epoch": 6, "iter": 5000, "lr": 3e-05, "memory": 10004, "data_time": 0.00316, "loss_rpn_cls": 0.00722, "loss_rpn_bbox": 0.01366, "loss_cls": 0.06422, "acc": 97.36304, "loss_bbox": 0.08906, "loss": 0.17416, "grad_norm": NaN, "time": 0.70365}
112
+ {"mode": "train", "epoch": 6, "iter": 5500, "lr": 3e-05, "memory": 10004, "data_time": 0.00313, "loss_rpn_cls": 0.00645, "loss_rpn_bbox": 0.01318, "loss_cls": 0.06122, "acc": 97.50303, "loss_bbox": 0.0903, "loss": 0.17115, "grad_norm": 1.33215, "time": 0.70295}
113
+ {"mode": "train", "epoch": 6, "iter": 6000, "lr": 3e-05, "memory": 10004, "data_time": 0.00314, "loss_rpn_cls": 0.00727, "loss_rpn_bbox": 0.01344, "loss_cls": 0.06388, "acc": 97.36821, "loss_bbox": 0.0907, "loss": 0.17529, "grad_norm": 1.3809, "time": 0.70382}
114
+ {"mode": "train", "epoch": 6, "iter": 6500, "lr": 3e-05, "memory": 10004, "data_time": 0.00319, "loss_rpn_cls": 0.00633, "loss_rpn_bbox": 0.01213, "loss_cls": 0.06113, "acc": 97.52168, "loss_bbox": 0.08842, "loss": 0.16801, "grad_norm": 1.34015, "time": 0.70386}
115
+ {"mode": "train", "epoch": 6, "iter": 7000, "lr": 3e-05, "memory": 10004, "data_time": 0.0031, "loss_rpn_cls": 0.00712, "loss_rpn_bbox": 0.01389, "loss_cls": 0.06616, "acc": 97.30811, "loss_bbox": 0.09317, "loss": 0.18034, "grad_norm": 1.39726, "time": 0.70336}
116
+ {"mode": "train", "epoch": 6, "iter": 7500, "lr": 3e-05, "memory": 10004, "data_time": 0.00314, "loss_rpn_cls": 0.00669, "loss_rpn_bbox": 0.01386, "loss_cls": 0.06566, "acc": 97.33442, "loss_bbox": 0.09314, "loss": 0.17935, "grad_norm": 1.43897, "time": 0.70315}
117
+ {"mode": "train", "epoch": 6, "iter": 8000, "lr": 3e-05, "memory": 10004, "data_time": 0.00315, "loss_rpn_cls": 0.00684, "loss_rpn_bbox": 0.01376, "loss_cls": 0.06401, "acc": 97.40571, "loss_bbox": 0.09196, "loss": 0.17657, "grad_norm": 1.32909, "time": 0.70396}
118
+ {"mode": "train", "epoch": 6, "iter": 8500, "lr": 3e-05, "memory": 10004, "data_time": 0.00318, "loss_rpn_cls": 0.0066, "loss_rpn_bbox": 0.01342, "loss_cls": 0.06328, "acc": 97.38291, "loss_bbox": 0.09124, "loss": 0.17454, "grad_norm": 1.38079, "time": 0.70355}
119
+ {"mode": "train", "epoch": 6, "iter": 9000, "lr": 3e-05, "memory": 10004, "data_time": 0.00317, "loss_rpn_cls": 0.00644, "loss_rpn_bbox": 0.01311, "loss_cls": 0.06469, "acc": 97.32671, "loss_bbox": 0.09195, "loss": 0.1762, "grad_norm": 1.36109, "time": 0.7034}
120
+ {"mode": "train", "epoch": 6, "iter": 9500, "lr": 3e-05, "memory": 10004, "data_time": 0.00319, "loss_rpn_cls": 0.00624, "loss_rpn_bbox": 0.0122, "loss_cls": 0.06377, "acc": 97.36157, "loss_bbox": 0.09188, "loss": 0.17409, "grad_norm": 1.31857, "time": 0.70464}
121
+ {"mode": "val", "epoch": 6, "iter": 1068, "lr": 3e-05, "mAP": 0.63884}
122
+ {"mode": "train", "epoch": 7, "iter": 500, "lr": 3e-05, "memory": 10004, "data_time": 0.01027, "loss_rpn_cls": 0.00685, "loss_rpn_bbox": 0.01373, "loss_cls": 0.06395, "acc": 97.36973, "loss_bbox": 0.09241, "loss": 0.17694, "grad_norm": Infinity, "time": 0.71474}
123
+ {"mode": "train", "epoch": 7, "iter": 1000, "lr": 3e-05, "memory": 10004, "data_time": 0.00314, "loss_rpn_cls": 0.00616, "loss_rpn_bbox": 0.0136, "loss_cls": 0.06221, "acc": 97.42837, "loss_bbox": 0.09066, "loss": 0.17262, "grad_norm": 1.3114, "time": 0.70725}
124
+ {"mode": "train", "epoch": 7, "iter": 1500, "lr": 3e-05, "memory": 10004, "data_time": 0.00316, "loss_rpn_cls": 0.00643, "loss_rpn_bbox": 0.01377, "loss_cls": 0.06172, "acc": 97.43096, "loss_bbox": 0.08918, "loss": 0.1711, "grad_norm": 1.29066, "time": 0.70709}
125
+ {"mode": "train", "epoch": 7, "iter": 2000, "lr": 3e-05, "memory": 10004, "data_time": 0.00314, "loss_rpn_cls": 0.00601, "loss_rpn_bbox": 0.01277, "loss_cls": 0.06304, "acc": 97.40586, "loss_bbox": 0.08979, "loss": 0.1716, "grad_norm": 1.35276, "time": 0.70546}
126
+ {"mode": "train", "epoch": 7, "iter": 2500, "lr": 3e-05, "memory": 10004, "data_time": 0.00306, "loss_rpn_cls": 0.00642, "loss_rpn_bbox": 0.01393, "loss_cls": 0.06648, "acc": 97.26895, "loss_bbox": 0.09322, "loss": 0.18005, "grad_norm": 1.38566, "time": 0.70778}
127
+ {"mode": "train", "epoch": 7, "iter": 3000, "lr": 3e-05, "memory": 10004, "data_time": 0.00313, "loss_rpn_cls": 0.0062, "loss_rpn_bbox": 0.01256, "loss_cls": 0.06256, "acc": 97.41543, "loss_bbox": 0.08949, "loss": 0.17082, "grad_norm": NaN, "time": 0.70611}
128
+ {"mode": "train", "epoch": 7, "iter": 3500, "lr": 3e-05, "memory": 10004, "data_time": 0.00322, "loss_rpn_cls": 0.00676, "loss_rpn_bbox": 0.01293, "loss_cls": 0.06427, "acc": 97.3709, "loss_bbox": 0.09181, "loss": 0.17578, "grad_norm": 1.3366, "time": 0.70723}
129
+ {"mode": "train", "epoch": 7, "iter": 4000, "lr": 3e-05, "memory": 10004, "data_time": 0.00337, "loss_rpn_cls": 0.0057, "loss_rpn_bbox": 0.01296, "loss_cls": 0.06401, "acc": 97.37295, "loss_bbox": 0.0906, "loss": 0.17327, "grad_norm": 1.33023, "time": 0.70982}
130
+ {"mode": "train", "epoch": 7, "iter": 4500, "lr": 3e-05, "memory": 10004, "data_time": 0.00325, "loss_rpn_cls": 0.00674, "loss_rpn_bbox": 0.01322, "loss_cls": 0.06736, "acc": 97.21436, "loss_bbox": 0.09479, "loss": 0.18211, "grad_norm": 1.33832, "time": 0.70937}
131
+ {"mode": "train", "epoch": 7, "iter": 5000, "lr": 3e-05, "memory": 10004, "data_time": 0.00333, "loss_rpn_cls": 0.00679, "loss_rpn_bbox": 0.0143, "loss_cls": 0.06497, "acc": 97.30078, "loss_bbox": 0.09545, "loss": 0.18151, "grad_norm": 1.32336, "time": 0.70671}
132
+ {"mode": "train", "epoch": 7, "iter": 5500, "lr": 3e-05, "memory": 10004, "data_time": 0.00335, "loss_rpn_cls": 0.0067, "loss_rpn_bbox": 0.01376, "loss_cls": 0.06426, "acc": 97.34722, "loss_bbox": 0.09269, "loss": 0.17742, "grad_norm": Infinity, "time": 0.70309}
133
+ {"mode": "train", "epoch": 7, "iter": 6000, "lr": 3e-05, "memory": 10004, "data_time": 0.00344, "loss_rpn_cls": 0.0065, "loss_rpn_bbox": 0.01291, "loss_cls": 0.06326, "acc": 97.42749, "loss_bbox": 0.09275, "loss": 0.17542, "grad_norm": 1.2851, "time": 0.70416}
134
+ {"mode": "train", "epoch": 7, "iter": 6500, "lr": 3e-05, "memory": 10004, "data_time": 0.00351, "loss_rpn_cls": 0.00605, "loss_rpn_bbox": 0.01399, "loss_cls": 0.06299, "acc": 97.40537, "loss_bbox": 0.09415, "loss": 0.17718, "grad_norm": 1.34968, "time": 0.70305}
135
+ {"mode": "train", "epoch": 7, "iter": 7000, "lr": 3e-05, "memory": 10004, "data_time": 0.0035, "loss_rpn_cls": 0.00615, "loss_rpn_bbox": 0.01284, "loss_cls": 0.06222, "acc": 97.44932, "loss_bbox": 0.08895, "loss": 0.17016, "grad_norm": 1.2599, "time": 0.70376}
136
+ {"mode": "train", "epoch": 7, "iter": 7500, "lr": 3e-05, "memory": 10004, "data_time": 0.00345, "loss_rpn_cls": 0.00613, "loss_rpn_bbox": 0.01281, "loss_cls": 0.064, "acc": 97.34673, "loss_bbox": 0.0924, "loss": 0.17534, "grad_norm": 1.31366, "time": 0.70371}
137
+ {"mode": "train", "epoch": 7, "iter": 8000, "lr": 3e-05, "memory": 10004, "data_time": 0.00347, "loss_rpn_cls": 0.00594, "loss_rpn_bbox": 0.01243, "loss_cls": 0.06329, "acc": 97.40229, "loss_bbox": 0.08854, "loss": 0.17021, "grad_norm": NaN, "time": 0.70413}
138
+ {"mode": "train", "epoch": 7, "iter": 8500, "lr": 3e-05, "memory": 10004, "data_time": 0.00389, "loss_rpn_cls": 0.00602, "loss_rpn_bbox": 0.01294, "loss_cls": 0.06216, "acc": 97.44419, "loss_bbox": 0.09134, "loss": 0.17245, "grad_norm": 1.32243, "time": 0.7034}
139
+ {"mode": "train", "epoch": 7, "iter": 9000, "lr": 3e-05, "memory": 10004, "data_time": 0.00373, "loss_rpn_cls": 0.00637, "loss_rpn_bbox": 0.01273, "loss_cls": 0.06169, "acc": 97.45122, "loss_bbox": 0.08917, "loss": 0.16996, "grad_norm": 1.32909, "time": 0.70342}
140
+ {"mode": "train", "epoch": 7, "iter": 9500, "lr": 3e-05, "memory": 10004, "data_time": 0.00361, "loss_rpn_cls": 0.00604, "loss_rpn_bbox": 0.01278, "loss_cls": 0.06167, "acc": 97.45146, "loss_bbox": 0.08879, "loss": 0.16928, "grad_norm": 1.24395, "time": 0.7037}
141
+ {"mode": "val", "epoch": 7, "iter": 1068, "lr": 3e-05, "mAP": 0.65362}
142
+ {"mode": "train", "epoch": 8, "iter": 500, "lr": 3e-05, "memory": 10004, "data_time": 0.01034, "loss_rpn_cls": 0.00607, "loss_rpn_bbox": 0.01297, "loss_cls": 0.06371, "acc": 97.37153, "loss_bbox": 0.0903, "loss": 0.17305, "grad_norm": NaN, "time": 0.71107}
143
+ {"mode": "train", "epoch": 8, "iter": 1000, "lr": 3e-05, "memory": 10004, "data_time": 0.00319, "loss_rpn_cls": 0.00623, "loss_rpn_bbox": 0.013, "loss_cls": 0.06318, "acc": 97.40625, "loss_bbox": 0.0913, "loss": 0.17371, "grad_norm": 1.27415, "time": 0.70521}
144
+ {"mode": "train", "epoch": 8, "iter": 1500, "lr": 3e-05, "memory": 10004, "data_time": 0.00326, "loss_rpn_cls": 0.00542, "loss_rpn_bbox": 0.01337, "loss_cls": 0.0618, "acc": 97.41802, "loss_bbox": 0.08969, "loss": 0.17029, "grad_norm": 1.25077, "time": 0.70549}
145
+ {"mode": "train", "epoch": 8, "iter": 2000, "lr": 3e-05, "memory": 10004, "data_time": 0.00317, "loss_rpn_cls": 0.00589, "loss_rpn_bbox": 0.0127, "loss_cls": 0.06165, "acc": 97.46597, "loss_bbox": 0.0908, "loss": 0.17103, "grad_norm": 1.22051, "time": 0.70561}
146
+ {"mode": "train", "epoch": 8, "iter": 2500, "lr": 3e-05, "memory": 10004, "data_time": 0.0032, "loss_rpn_cls": 0.00561, "loss_rpn_bbox": 0.01292, "loss_cls": 0.06164, "acc": 97.45371, "loss_bbox": 0.09063, "loss": 0.1708, "grad_norm": 1.30841, "time": 0.70471}
147
+ {"mode": "train", "epoch": 8, "iter": 3000, "lr": 3e-05, "memory": 10004, "data_time": 0.0033, "loss_rpn_cls": 0.0059, "loss_rpn_bbox": 0.01316, "loss_cls": 0.06146, "acc": 97.45718, "loss_bbox": 0.08864, "loss": 0.16917, "grad_norm": 1.24525, "time": 0.70526}
148
+ {"mode": "train", "epoch": 8, "iter": 3500, "lr": 3e-05, "memory": 10004, "data_time": 0.00319, "loss_rpn_cls": 0.00577, "loss_rpn_bbox": 0.01213, "loss_cls": 0.06265, "acc": 97.44863, "loss_bbox": 0.09067, "loss": 0.17121, "grad_norm": 1.23977, "time": 0.70482}
149
+ {"mode": "train", "epoch": 8, "iter": 4000, "lr": 3e-05, "memory": 10004, "data_time": 0.00325, "loss_rpn_cls": 0.00628, "loss_rpn_bbox": 0.01313, "loss_cls": 0.06282, "acc": 97.3937, "loss_bbox": 0.09135, "loss": 0.17358, "grad_norm": 1.30069, "time": 0.70511}
150
+ {"mode": "train", "epoch": 8, "iter": 4500, "lr": 3e-05, "memory": 10004, "data_time": 0.00313, "loss_rpn_cls": 0.00556, "loss_rpn_bbox": 0.01266, "loss_cls": 0.06305, "acc": 97.40029, "loss_bbox": 0.09225, "loss": 0.17353, "grad_norm": 1.24952, "time": 0.705}
151
+ {"mode": "train", "epoch": 8, "iter": 5000, "lr": 3e-05, "memory": 10004, "data_time": 0.00318, "loss_rpn_cls": 0.0054, "loss_rpn_bbox": 0.01244, "loss_cls": 0.06059, "acc": 97.48159, "loss_bbox": 0.08863, "loss": 0.16706, "grad_norm": NaN, "time": 0.70445}
152
+ {"mode": "train", "epoch": 8, "iter": 5500, "lr": 3e-05, "memory": 10004, "data_time": 0.00312, "loss_rpn_cls": 0.00524, "loss_rpn_bbox": 0.01271, "loss_cls": 0.06255, "acc": 97.41436, "loss_bbox": 0.08984, "loss": 0.17035, "grad_norm": 1.21673, "time": 0.70431}
153
+ {"mode": "train", "epoch": 8, "iter": 6000, "lr": 3e-05, "memory": 10004, "data_time": 0.00317, "loss_rpn_cls": 0.0061, "loss_rpn_bbox": 0.0142, "loss_cls": 0.06329, "acc": 97.40269, "loss_bbox": 0.09341, "loss": 0.177, "grad_norm": 1.27723, "time": 0.70393}
154
+ {"mode": "train", "epoch": 8, "iter": 6500, "lr": 3e-05, "memory": 10004, "data_time": 0.00312, "loss_rpn_cls": 0.0055, "loss_rpn_bbox": 0.01249, "loss_cls": 0.06222, "acc": 97.40522, "loss_bbox": 0.09164, "loss": 0.17185, "grad_norm": 1.22069, "time": 0.70418}
155
+ {"mode": "train", "epoch": 8, "iter": 7000, "lr": 3e-05, "memory": 10004, "data_time": 0.00319, "loss_rpn_cls": 0.00561, "loss_rpn_bbox": 0.01308, "loss_cls": 0.06063, "acc": 97.50688, "loss_bbox": 0.09079, "loss": 0.17011, "grad_norm": NaN, "time": 0.70416}
156
+ {"mode": "train", "epoch": 8, "iter": 7500, "lr": 3e-05, "memory": 10004, "data_time": 0.00312, "loss_rpn_cls": 0.00559, "loss_rpn_bbox": 0.01216, "loss_cls": 0.06069, "acc": 97.48203, "loss_bbox": 0.0894, "loss": 0.16784, "grad_norm": 1.22819, "time": 0.70464}
157
+ {"mode": "train", "epoch": 8, "iter": 8000, "lr": 3e-05, "memory": 10004, "data_time": 0.00309, "loss_rpn_cls": 0.00609, "loss_rpn_bbox": 0.01369, "loss_cls": 0.06292, "acc": 97.39512, "loss_bbox": 0.09196, "loss": 0.17466, "grad_norm": 1.28696, "time": 0.7039}
158
+ {"mode": "train", "epoch": 8, "iter": 8500, "lr": 3e-05, "memory": 10004, "data_time": 0.00312, "loss_rpn_cls": 0.00633, "loss_rpn_bbox": 0.01368, "loss_cls": 0.06267, "acc": 97.43198, "loss_bbox": 0.0905, "loss": 0.17319, "grad_norm": 1.22907, "time": 0.70491}
159
+ {"mode": "train", "epoch": 8, "iter": 9000, "lr": 3e-05, "memory": 10004, "data_time": 0.00314, "loss_rpn_cls": 0.00643, "loss_rpn_bbox": 0.01311, "loss_cls": 0.06295, "acc": 97.40654, "loss_bbox": 0.09002, "loss": 0.17251, "grad_norm": 1.24505, "time": 0.70456}
160
+ {"mode": "train", "epoch": 8, "iter": 9500, "lr": 3e-05, "memory": 10004, "data_time": 0.00316, "loss_rpn_cls": 0.00613, "loss_rpn_bbox": 0.01365, "loss_cls": 0.0627, "acc": 97.40762, "loss_bbox": 0.08999, "loss": 0.17247, "grad_norm": 1.26403, "time": 0.70388}
161
+ {"mode": "val", "epoch": 8, "iter": 1068, "lr": 3e-05, "mAP": 0.68028}
162
+ {"mode": "train", "epoch": 9, "iter": 500, "lr": 0.0, "memory": 10004, "data_time": 0.0096, "loss_rpn_cls": 0.00531, "loss_rpn_bbox": 0.01194, "loss_cls": 0.05948, "acc": 97.52534, "loss_bbox": 0.0875, "loss": 0.16423, "grad_norm": 1.13119, "time": 0.71111}
163
+ {"mode": "train", "epoch": 9, "iter": 1000, "lr": 0.0, "memory": 10004, "data_time": 0.00315, "loss_rpn_cls": 0.00485, "loss_rpn_bbox": 0.01133, "loss_cls": 0.0576, "acc": 97.61523, "loss_bbox": 0.08432, "loss": 0.1581, "grad_norm": NaN, "time": 0.70328}
164
+ {"mode": "train", "epoch": 9, "iter": 1500, "lr": 0.0, "memory": 10004, "data_time": 0.00318, "loss_rpn_cls": 0.00479, "loss_rpn_bbox": 0.01147, "loss_cls": 0.05719, "acc": 97.63667, "loss_bbox": 0.08564, "loss": 0.15909, "grad_norm": 1.10588, "time": 0.70418}
165
+ {"mode": "train", "epoch": 9, "iter": 2000, "lr": 0.0, "memory": 10004, "data_time": 0.00328, "loss_rpn_cls": 0.00445, "loss_rpn_bbox": 0.01102, "loss_cls": 0.05777, "acc": 97.60742, "loss_bbox": 0.08281, "loss": 0.15605, "grad_norm": 1.10485, "time": 0.70377}
166
+ {"mode": "train", "epoch": 9, "iter": 2500, "lr": 0.0, "memory": 10004, "data_time": 0.00323, "loss_rpn_cls": 0.004, "loss_rpn_bbox": 0.0104, "loss_cls": 0.05642, "acc": 97.65225, "loss_bbox": 0.08345, "loss": 0.15427, "grad_norm": 1.08479, "time": 0.70413}
167
+ {"mode": "train", "epoch": 9, "iter": 3000, "lr": 0.0, "memory": 10009, "data_time": 0.00321, "loss_rpn_cls": 0.0043, "loss_rpn_bbox": 0.01177, "loss_cls": 0.05633, "acc": 97.63774, "loss_bbox": 0.08429, "loss": 0.15669, "grad_norm": 1.0901, "time": 0.70429}
168
+ {"mode": "train", "epoch": 9, "iter": 3500, "lr": 0.0, "memory": 10009, "data_time": 0.00336, "loss_rpn_cls": 0.00444, "loss_rpn_bbox": 0.01165, "loss_cls": 0.05807, "acc": 97.59429, "loss_bbox": 0.08766, "loss": 0.16183, "grad_norm": 1.09523, "time": 0.70379}
169
+ {"mode": "train", "epoch": 9, "iter": 4000, "lr": 0.0, "memory": 10009, "data_time": 0.00323, "loss_rpn_cls": 0.00416, "loss_rpn_bbox": 0.01136, "loss_cls": 0.05576, "acc": 97.6645, "loss_bbox": 0.08266, "loss": 0.15395, "grad_norm": 1.08236, "time": 0.70433}
170
+ {"mode": "train", "epoch": 9, "iter": 4500, "lr": 0.0, "memory": 10009, "data_time": 0.00327, "loss_rpn_cls": 0.00403, "loss_rpn_bbox": 0.01108, "loss_cls": 0.05465, "acc": 97.73335, "loss_bbox": 0.0813, "loss": 0.15106, "grad_norm": 1.09286, "time": 0.70361}
171
+ {"mode": "train", "epoch": 9, "iter": 5000, "lr": 0.0, "memory": 10009, "data_time": 0.00312, "loss_rpn_cls": 0.00432, "loss_rpn_bbox": 0.01168, "loss_cls": 0.05585, "acc": 97.68145, "loss_bbox": 0.08638, "loss": 0.15823, "grad_norm": 1.08831, "time": 0.70429}
172
+ {"mode": "train", "epoch": 9, "iter": 5500, "lr": 0.0, "memory": 10009, "data_time": 0.00318, "loss_rpn_cls": 0.00476, "loss_rpn_bbox": 0.01131, "loss_cls": 0.05941, "acc": 97.49692, "loss_bbox": 0.08783, "loss": 0.16332, "grad_norm": NaN, "time": 0.7037}
173
+ {"mode": "train", "epoch": 9, "iter": 6000, "lr": 0.0, "memory": 10009, "data_time": 0.00317, "loss_rpn_cls": 0.00435, "loss_rpn_bbox": 0.01112, "loss_cls": 0.05707, "acc": 97.60728, "loss_bbox": 0.08462, "loss": 0.15716, "grad_norm": 1.09507, "time": 0.70447}
174
+ {"mode": "train", "epoch": 9, "iter": 6500, "lr": 0.0, "memory": 10009, "data_time": 0.0032, "loss_rpn_cls": 0.00469, "loss_rpn_bbox": 0.01169, "loss_cls": 0.05773, "acc": 97.60688, "loss_bbox": 0.08611, "loss": 0.16022, "grad_norm": 1.09424, "time": 0.70396}
175
+ {"mode": "train", "epoch": 9, "iter": 7000, "lr": 0.0, "memory": 10009, "data_time": 0.00312, "loss_rpn_cls": 0.00392, "loss_rpn_bbox": 0.01088, "loss_cls": 0.05582, "acc": 97.67026, "loss_bbox": 0.08469, "loss": 0.1553, "grad_norm": 1.08446, "time": 0.70505}
176
+ {"mode": "train", "epoch": 9, "iter": 7500, "lr": 0.0, "memory": 10009, "data_time": 0.00306, "loss_rpn_cls": 0.00398, "loss_rpn_bbox": 0.01162, "loss_cls": 0.05699, "acc": 97.62109, "loss_bbox": 0.08521, "loss": 0.15779, "grad_norm": NaN, "time": 0.7043}
177
+ {"mode": "train", "epoch": 9, "iter": 8000, "lr": 0.0, "memory": 10009, "data_time": 0.00314, "loss_rpn_cls": 0.00398, "loss_rpn_bbox": 0.01094, "loss_cls": 0.0542, "acc": 97.7332, "loss_bbox": 0.08182, "loss": 0.15094, "grad_norm": 1.06138, "time": 0.70418}
178
+ {"mode": "train", "epoch": 9, "iter": 8500, "lr": 0.0, "memory": 10009, "data_time": 0.00307, "loss_rpn_cls": 0.00364, "loss_rpn_bbox": 0.0109, "loss_cls": 0.05573, "acc": 97.67026, "loss_bbox": 0.08424, "loss": 0.15451, "grad_norm": 1.08848, "time": 0.70459}
179
+ {"mode": "train", "epoch": 9, "iter": 9000, "lr": 0.0, "memory": 10009, "data_time": 0.00303, "loss_rpn_cls": 0.00388, "loss_rpn_bbox": 0.01061, "loss_cls": 0.05564, "acc": 97.67539, "loss_bbox": 0.08523, "loss": 0.15535, "grad_norm": 1.07302, "time": 0.70385}
180
+ {"mode": "train", "epoch": 9, "iter": 9500, "lr": 0.0, "memory": 10009, "data_time": 0.00317, "loss_rpn_cls": 0.00389, "loss_rpn_bbox": 0.01115, "loss_cls": 0.05628, "acc": 97.64321, "loss_bbox": 0.08461, "loss": 0.15593, "grad_norm": NaN, "time": 0.70431}
181
+ {"mode": "val", "epoch": 9, "iter": 1068, "lr": 0.0, "mAP": 0.70837}
182
+ {"mode": "train", "epoch": 10, "iter": 500, "lr": 0.0, "memory": 10009, "data_time": 0.01063, "loss_rpn_cls": 0.00395, "loss_rpn_bbox": 0.01082, "loss_cls": 0.05517, "acc": 97.72305, "loss_bbox": 0.08164, "loss": 0.15158, "grad_norm": NaN, "time": 0.71268}
183
+ {"mode": "train", "epoch": 10, "iter": 1000, "lr": 0.0, "memory": 10009, "data_time": 0.00316, "loss_rpn_cls": 0.00342, "loss_rpn_bbox": 0.0106, "loss_cls": 0.05366, "acc": 97.74199, "loss_bbox": 0.08213, "loss": 0.1498, "grad_norm": 1.07341, "time": 0.70521}
184
+ {"mode": "train", "epoch": 10, "iter": 1500, "lr": 0.0, "memory": 10009, "data_time": 0.00305, "loss_rpn_cls": 0.00408, "loss_rpn_bbox": 0.01086, "loss_cls": 0.05396, "acc": 97.72798, "loss_bbox": 0.08246, "loss": 0.15137, "grad_norm": 1.08158, "time": 0.70519}
185
+ {"mode": "train", "epoch": 10, "iter": 2000, "lr": 0.0, "memory": 10009, "data_time": 0.00314, "loss_rpn_cls": 0.00389, "loss_rpn_bbox": 0.01112, "loss_cls": 0.05468, "acc": 97.70991, "loss_bbox": 0.08288, "loss": 0.15256, "grad_norm": 1.09361, "time": 0.70513}
186
+ {"mode": "train", "epoch": 10, "iter": 2500, "lr": 0.0, "memory": 10009, "data_time": 0.00309, "loss_rpn_cls": 0.00388, "loss_rpn_bbox": 0.01121, "loss_cls": 0.05566, "acc": 97.68081, "loss_bbox": 0.08443, "loss": 0.15519, "grad_norm": 1.08328, "time": 0.70546}
187
+ {"mode": "train", "epoch": 10, "iter": 3000, "lr": 0.0, "memory": 10009, "data_time": 0.0031, "loss_rpn_cls": 0.004, "loss_rpn_bbox": 0.01074, "loss_cls": 0.05571, "acc": 97.67822, "loss_bbox": 0.08369, "loss": 0.15414, "grad_norm": 1.10646, "time": 0.70534}
188
+ {"mode": "train", "epoch": 10, "iter": 3500, "lr": 0.0, "memory": 10009, "data_time": 0.00336, "loss_rpn_cls": 0.00415, "loss_rpn_bbox": 0.01122, "loss_cls": 0.05643, "acc": 97.65176, "loss_bbox": 0.0838, "loss": 0.1556, "grad_norm": 1.09534, "time": 0.70596}
189
+ {"mode": "train", "epoch": 10, "iter": 4000, "lr": 0.0, "memory": 10009, "data_time": 0.0029, "loss_rpn_cls": 0.00371, "loss_rpn_bbox": 0.01119, "loss_cls": 0.05531, "acc": 97.68486, "loss_bbox": 0.08163, "loss": 0.15184, "grad_norm": 1.10048, "time": 0.70512}
190
+ {"mode": "train", "epoch": 10, "iter": 4500, "lr": 0.0, "memory": 10009, "data_time": 0.00289, "loss_rpn_cls": 0.00382, "loss_rpn_bbox": 0.01138, "loss_cls": 0.05524, "acc": 97.67988, "loss_bbox": 0.08332, "loss": 0.15375, "grad_norm": NaN, "time": 0.70496}
191
+ {"mode": "train", "epoch": 10, "iter": 5000, "lr": 0.0, "memory": 10009, "data_time": 0.00288, "loss_rpn_cls": 0.00357, "loss_rpn_bbox": 0.01085, "loss_cls": 0.05408, "acc": 97.7229, "loss_bbox": 0.08338, "loss": 0.15188, "grad_norm": 1.10658, "time": 0.70486}
192
+ {"mode": "train", "epoch": 10, "iter": 5500, "lr": 0.0, "memory": 10009, "data_time": 0.00301, "loss_rpn_cls": 0.00427, "loss_rpn_bbox": 0.0111, "loss_cls": 0.05591, "acc": 97.66392, "loss_bbox": 0.08524, "loss": 0.15652, "grad_norm": 1.10742, "time": 0.70545}
193
+ {"mode": "train", "epoch": 10, "iter": 6000, "lr": 0.0, "memory": 10009, "data_time": 0.00293, "loss_rpn_cls": 0.00393, "loss_rpn_bbox": 0.01117, "loss_cls": 0.05514, "acc": 97.68887, "loss_bbox": 0.08468, "loss": 0.15491, "grad_norm": 1.11964, "time": 0.7053}
194
+ {"mode": "train", "epoch": 10, "iter": 6500, "lr": 0.0, "memory": 10009, "data_time": 0.00299, "loss_rpn_cls": 0.00376, "loss_rpn_bbox": 0.01106, "loss_cls": 0.05526, "acc": 97.66675, "loss_bbox": 0.08385, "loss": 0.15394, "grad_norm": NaN, "time": 0.70445}
195
+ {"mode": "train", "epoch": 10, "iter": 7000, "lr": 0.0, "memory": 10009, "data_time": 0.00306, "loss_rpn_cls": 0.00336, "loss_rpn_bbox": 0.01008, "loss_cls": 0.05288, "acc": 97.79194, "loss_bbox": 0.08038, "loss": 0.1467, "grad_norm": 1.06805, "time": 0.70536}
196
+ {"mode": "train", "epoch": 10, "iter": 7500, "lr": 0.0, "memory": 10009, "data_time": 0.00301, "loss_rpn_cls": 0.00388, "loss_rpn_bbox": 0.01073, "loss_cls": 0.05486, "acc": 97.67368, "loss_bbox": 0.08336, "loss": 0.15283, "grad_norm": 1.10242, "time": 0.70423}
197
+ {"mode": "train", "epoch": 10, "iter": 8000, "lr": 0.0, "memory": 10009, "data_time": 0.00298, "loss_rpn_cls": 0.00389, "loss_rpn_bbox": 0.01098, "loss_cls": 0.05431, "acc": 97.74551, "loss_bbox": 0.08222, "loss": 0.15141, "grad_norm": 1.10702, "time": 0.70494}
198
+ {"mode": "train", "epoch": 10, "iter": 8500, "lr": 0.0, "memory": 10009, "data_time": 0.00306, "loss_rpn_cls": 0.00372, "loss_rpn_bbox": 0.01069, "loss_cls": 0.0551, "acc": 97.67173, "loss_bbox": 0.08371, "loss": 0.15322, "grad_norm": Infinity, "time": 0.70463}
199
+ {"mode": "train", "epoch": 10, "iter": 9000, "lr": 0.0, "memory": 10009, "data_time": 0.00307, "loss_rpn_cls": 0.00388, "loss_rpn_bbox": 0.00996, "loss_cls": 0.05326, "acc": 97.77632, "loss_bbox": 0.08341, "loss": 0.15051, "grad_norm": NaN, "time": 0.70506}
200
+ {"mode": "train", "epoch": 10, "iter": 9500, "lr": 0.0, "memory": 10009, "data_time": 0.00305, "loss_rpn_cls": 0.00339, "loss_rpn_bbox": 0.01055, "loss_cls": 0.05413, "acc": 97.7187, "loss_bbox": 0.08359, "loss": 0.15166, "grad_norm": 1.09676, "time": 0.70465}
201
+ {"mode": "val", "epoch": 10, "iter": 1068, "lr": 0.0, "mAP": 0.71806}
202
+ {"mode": "train", "epoch": 11, "iter": 500, "lr": 0.0, "memory": 10009, "data_time": 0.00993, "loss_rpn_cls": 0.00358, "loss_rpn_bbox": 0.01072, "loss_cls": 0.05342, "acc": 97.76577, "loss_bbox": 0.08256, "loss": 0.15029, "grad_norm": 1.08447, "time": 0.71147}
203
+ {"mode": "train", "epoch": 11, "iter": 1000, "lr": 0.0, "memory": 10009, "data_time": 0.00317, "loss_rpn_cls": 0.00364, "loss_rpn_bbox": 0.01064, "loss_cls": 0.05456, "acc": 97.71826, "loss_bbox": 0.08111, "loss": 0.14995, "grad_norm": 1.09291, "time": 0.70473}
204
+ {"mode": "train", "epoch": 11, "iter": 1500, "lr": 0.0, "memory": 10009, "data_time": 0.00302, "loss_rpn_cls": 0.00372, "loss_rpn_bbox": 0.01046, "loss_cls": 0.05177, "acc": 97.85234, "loss_bbox": 0.07964, "loss": 0.14559, "grad_norm": 1.06566, "time": 0.70538}
205
+ {"mode": "train", "epoch": 11, "iter": 2000, "lr": 0.0, "memory": 10009, "data_time": 0.0031, "loss_rpn_cls": 0.00351, "loss_rpn_bbox": 0.01054, "loss_cls": 0.05183, "acc": 97.8043, "loss_bbox": 0.07928, "loss": 0.14516, "grad_norm": 1.07884, "time": 0.70506}
206
+ {"mode": "train", "epoch": 11, "iter": 2500, "lr": 0.0, "memory": 10009, "data_time": 0.00306, "loss_rpn_cls": 0.00432, "loss_rpn_bbox": 0.01178, "loss_cls": 0.05501, "acc": 97.68223, "loss_bbox": 0.08295, "loss": 0.15406, "grad_norm": Infinity, "time": 0.7046}
207
+ {"mode": "train", "epoch": 11, "iter": 3000, "lr": 0.0, "memory": 10009, "data_time": 0.00302, "loss_rpn_cls": 0.00349, "loss_rpn_bbox": 0.0108, "loss_cls": 0.05439, "acc": 97.71929, "loss_bbox": 0.08185, "loss": 0.15053, "grad_norm": 1.11171, "time": 0.70499}
208
+ {"mode": "train", "epoch": 11, "iter": 3500, "lr": 0.0, "memory": 10009, "data_time": 0.00308, "loss_rpn_cls": 0.0038, "loss_rpn_bbox": 0.01078, "loss_cls": 0.05348, "acc": 97.76387, "loss_bbox": 0.08163, "loss": 0.14969, "grad_norm": 1.10862, "time": 0.70411}
209
+ {"mode": "train", "epoch": 11, "iter": 4000, "lr": 0.0, "memory": 10009, "data_time": 0.00311, "loss_rpn_cls": 0.00364, "loss_rpn_bbox": 0.01079, "loss_cls": 0.05264, "acc": 97.80186, "loss_bbox": 0.08219, "loss": 0.14924, "grad_norm": 1.12278, "time": 0.70511}
210
+ {"mode": "train", "epoch": 11, "iter": 4500, "lr": 0.0, "memory": 10009, "data_time": 0.0031, "loss_rpn_cls": 0.00357, "loss_rpn_bbox": 0.01058, "loss_cls": 0.05312, "acc": 97.77192, "loss_bbox": 0.082, "loss": 0.14927, "grad_norm": 1.10774, "time": 0.70475}
211
+ {"mode": "train", "epoch": 11, "iter": 5000, "lr": 0.0, "memory": 10009, "data_time": 0.00299, "loss_rpn_cls": 0.00374, "loss_rpn_bbox": 0.01074, "loss_cls": 0.05332, "acc": 97.76885, "loss_bbox": 0.0818, "loss": 0.14959, "grad_norm": 1.14153, "time": 0.705}
212
+ {"mode": "train", "epoch": 11, "iter": 5500, "lr": 0.0, "memory": 10009, "data_time": 0.00308, "loss_rpn_cls": 0.004, "loss_rpn_bbox": 0.01122, "loss_cls": 0.05388, "acc": 97.73979, "loss_bbox": 0.08105, "loss": 0.15015, "grad_norm": 1.09937, "time": 0.70434}
213
+ {"mode": "train", "epoch": 11, "iter": 6000, "lr": 0.0, "memory": 10009, "data_time": 0.00309, "loss_rpn_cls": 0.00319, "loss_rpn_bbox": 0.01023, "loss_cls": 0.05108, "acc": 97.86206, "loss_bbox": 0.07845, "loss": 0.14295, "grad_norm": 1.07846, "time": 0.70489}
214
+ {"mode": "train", "epoch": 11, "iter": 6500, "lr": 0.0, "memory": 10009, "data_time": 0.00306, "loss_rpn_cls": 0.00338, "loss_rpn_bbox": 0.01059, "loss_cls": 0.05251, "acc": 97.77788, "loss_bbox": 0.08275, "loss": 0.14923, "grad_norm": NaN, "time": 0.70467}
215
+ {"mode": "train", "epoch": 11, "iter": 7000, "lr": 0.0, "memory": 10009, "data_time": 0.00306, "loss_rpn_cls": 0.00346, "loss_rpn_bbox": 0.01052, "loss_cls": 0.05154, "acc": 97.83223, "loss_bbox": 0.07954, "loss": 0.14507, "grad_norm": 1.09093, "time": 0.70452}
216
+ {"mode": "train", "epoch": 11, "iter": 7500, "lr": 0.0, "memory": 10009, "data_time": 0.00302, "loss_rpn_cls": 0.00337, "loss_rpn_bbox": 0.01088, "loss_cls": 0.05198, "acc": 97.81436, "loss_bbox": 0.0806, "loss": 0.14683, "grad_norm": NaN, "time": 0.70503}
217
+ {"mode": "train", "epoch": 11, "iter": 8000, "lr": 0.0, "memory": 10009, "data_time": 0.00295, "loss_rpn_cls": 0.00312, "loss_rpn_bbox": 0.01076, "loss_cls": 0.05211, "acc": 97.78564, "loss_bbox": 0.08148, "loss": 0.14747, "grad_norm": 1.10249, "time": 0.70475}
218
+ {"mode": "train", "epoch": 11, "iter": 8500, "lr": 0.0, "memory": 10009, "data_time": 0.00302, "loss_rpn_cls": 0.0035, "loss_rpn_bbox": 0.01046, "loss_cls": 0.05512, "acc": 97.67129, "loss_bbox": 0.08317, "loss": 0.15225, "grad_norm": 1.15187, "time": 0.7053}
219
+ {"mode": "train", "epoch": 11, "iter": 9000, "lr": 0.0, "memory": 10009, "data_time": 0.00306, "loss_rpn_cls": 0.00366, "loss_rpn_bbox": 0.01094, "loss_cls": 0.05509, "acc": 97.69233, "loss_bbox": 0.08481, "loss": 0.15451, "grad_norm": 1.12321, "time": 0.70453}
220
+ {"mode": "train", "epoch": 11, "iter": 9500, "lr": 0.0, "memory": 10009, "data_time": 0.00305, "loss_rpn_cls": 0.00345, "loss_rpn_bbox": 0.01031, "loss_cls": 0.05243, "acc": 97.80938, "loss_bbox": 0.08172, "loss": 0.1479, "grad_norm": 1.11, "time": 0.70508}
221
+ {"mode": "val", "epoch": 11, "iter": 1068, "lr": 0.0, "mAP": 0.72434}
222
+ {"mode": "train", "epoch": 12, "iter": 500, "lr": 0.0, "memory": 10009, "data_time": 0.01007, "loss_rpn_cls": 0.00302, "loss_rpn_bbox": 0.01048, "loss_cls": 0.05212, "acc": 97.79941, "loss_bbox": 0.08335, "loss": 0.14897, "grad_norm": 1.12888, "time": 0.71227}
223
+ {"mode": "train", "epoch": 12, "iter": 1000, "lr": 0.0, "memory": 10009, "data_time": 0.0031, "loss_rpn_cls": 0.00344, "loss_rpn_bbox": 0.01089, "loss_cls": 0.0534, "acc": 97.74932, "loss_bbox": 0.08156, "loss": 0.1493, "grad_norm": 1.12052, "time": 0.70505}
224
+ {"mode": "train", "epoch": 12, "iter": 1500, "lr": 0.0, "memory": 10009, "data_time": 0.00305, "loss_rpn_cls": 0.00351, "loss_rpn_bbox": 0.01095, "loss_cls": 0.05265, "acc": 97.81016, "loss_bbox": 0.08192, "loss": 0.14903, "grad_norm": 1.12187, "time": 0.70543}
225
+ {"mode": "train", "epoch": 12, "iter": 2000, "lr": 0.0, "memory": 10009, "data_time": 0.00305, "loss_rpn_cls": 0.00358, "loss_rpn_bbox": 0.01068, "loss_cls": 0.05105, "acc": 97.85352, "loss_bbox": 0.08012, "loss": 0.14543, "grad_norm": 1.0949, "time": 0.70484}
226
+ {"mode": "train", "epoch": 12, "iter": 2500, "lr": 0.0, "memory": 10009, "data_time": 0.00309, "loss_rpn_cls": 0.00355, "loss_rpn_bbox": 0.01052, "loss_cls": 0.05276, "acc": 97.77173, "loss_bbox": 0.08316, "loss": 0.14999, "grad_norm": 1.11161, "time": 0.70482}
227
+ {"mode": "train", "epoch": 12, "iter": 3000, "lr": 0.0, "memory": 10009, "data_time": 0.00305, "loss_rpn_cls": 0.00347, "loss_rpn_bbox": 0.01055, "loss_cls": 0.05195, "acc": 97.81763, "loss_bbox": 0.08121, "loss": 0.14717, "grad_norm": 1.11311, "time": 0.70448}
228
+ {"mode": "train", "epoch": 12, "iter": 3500, "lr": 0.0, "memory": 10009, "data_time": 0.00304, "loss_rpn_cls": 0.00338, "loss_rpn_bbox": 0.01077, "loss_cls": 0.05245, "acc": 97.80835, "loss_bbox": 0.08213, "loss": 0.14873, "grad_norm": 1.10862, "time": 0.70473}
229
+ {"mode": "train", "epoch": 12, "iter": 4000, "lr": 0.0, "memory": 10009, "data_time": 0.00301, "loss_rpn_cls": 0.00279, "loss_rpn_bbox": 0.00975, "loss_cls": 0.04994, "acc": 97.91138, "loss_bbox": 0.07697, "loss": 0.13945, "grad_norm": 1.07978, "time": 0.70436}
230
+ {"mode": "train", "epoch": 12, "iter": 4500, "lr": 0.0, "memory": 10009, "data_time": 0.00302, "loss_rpn_cls": 0.0033, "loss_rpn_bbox": 0.00993, "loss_cls": 0.05284, "acc": 97.77275, "loss_bbox": 0.08139, "loss": 0.14746, "grad_norm": NaN, "time": 0.70436}
231
+ {"mode": "train", "epoch": 12, "iter": 5000, "lr": 0.0, "memory": 10009, "data_time": 0.00305, "loss_rpn_cls": 0.00336, "loss_rpn_bbox": 0.01, "loss_cls": 0.05166, "acc": 97.83843, "loss_bbox": 0.07827, "loss": 0.14329, "grad_norm": 1.08559, "time": 0.70442}
232
+ {"mode": "train", "epoch": 12, "iter": 5500, "lr": 0.0, "memory": 10009, "data_time": 0.00307, "loss_rpn_cls": 0.00333, "loss_rpn_bbox": 0.01073, "loss_cls": 0.05257, "acc": 97.78047, "loss_bbox": 0.08102, "loss": 0.14765, "grad_norm": 1.11994, "time": 0.70424}
233
+ {"mode": "train", "epoch": 12, "iter": 6000, "lr": 0.0, "memory": 10009, "data_time": 0.0031, "loss_rpn_cls": 0.00359, "loss_rpn_bbox": 0.01083, "loss_cls": 0.05184, "acc": 97.83369, "loss_bbox": 0.08052, "loss": 0.14678, "grad_norm": 1.11324, "time": 0.70492}
234
+ {"mode": "train", "epoch": 12, "iter": 6500, "lr": 0.0, "memory": 10009, "data_time": 0.00305, "loss_rpn_cls": 0.00362, "loss_rpn_bbox": 0.01003, "loss_cls": 0.05318, "acc": 97.76509, "loss_bbox": 0.08026, "loss": 0.14709, "grad_norm": NaN, "time": 0.70432}
235
+ {"mode": "train", "epoch": 12, "iter": 7000, "lr": 0.0, "memory": 10009, "data_time": 0.00307, "loss_rpn_cls": 0.00322, "loss_rpn_bbox": 0.01013, "loss_cls": 0.04982, "acc": 97.90566, "loss_bbox": 0.07814, "loss": 0.14131, "grad_norm": 1.07412, "time": 0.70462}
236
+ {"mode": "train", "epoch": 12, "iter": 7500, "lr": 0.0, "memory": 10009, "data_time": 0.0031, "loss_rpn_cls": 0.00361, "loss_rpn_bbox": 0.0098, "loss_cls": 0.05257, "acc": 97.78535, "loss_bbox": 0.07989, "loss": 0.14587, "grad_norm": 1.0987, "time": 0.70449}
237
+ {"mode": "train", "epoch": 12, "iter": 8000, "lr": 0.0, "memory": 10009, "data_time": 0.00306, "loss_rpn_cls": 0.00342, "loss_rpn_bbox": 0.01055, "loss_cls": 0.05234, "acc": 97.80322, "loss_bbox": 0.07813, "loss": 0.14443, "grad_norm": 1.10172, "time": 0.70486}
238
+ {"mode": "train", "epoch": 12, "iter": 8500, "lr": 0.0, "memory": 10009, "data_time": 0.00307, "loss_rpn_cls": 0.00337, "loss_rpn_bbox": 0.00982, "loss_cls": 0.05132, "acc": 97.85327, "loss_bbox": 0.07973, "loss": 0.14424, "grad_norm": 1.09227, "time": 0.70446}
239
+ {"mode": "train", "epoch": 12, "iter": 9000, "lr": 0.0, "memory": 10009, "data_time": 0.00303, "loss_rpn_cls": 0.00345, "loss_rpn_bbox": 0.01016, "loss_cls": 0.05223, "acc": 97.80249, "loss_bbox": 0.08096, "loss": 0.14681, "grad_norm": NaN, "time": 0.7044}
240
+ {"mode": "train", "epoch": 12, "iter": 9500, "lr": 0.0, "memory": 10009, "data_time": 0.00305, "loss_rpn_cls": 0.00329, "loss_rpn_bbox": 0.01069, "loss_cls": 0.0527, "acc": 97.78486, "loss_bbox": 0.08108, "loss": 0.14776, "grad_norm": 1.09702, "time": 0.70477}
241
+ {"mode": "val", "epoch": 12, "iter": 1068, "lr": 0.0, "mAP": 0.72313}
ckpts/vitp_rsar_orcnn_7231/epoch_12.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54800c340acf4ef30dc51ef73df16fb9e39808d907a774aaedc596d657a50168
3
+ size 1366683565
ckpts/vitp_rsar_orcnn_7231/vitp_rsar_orcnn.py ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dataset_type = 'RSARDataset'
2
+ data_root = '/liyuxuan/DATA/RSAR/'
3
+ angle_version = 'le90'
4
+ img_norm_cfg = dict(
5
+ mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
6
+ train_pipeline = [
7
+ dict(type='LoadImageFromFile'),
8
+ dict(type='LoadAnnotations', with_bbox=True),
9
+ dict(type='RResize', img_scale=(800, 800), keep_ratio=False),
10
+ dict(
11
+ type='RRandomFlip',
12
+ flip_ratio=[0.25, 0.25, 0.25],
13
+ direction=['horizontal', 'vertical', 'diagonal'],
14
+ version='le90'),
15
+ dict(
16
+ type='PolyRandomRotate',
17
+ rotate_ratio=0.5,
18
+ angles_range=180,
19
+ auto_bound=False,
20
+ rect_classes=[3],
21
+ version='le90'),
22
+ dict(
23
+ type='Normalize',
24
+ mean=[123.675, 116.28, 103.53],
25
+ std=[58.395, 57.12, 57.375],
26
+ to_rgb=True),
27
+ dict(type='Pad', size_divisor=32),
28
+ dict(type='DefaultFormatBundle'),
29
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
30
+ ]
31
+ test_pipeline = [
32
+ dict(type='LoadImageFromFile'),
33
+ dict(
34
+ type='MultiScaleFlipAug',
35
+ img_scale=(800, 800),
36
+ flip=False,
37
+ transforms=[
38
+ dict(type='RResize', img_scale=(800, 800), keep_ratio=False),
39
+ dict(
40
+ type='Normalize',
41
+ mean=[123.675, 116.28, 103.53],
42
+ std=[58.395, 57.12, 57.375],
43
+ to_rgb=True),
44
+ dict(type='Pad', size_divisor=32),
45
+ dict(type='DefaultFormatBundle'),
46
+ dict(type='Collect', keys=['img'])
47
+ ])
48
+ ]
49
+ data = dict(
50
+ samples_per_gpu=1,
51
+ workers_per_gpu=4,
52
+ train=dict(
53
+ type='RSARDataset',
54
+ ann_file='/liyuxuan/DATA/RSAR/train/annfiles/',
55
+ img_prefix='/liyuxuan/DATA/RSAR/train/images/',
56
+ pipeline=[
57
+ dict(type='LoadImageFromFile'),
58
+ dict(type='LoadAnnotations', with_bbox=True),
59
+ dict(type='RResize', img_scale=(800, 800), keep_ratio=False),
60
+ dict(
61
+ type='RRandomFlip',
62
+ flip_ratio=[0.25, 0.25, 0.25],
63
+ direction=['horizontal', 'vertical', 'diagonal'],
64
+ version='le90'),
65
+ dict(
66
+ type='PolyRandomRotate',
67
+ rotate_ratio=0.5,
68
+ angles_range=180,
69
+ auto_bound=False,
70
+ rect_classes=[3],
71
+ version='le90'),
72
+ dict(
73
+ type='Normalize',
74
+ mean=[123.675, 116.28, 103.53],
75
+ std=[58.395, 57.12, 57.375],
76
+ to_rgb=True),
77
+ dict(type='Pad', size_divisor=32),
78
+ dict(type='DefaultFormatBundle'),
79
+ dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
80
+ ],
81
+ version='le90'),
82
+ val=dict(
83
+ type='RSARDataset',
84
+ ann_file='/liyuxuan/DATA/RSAR/test/annfiles/',
85
+ img_prefix='/liyuxuan/DATA/RSAR/test/images/',
86
+ pipeline=[
87
+ dict(type='LoadImageFromFile'),
88
+ dict(
89
+ type='MultiScaleFlipAug',
90
+ img_scale=(800, 800),
91
+ flip=False,
92
+ transforms=[
93
+ dict(
94
+ type='RResize', img_scale=(800, 800),
95
+ keep_ratio=False),
96
+ dict(
97
+ type='Normalize',
98
+ mean=[123.675, 116.28, 103.53],
99
+ std=[58.395, 57.12, 57.375],
100
+ to_rgb=True),
101
+ dict(type='Pad', size_divisor=32),
102
+ dict(type='DefaultFormatBundle'),
103
+ dict(type='Collect', keys=['img'])
104
+ ])
105
+ ],
106
+ version='le90'),
107
+ test=dict(
108
+ type='RSARDataset',
109
+ ann_file='/liyuxuan/DATA/RSAR/test/images/',
110
+ img_prefix='/liyuxuan/DATA/RSAR/test/images/',
111
+ pipeline=[
112
+ dict(type='LoadImageFromFile'),
113
+ dict(
114
+ type='MultiScaleFlipAug',
115
+ img_scale=(800, 800),
116
+ flip=False,
117
+ transforms=[
118
+ dict(
119
+ type='RResize', img_scale=(800, 800),
120
+ keep_ratio=False),
121
+ dict(
122
+ type='Normalize',
123
+ mean=[123.675, 116.28, 103.53],
124
+ std=[58.395, 57.12, 57.375],
125
+ to_rgb=True),
126
+ dict(type='Pad', size_divisor=32),
127
+ dict(type='DefaultFormatBundle'),
128
+ dict(type='Collect', keys=['img'])
129
+ ])
130
+ ],
131
+ version='le90'))
132
+ evaluation = dict(interval=1, metric='mAP')
133
+ optimizer = dict(
134
+ type='AdamW',
135
+ lr=2.5e-05,
136
+ betas=(0.9, 0.999),
137
+ weight_decay=0.05,
138
+ constructor='InternViTAdapterLayerDecayOptimizerConstructor',
139
+ paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.95))
140
+ optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
141
+ lr_config = dict(
142
+ policy='step',
143
+ warmup='linear',
144
+ warmup_iters=500,
145
+ warmup_ratio=0.3333333333333333,
146
+ step=[8, 11])
147
+ runner = dict(type='EpochBasedRunner', max_epochs=12)
148
+ checkpoint_config = dict(interval=1, max_keep_ckpts=1)
149
+ log_config = dict(interval=500, hooks=[dict(type='TextLoggerHook')])
150
+ dist_params = dict(backend='nccl')
151
+ log_level = 'INFO'
152
+ load_from = None
153
+ resume_from = None
154
+ workflow = [('train', 1)]
155
+ opencv_num_threads = 0
156
+ mp_start_method = 'fork'
157
+ pretrained = 'pretrained/ViTP_ViT_L_300M_rs.safetensors'
158
+ norm_cfg = dict(type='LN', requires_grad=True)
159
+ model = dict(
160
+ type='OrientedRCNN',
161
+ backbone=dict(
162
+ type='InternViTAdapter',
163
+ pretrain_size=448,
164
+ img_size=800,
165
+ patch_size=16,
166
+ embed_dim=1024,
167
+ depth=24,
168
+ num_heads=16,
169
+ mlp_ratio=4.0,
170
+ drop_path_rate=0.1,
171
+ init_values=0.1,
172
+ with_cp=True,
173
+ use_flash_attn=True,
174
+ qk_normalization=False,
175
+ layerscale_force_fp32=False,
176
+ with_fpn=False,
177
+ freeze_vit=False,
178
+ use_final_norm=True,
179
+ interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
180
+ cffn_ratio=0.25,
181
+ deform_ratio=0.25,
182
+ qkv_bias=True,
183
+ norm_type='layer_norm',
184
+ pretrained='pretrained/ViTP_ViT_L_300M_rs.safetensors',
185
+ pretrained_type='full',
186
+ only_feat_out=True),
187
+ neck=dict(
188
+ type='SimpleFPN',
189
+ in_channels=[1024, 1024, 1024, 1024],
190
+ out_channels=256,
191
+ norm_cfg=dict(type='LN', requires_grad=True),
192
+ use_residual=False,
193
+ num_outs=5),
194
+ rpn_head=dict(
195
+ type='OrientedRPNHead',
196
+ in_channels=256,
197
+ feat_channels=256,
198
+ version='le90',
199
+ anchor_generator=dict(
200
+ type='AnchorGenerator',
201
+ scales=[8],
202
+ ratios=[0.5, 1.0, 2.0],
203
+ strides=[4, 8, 16, 32, 64]),
204
+ bbox_coder=dict(
205
+ type='MidpointOffsetCoder',
206
+ angle_range='le90',
207
+ target_means=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
208
+ target_stds=[1.0, 1.0, 1.0, 1.0, 0.5, 0.5]),
209
+ loss_cls=dict(
210
+ type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
211
+ loss_bbox=dict(
212
+ type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)),
213
+ roi_head=dict(
214
+ type='OrientedStandardRoIHead',
215
+ bbox_roi_extractor=dict(
216
+ type='RotatedSingleRoIExtractor',
217
+ roi_layer=dict(
218
+ type='RoIAlignRotated',
219
+ out_size=7,
220
+ sample_num=2,
221
+ clockwise=True),
222
+ out_channels=256,
223
+ featmap_strides=[4, 8, 16, 32]),
224
+ bbox_head=dict(
225
+ type='RotatedShared2FCBBoxHead',
226
+ in_channels=256,
227
+ fc_out_channels=1024,
228
+ roi_feat_size=7,
229
+ num_classes=6,
230
+ bbox_coder=dict(
231
+ type='DeltaXYWHAOBBoxCoder',
232
+ angle_range='le90',
233
+ norm_factor=None,
234
+ edge_swap=True,
235
+ proj_xy=True,
236
+ target_means=(0.0, 0.0, 0.0, 0.0, 0.0),
237
+ target_stds=(0.1, 0.1, 0.2, 0.2, 0.1)),
238
+ reg_class_agnostic=True,
239
+ loss_cls=dict(
240
+ type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
241
+ loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),
242
+ train_cfg=dict(
243
+ rpn=dict(
244
+ assigner=dict(
245
+ type='MaxIoUAssigner',
246
+ pos_iou_thr=0.7,
247
+ neg_iou_thr=0.3,
248
+ min_pos_iou=0.3,
249
+ match_low_quality=True,
250
+ gpu_assign_thr=1000,
251
+ ignore_iof_thr=-1),
252
+ sampler=dict(
253
+ type='RandomSampler',
254
+ num=256,
255
+ pos_fraction=0.5,
256
+ neg_pos_ub=-1,
257
+ add_gt_as_proposals=False),
258
+ allowed_border=0,
259
+ pos_weight=-1,
260
+ debug=False),
261
+ rpn_proposal=dict(
262
+ nms_pre=2000,
263
+ max_per_img=2000,
264
+ nms=dict(type='nms', iou_threshold=0.8),
265
+ min_bbox_size=0),
266
+ rcnn=dict(
267
+ assigner=dict(
268
+ type='MaxIoUAssigner',
269
+ pos_iou_thr=0.5,
270
+ neg_iou_thr=0.5,
271
+ min_pos_iou=0.5,
272
+ match_low_quality=False,
273
+ gpu_assign_thr=1000,
274
+ iou_calculator=dict(type='RBboxOverlaps2D'),
275
+ ignore_iof_thr=-1),
276
+ sampler=dict(
277
+ type='RRandomSampler',
278
+ num=512,
279
+ pos_fraction=0.25,
280
+ neg_pos_ub=-1,
281
+ add_gt_as_proposals=True),
282
+ pos_weight=-1,
283
+ debug=False)),
284
+ test_cfg=dict(
285
+ rpn=dict(
286
+ nms_pre=2000,
287
+ max_per_img=2000,
288
+ nms=dict(type='nms', iou_threshold=0.8),
289
+ min_bbox_size=0),
290
+ rcnn=dict(
291
+ nms_pre=2000,
292
+ min_bbox_size=0,
293
+ score_thr=0.05,
294
+ nms=dict(iou_thr=0.1),
295
+ max_per_img=2000)))
296
+ fp16 = dict(loss_scale=dict(init_scale=512))
297
+ work_dir = './work_dirs/vitp_rsar_orcnn'
298
+ auto_resume = False
299
+ gpu_ids = range(0, 8)
300
+ device = 'cuda'
ckpts/vitp_s2looking_upernet_6989/20250915_140502/20250915_140502.log ADDED
The diff for this file is too large to render. See raw diff
 
ckpts/vitp_s2looking_upernet_6989/best_checkpoint.pth.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ae8a0e0f2b75ebdaf146614481221186d01ba5efd4131edc8e4318c341bacd6
3
+ size 1522950309
ckpts/vitp_s2looking_upernet_6989/vitp_s2looking_upernet.py ADDED
@@ -0,0 +1,360 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ crop_size = (
2
+ 512,
3
+ 512,
4
+ )
5
+ data_preprocessor = dict(
6
+ bgr_to_rgb=True,
7
+ mean=[
8
+ 123.675,
9
+ 116.28,
10
+ 103.53,
11
+ 123.675,
12
+ 116.28,
13
+ 103.53,
14
+ ],
15
+ pad_val=0,
16
+ seg_pad_val=255,
17
+ size_divisor=32,
18
+ std=[
19
+ 58.395,
20
+ 57.12,
21
+ 57.375,
22
+ 58.395,
23
+ 57.12,
24
+ 57.375,
25
+ ],
26
+ test_cfg=dict(size_divisor=32),
27
+ type='DualInputSegDataPreProcessor')
28
+ data_root = '/defaultShare/pubdata/remote_sensing/S2Looking'
29
+ dataset_type = 'S2Looking_Dataset'
30
+ default_hooks = dict(
31
+ checkpoint=dict(by_epoch=False, interval=12000, type='CheckpointHook'),
32
+ logger=dict(interval=50, log_metric_by_epoch=False, type='LoggerHook'),
33
+ param_scheduler=dict(type='ParamSchedulerHook'),
34
+ sampler_seed=dict(type='DistSamplerSeedHook'),
35
+ timer=dict(type='IterTimerHook'),
36
+ visualization=dict(interval=1, type='CDVisualizationHook'))
37
+ default_scope = 'opencd'
38
+ env_cfg = dict(
39
+ cudnn_benchmark=True,
40
+ dist_cfg=dict(backend='nccl'),
41
+ mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
42
+ fp16 = dict(loss_scale=dict(init_scale=512))
43
+ img_ratios = [
44
+ 0.75,
45
+ 1.0,
46
+ 1.25,
47
+ ]
48
+ launcher = 'pytorch'
49
+ load_from = None
50
+ log_level = 'INFO'
51
+ log_processor = dict(by_epoch=False)
52
+ model = dict(
53
+ backbone=dict(
54
+ cffn_ratio=0.25,
55
+ deform_ratio=0.25,
56
+ depth=24,
57
+ drop_path_rate=0.1,
58
+ embed_dim=1024,
59
+ freeze_vit=False,
60
+ img_size=512,
61
+ init_values=0.1,
62
+ interaction_indexes=[
63
+ [
64
+ 0,
65
+ 7,
66
+ ],
67
+ [
68
+ 8,
69
+ 11,
70
+ ],
71
+ [
72
+ 12,
73
+ 15,
74
+ ],
75
+ [
76
+ 16,
77
+ 23,
78
+ ],
79
+ ],
80
+ layerscale_force_fp32=False,
81
+ mlp_ratio=4.0,
82
+ norm_type='layer_norm',
83
+ num_heads=16,
84
+ patch_size=16,
85
+ pretrain_size=448,
86
+ pretrained='pretrained/ViTP_ViT_L_300M_rs.safetensors',
87
+ pretrained_type='full',
88
+ qk_normalization=False,
89
+ qkv_bias=True,
90
+ type='InternViTAdapter',
91
+ use_final_norm=True,
92
+ use_flash_attn=False,
93
+ with_cp=True,
94
+ with_fpn=False),
95
+ data_preprocessor=dict(
96
+ bgr_to_rgb=True,
97
+ mean=[
98
+ 123.675,
99
+ 116.28,
100
+ 103.53,
101
+ 123.675,
102
+ 116.28,
103
+ 103.53,
104
+ ],
105
+ pad_val=0,
106
+ seg_pad_val=255,
107
+ size_divisor=32,
108
+ std=[
109
+ 58.395,
110
+ 57.12,
111
+ 57.375,
112
+ 58.395,
113
+ 57.12,
114
+ 57.375,
115
+ ],
116
+ test_cfg=dict(size_divisor=32),
117
+ type='DualInputSegDataPreProcessor'),
118
+ decode_head=dict(
119
+ align_corners=False,
120
+ channels=512,
121
+ dropout_ratio=0.1,
122
+ in_channels=[
123
+ 2048,
124
+ 2048,
125
+ 2048,
126
+ 2048,
127
+ ],
128
+ in_index=[
129
+ 0,
130
+ 1,
131
+ 2,
132
+ 3,
133
+ ],
134
+ loss_decode=dict(
135
+ loss_weight=1.0, type='mmseg.CrossEntropyLoss', use_sigmoid=False),
136
+ norm_cfg=dict(requires_grad=True, type='SyncBN'),
137
+ num_classes=2,
138
+ pool_scales=(
139
+ 1,
140
+ 2,
141
+ 3,
142
+ 6,
143
+ ),
144
+ type='mmseg.UPerHead'),
145
+ neck=dict(policy='concat', type='FeatureFusionNeck'),
146
+ test_cfg=dict(crop_size=(
147
+ 512,
148
+ 512,
149
+ ), mode='slide', stride=(
150
+ 256,
151
+ 256,
152
+ )),
153
+ train_cfg=dict(),
154
+ type='SiamEncoderDecoder')
155
+ norm_cfg = dict(requires_grad=True, type='SyncBN')
156
+ optim_wrapper = dict(
157
+ clip_grad=None,
158
+ constructor='InternViTAdapterLayerDecayOptimizerConstructor',
159
+ optimizer=dict(
160
+ betas=(
161
+ 0.9,
162
+ 0.999,
163
+ ), lr=2e-05, type='AdamW', weight_decay=0.05),
164
+ paramwise_cfg=dict(layer_decay_rate=0.9, num_layers=24),
165
+ type='OptimWrapper')
166
+ optimizer = dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005)
167
+ param_scheduler = [
168
+ dict(
169
+ begin=0, by_epoch=False, end=1000, start_factor=1e-06,
170
+ type='LinearLR'),
171
+ dict(
172
+ begin=1000,
173
+ by_epoch=False,
174
+ end=120000,
175
+ eta_min=0.0,
176
+ power=1.0,
177
+ type='PolyLR'),
178
+ ]
179
+ pretrained = 'pretrained/ViTP_ViT_L_300M_rs.safetensors'
180
+ resume = False
181
+ test_cfg = dict(type='TestLoop')
182
+ test_dataloader = dict(
183
+ batch_size=1,
184
+ dataset=dict(
185
+ data_prefix=dict(
186
+ img_path_from='test/Image1',
187
+ img_path_to='test/Image2',
188
+ seg_map_path='test/label'),
189
+ data_root='/defaultShare/pubdata/remote_sensing/S2Looking',
190
+ pipeline=[
191
+ dict(type='MultiImgLoadImageFromFile'),
192
+ dict(type='MultiImgLoadAnnotations'),
193
+ dict(type='MultiImgPackSegInputs'),
194
+ ],
195
+ type='S2Looking_Dataset'),
196
+ num_workers=8,
197
+ persistent_workers=True,
198
+ sampler=dict(shuffle=False, type='DefaultSampler'))
199
+ test_evaluator = dict(
200
+ iou_metrics=[
201
+ 'mFscore',
202
+ 'mIoU',
203
+ ], type='mmseg.IoUMetric')
204
+ test_pipeline = [
205
+ dict(type='MultiImgLoadImageFromFile'),
206
+ dict(type='MultiImgLoadAnnotations'),
207
+ dict(type='MultiImgPackSegInputs'),
208
+ ]
209
+ train_cfg = dict(
210
+ max_iters=120000, type='IterBasedTrainLoop', val_interval=12000)
211
+ train_dataloader = dict(
212
+ batch_size=1,
213
+ dataset=dict(
214
+ data_prefix=dict(
215
+ img_path_from='train/Image1',
216
+ img_path_to='train/Image2',
217
+ seg_map_path='train/label'),
218
+ data_root='/defaultShare/pubdata/remote_sensing/S2Looking',
219
+ pipeline=[
220
+ dict(type='MultiImgLoadImageFromFile'),
221
+ dict(type='MultiImgLoadAnnotations'),
222
+ dict(
223
+ degree=(
224
+ -20,
225
+ 20,
226
+ ),
227
+ flip_prob=0.5,
228
+ rotate_prob=0.5,
229
+ type='MultiImgRandomRotFlip'),
230
+ dict(
231
+ cat_max_ratio=0.75,
232
+ crop_size=(
233
+ 512,
234
+ 512,
235
+ ),
236
+ type='MultiImgRandomCrop'),
237
+ dict(prob=0.5, type='MultiImgExchangeTime'),
238
+ dict(
239
+ brightness_delta=10,
240
+ contrast_range=(
241
+ 0.8,
242
+ 1.2,
243
+ ),
244
+ hue_delta=10,
245
+ saturation_range=(
246
+ 0.8,
247
+ 1.2,
248
+ ),
249
+ type='MultiImgPhotoMetricDistortion'),
250
+ dict(type='MultiImgPackSegInputs'),
251
+ ],
252
+ type='S2Looking_Dataset'),
253
+ num_workers=8,
254
+ persistent_workers=True,
255
+ sampler=dict(shuffle=True, type='DefaultSampler'))
256
+ train_pipeline = [
257
+ dict(type='MultiImgLoadImageFromFile'),
258
+ dict(type='MultiImgLoadAnnotations'),
259
+ dict(
260
+ degree=(
261
+ -20,
262
+ 20,
263
+ ),
264
+ flip_prob=0.5,
265
+ rotate_prob=0.5,
266
+ type='MultiImgRandomRotFlip'),
267
+ dict(
268
+ cat_max_ratio=0.75, crop_size=(
269
+ 512,
270
+ 512,
271
+ ), type='MultiImgRandomCrop'),
272
+ dict(prob=0.5, type='MultiImgExchangeTime'),
273
+ dict(
274
+ brightness_delta=10,
275
+ contrast_range=(
276
+ 0.8,
277
+ 1.2,
278
+ ),
279
+ hue_delta=10,
280
+ saturation_range=(
281
+ 0.8,
282
+ 1.2,
283
+ ),
284
+ type='MultiImgPhotoMetricDistortion'),
285
+ dict(type='MultiImgPackSegInputs'),
286
+ ]
287
+ tta_model = dict(type='mmseg.SegTTAModel')
288
+ tta_pipeline = [
289
+ dict(backend_args=None, type='MultiImgLoadImageFromFile'),
290
+ dict(
291
+ transforms=[
292
+ [
293
+ dict(
294
+ keep_ratio=True, scale_factor=0.75, type='MultiImgResize'),
295
+ dict(keep_ratio=True, scale_factor=1.0, type='MultiImgResize'),
296
+ dict(
297
+ keep_ratio=True, scale_factor=1.25, type='MultiImgResize'),
298
+ ],
299
+ [
300
+ dict(
301
+ direction='horizontal',
302
+ prob=0.0,
303
+ type='MultiImgRandomFlip'),
304
+ dict(
305
+ direction='horizontal',
306
+ prob=1.0,
307
+ type='MultiImgRandomFlip'),
308
+ ],
309
+ [
310
+ dict(type='MultiImgLoadAnnotations'),
311
+ ],
312
+ [
313
+ dict(type='MultiImgPackSegInputs'),
314
+ ],
315
+ ],
316
+ type='TestTimeAug'),
317
+ ]
318
+ val_cfg = dict(type='ValLoop')
319
+ val_dataloader = dict(
320
+ batch_size=1,
321
+ dataset=dict(
322
+ data_prefix=dict(
323
+ img_path_from='val/Image1',
324
+ img_path_to='val/Image2',
325
+ seg_map_path='val/label'),
326
+ data_root='/defaultShare/pubdata/remote_sensing/S2Looking',
327
+ pipeline=[
328
+ dict(type='MultiImgLoadImageFromFile'),
329
+ dict(type='MultiImgLoadAnnotations'),
330
+ dict(type='MultiImgPackSegInputs'),
331
+ ],
332
+ type='S2Looking_Dataset'),
333
+ num_workers=8,
334
+ persistent_workers=True,
335
+ sampler=dict(shuffle=False, type='DefaultSampler'))
336
+ val_evaluator = dict(
337
+ iou_metrics=[
338
+ 'mFscore',
339
+ 'mIoU',
340
+ ], type='mmseg.IoUMetric')
341
+ val_pipeline = [
342
+ dict(type='MultiImgLoadImageFromFile'),
343
+ dict(keep_ratio=True, scale=(
344
+ 1024,
345
+ 1024,
346
+ ), type='MultiImgResize'),
347
+ dict(type='MultiImgLoadAnnotations'),
348
+ dict(type='MultiImgPackSegInputs'),
349
+ ]
350
+ vis_backends = [
351
+ dict(type='CDLocalVisBackend'),
352
+ ]
353
+ visualizer = dict(
354
+ alpha=1.0,
355
+ name='visualizer',
356
+ type='CDLocalVisualizer',
357
+ vis_backends=[
358
+ dict(type='CDLocalVisBackend'),
359
+ ])
360
+ work_dir = './work_dirs/vitp_s2looking_upernet'