Upload 125 files
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +26 -0
- README.md +26 -0
- ckpts/ViTP_InternVL_1B_med.safetensors +3 -0
- ckpts/ViTP_InternVL_1B_rs.safetensors +3 -0
- ckpts/ViTP_ViT_L_300M_med.safetensors +3 -0
- ckpts/ViTP_ViT_L_300M_rs.safetensors +3 -0
- ckpts/reduct_pth.py +31 -0
- ckpts/vitp_amos_upernet_9060/20250905_141251.log +0 -0
- ckpts/vitp_amos_upernet_9060/best_mDice.pth +3 -0
- ckpts/vitp_amos_upernet_9060/eval_single_scale_20250906_143650.json +54 -0
- ckpts/vitp_amos_upernet_9060/vitp_amos_upernet.py +218 -0
- ckpts/vitp_brats_upernet_7211/20250907_130222.log +0 -0
- ckpts/vitp_brats_upernet_7211/best_mDice.pth +3 -0
- ckpts/vitp_brats_upernet_7211/eval_single_scale_20250908_054047.json +18 -0
- ckpts/vitp_brats_upernet_7211/vitp_brats_upernet.py +201 -0
- ckpts/vitp_convid_upernet_9155/20250902_103001.log +0 -0
- ckpts/vitp_convid_upernet_9155/best_mDice.pth +3 -0
- ckpts/vitp_convid_upernet_9155/eval_single_scale_20250902_233031.json +15 -0
- ckpts/vitp_convid_upernet_9155/vitp_convid_upernet.py +207 -0
- ckpts/vitp_dior_cascade_rcnn_7960/20250730_223238.log +0 -0
- ckpts/vitp_dior_cascade_rcnn_7960/20250730_223238.log.json +19 -0
- ckpts/vitp_dior_cascade_rcnn_7960/epoch_12.pth +3 -0
- ckpts/vitp_dior_cascade_rcnn_7960/vitp_dior_cascade_rcnn.py +308 -0
- ckpts/vitp_diorr_orcnn_7508/20250918_082138.log +0 -0
- ckpts/vitp_diorr_orcnn_7508/epoch_12.pth +3 -0
- ckpts/vitp_diorr_orcnn_7508/vitp_diorr_orcnn.py +311 -0
- ckpts/vitp_dotav2_orcnn_6073/20250726_012424.log +0 -0
- ckpts/vitp_dotav2_orcnn_6073/20250726_012424.log.json +61 -0
- ckpts/vitp_dotav2_orcnn_6073/epoch_12.pth +3 -0
- ckpts/vitp_dotav2_orcnn_6073/vitp_dotav2_orcnn.py +302 -0
- ckpts/vitp_isaid_upernet_7114/20250803_154801.log +0 -0
- ckpts/vitp_isaid_upernet_7114/20250803_154801.log.json +0 -0
- ckpts/vitp_isaid_upernet_7114/ViTP_isaid_upernet.py +192 -0
- ckpts/vitp_isaid_upernet_7114/eval_20250921_141413.json +40 -0
- ckpts/vitp_isaid_upernet_7114/iter_80000.pth +3 -0
- ckpts/vitp_levir_upernet_7268/20250919_030132/20250919_030132.log +0 -0
- ckpts/vitp_levir_upernet_7268/20250919_030132/20250921_105914.log +485 -0
- ckpts/vitp_levir_upernet_7268/iter_80000.pth +3 -0
- ckpts/vitp_levir_upernet_7268/upernet_internvit_adp_levir.py +344 -0
- ckpts/vitp_loveda_upernet_5428/20250807_180314.log +0 -0
- ckpts/vitp_loveda_upernet_5428/20250807_180314.log.json +0 -0
- ckpts/vitp_loveda_upernet_5428/iter_80000.pth +3 -0
- ckpts/vitp_loveda_upernet_5428/vitp_loveda_upernet.py +208 -0
- ckpts/vitp_rsar_orcnn_7231/20250716_042910.log +0 -0
- ckpts/vitp_rsar_orcnn_7231/20250716_042910.log.json +241 -0
- ckpts/vitp_rsar_orcnn_7231/epoch_12.pth +3 -0
- ckpts/vitp_rsar_orcnn_7231/vitp_rsar_orcnn.py +300 -0
- ckpts/vitp_s2looking_upernet_6989/20250915_140502/20250915_140502.log +0 -0
- ckpts/vitp_s2looking_upernet_6989/best_checkpoint.pth.pth +3 -0
- ckpts/vitp_s2looking_upernet_6989/vitp_s2looking_upernet.py +360 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,29 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
pretrain_data/annotations/general_ann/docvqa_train_10k.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
pretrain_data/annotations/general_ann/dvqa_train_200k.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
pretrain_data/annotations/general_ann/fit_rs_vqa_100k.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
pretrain_data/annotations/general_ann/geoqa+.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
pretrain_data/annotations/general_ann/sharegpt4v_instruct_gpt4-vision_cap100k.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
pretrain_data/annotations/general_ann/sharegpt4v_mix665k_cap23k_coco-ap9k_lcs3k_sam9k_div2k_novg.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
pretrain_data/annotations/general_ann/synthdog_en.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
pretrain_data/annotations/general_ann/vqa_rgb_rsvqahr_train_instruct_100k.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
pretrain_data/annotations/medical_ann/huatuo_oa.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
pretrain_data/annotations/medical_ann/huatuo_vqa.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
pretrain_data/annotations/medical_ann/omnimedvqa.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
pretrain_data/annotations/medical_ann/pmc_oa.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
pretrain_data/annotations/medical_ann/pmc_vqa.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
pretrain_data/annotations/medical_ann/quilt_1m.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
pretrain_data/annotations/medical_ann/quilt_instruct_107k.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
pretrain_data/annotations/medical_ann/quilt_instruct_complex_abductive.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
pretrain_data/annotations/medical_ann/quilt_instruct_conv_desc.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
pretrain_data/annotations/rs_ann/caption_cd_rgb_LevirCCcaptions.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
pretrain_data/annotations/rs_ann/caption_GAIA_trainval_instruct.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
pretrain_data/annotations/rs_ann/cls_rgb_Million-AID_CoT.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
pretrain_data/annotations/rs_ann/cls_rgb_Million-AID.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
pretrain_data/annotations/rs_ann/cls_rgb_NWPU-RESISC45.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
pretrain_data/annotations/rs_ann/geochat_train.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
pretrain_data/annotations/rs_ann/vqa_rgb_LRBEN.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
pretrain_data/annotations/rs_ann/vqa_rgb_SAMRS.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
pretrain_data/annotations/rs_ann/vrsbench_train.jsonl filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
# Introduction
|
| 4 |
+
|
| 5 |
+
Modern computer vision is converging on a closed loop in which perception, reasoning and generation mutually reinforce each other. However, this loop remains incomplete: the top-down influence of high-level reasoning on the foundational learning of low-level perceptual features remains underexplored. This paper addresses this gap by proposing a new paradigm for pretraining foundation models in downstream domains. We introduce **V**isual **i**ns**T**ruction **P**retraining (**ViTP**), a novel approach that directly leverages reasoning to enhance perception. ViTP embeds a Vision Transformer (ViT) backbone within a Vision-Language Model and pretrains it end-to-end using a rich corpus of visual instruction data curated from target downstream domains. ViTP is powered by our proposed Visual Robustness Learning (VRL), which compels the ViT to learn robust and domain-relevant features from a sparse set of visual tokens. Extensive experiments on 16 challenging remote sensing and medical imaging benchmarks demonstrate that ViTP establishes new state-of-the-art performance across a diverse range of downstream tasks. The code is available at [GitHub](https://github.com/zcablii/ViTP).
|
| 6 |
+
|
| 7 |
+
----
|
| 8 |
+
|
| 9 |
+

|
| 10 |
+
The synergistic relationship between perception, generation, and reasoning in modern CV. Our proposed ViTP forges a novel link from high-level reasoning to low-level perception, a previously underexplored connection. ViTP sets new SOTA performance across a diverse range of downstream tasks in medical imaging and remote sensing.
|
| 11 |
+
|
| 12 |
+
----
|
| 13 |
+
|
| 14 |
+

|
| 15 |
+
A conceptual illustration of the ViTP framework. A ViT backbone is embedded within a large VLM and then pretrained with a domain-specific instruction-following objective and Visual Robustness Learning (VRL). This process instils high-level semantic understanding into the ViT. The resulting weights are then used to initialize models for various downstream perception tasks.
|
| 16 |
+
|
| 17 |
+
----
|
| 18 |
+
|
| 19 |
+
```bibtex
|
| 20 |
+
@misc{tongyidr,
|
| 21 |
+
author={Tongyi DeepResearch Team},
|
| 22 |
+
title={Tongyi-DeepResearch},
|
| 23 |
+
year={2025},
|
| 24 |
+
howpublished={\url{https://github.com/Alibaba-NLP/DeepResearch}}
|
| 25 |
+
}
|
| 26 |
+
```
|
ckpts/ViTP_InternVL_1B_med.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a33c06bd3a146de19b80f5bf3289fd1b7fd899fdde06cb94d39e9c7911e0dd7
|
| 3 |
+
size 1876463472
|
ckpts/ViTP_InternVL_1B_rs.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb63f06371476844f0813d80043e83c2cdd2bfa7878c6221b37b909be2ea10a9
|
| 3 |
+
size 1876463472
|
ckpts/ViTP_ViT_L_300M_med.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df83fabd968ac7d46280beebbee65b93bddd74860a04a09f97f58004f9dfa21e
|
| 3 |
+
size 617029872
|
ckpts/ViTP_ViT_L_300M_rs.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d44154814bfdaf4bd5b36e7ab1a657bd065c643b3c969c135e43b8bad7589661
|
| 3 |
+
size 617029872
|
ckpts/reduct_pth.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import glob
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
# find all .pth files under the current working directory (searched recursively)
|
| 6 |
+
pth_files = glob.glob('**/*.pth', recursive=True)
|
| 7 |
+
|
| 8 |
+
# loop over all pth files and overwrite them without the training-only keys (e.g. 'optimizer')
|
| 9 |
+
|
| 10 |
+
def overwirte_pth(pth_file):
    """Strip training-only state from a checkpoint and overwrite it in place.

    Loads the checkpoint stored at ``pth_file``, deletes the ``'optimizer'``,
    ``'param_schedulers'`` and ``'message_hub'`` entries that are present, and
    writes the slimmed checkpoint back to the same path. If none of those
    entries exist the file is left untouched.

    The (misspelled) function name is kept as-is so existing callers keep
    working.

    Args:
        pth_file: Path of the ``.pth`` checkpoint to rewrite.

    Returns:
        None.
    """
    # Function-scope import so this block does not depend on edits to the
    # file's top-level import list.
    import os

    print(f'Overwriting {pth_file}')
    # NOTE(review): torch.load unpickles the file, so only run this on
    # checkpoints from a trusted source.
    # map_location='cpu' lets the script run on a machine that lacks the
    # device the tensors were originally saved from.
    checkpoint = torch.load(pth_file, map_location='cpu')

    # print the top-level keys of the checkpoint,
    # e.g. dict_keys(['meta', 'state_dict', 'optimizer'])
    print(checkpoint.keys())

    # Each key is checked independently, so a checkpoint without
    # 'optimizer' still gets its other training-only entries removed.
    removable_keys = ('optimizer', 'param_schedulers', 'message_hub')
    present_keys = [key for key in removable_keys if key in checkpoint]
    if not present_keys:
        print('No optimizer found in the checkpoint')
        return
    for key in present_keys:
        del checkpoint[key]

    # Save to a sibling temp file and swap it in with os.replace so an
    # interrupted save cannot leave a truncated checkpoint at pth_file.
    # The '.tmp' suffix keeps a stray leftover from matching '*.pth'.
    tmp_file = f'{pth_file}.tmp'
    try:
        torch.save(checkpoint, tmp_file)
        os.replace(tmp_file, pth_file)
    finally:
        # After a successful replace the temp file no longer exists; this
        # only cleans up when the save or the replace failed.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
    print(f'Overwritten {pth_file} successfully')
|
| 28 |
+
|
| 29 |
+
# Rewrite every checkpoint collected in pth_files, one file at a time.
for pth_file in pth_files:
    overwirte_pth(pth_file)
|
| 31 |
+
|
ckpts/vitp_amos_upernet_9060/20250905_141251.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ckpts/vitp_amos_upernet_9060/best_mDice.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a10e012e5c2a3bb778ec98226b0e6f73aef298a12c8832add221f1e7ddb8f3e9
|
| 3 |
+
size 1809417685
|
ckpts/vitp_amos_upernet_9060/eval_single_scale_20250906_143650.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"config": "./configs/internvit_new/upernet_internvit_adp_160e_amos_2022_yx.py",
|
| 3 |
+
"metric": {
|
| 4 |
+
"aAcc": 0.9494,
|
| 5 |
+
"mIoU": 0.8334999999999999,
|
| 6 |
+
"mAcc": 0.8970999999999999,
|
| 7 |
+
"mDice": 0.9059999999999999,
|
| 8 |
+
"IoU.spleen": 0.8662000274658204,
|
| 9 |
+
"IoU.kidney_right": 0.93,
|
| 10 |
+
"IoU.kidney_left": 0.915,
|
| 11 |
+
"IoU.gallbladder": 0.7836000061035157,
|
| 12 |
+
"IoU.esophagus": 0.7613999938964844,
|
| 13 |
+
"IoU.liver": 0.9440000152587891,
|
| 14 |
+
"IoU.stomach": 0.8463999938964843,
|
| 15 |
+
"IoU.aorta": 0.9205999755859375,
|
| 16 |
+
"IoU.inferior_vena_cava": 0.7538999938964843,
|
| 17 |
+
"IoU.pancreas": 0.8077999877929688,
|
| 18 |
+
"IoU.adrenal_gland_right": 0.6437000274658203,
|
| 19 |
+
"IoU.adrenal_gland_left": 0.7948000335693359,
|
| 20 |
+
"IoU.duodenum": 0.6570999908447266,
|
| 21 |
+
"IoU.bladder": 0.960999984741211,
|
| 22 |
+
"IoU.prostate_and_uterus": 0.9162999725341797,
|
| 23 |
+
"Acc.spleen": 0.892300033569336,
|
| 24 |
+
"Acc.kidney_right": 0.9648999786376953,
|
| 25 |
+
"Acc.kidney_left": 0.9219000244140625,
|
| 26 |
+
"Acc.gallbladder": 0.844800033569336,
|
| 27 |
+
"Acc.esophagus": 0.9162999725341797,
|
| 28 |
+
"Acc.liver": 0.9783999633789062,
|
| 29 |
+
"Acc.stomach": 0.9309999847412109,
|
| 30 |
+
"Acc.aorta": 0.949800033569336,
|
| 31 |
+
"Acc.inferior_vena_cava": 0.8994000244140625,
|
| 32 |
+
"Acc.pancreas": 0.9041999816894531,
|
| 33 |
+
"Acc.adrenal_gland_right": 0.7270999908447265,
|
| 34 |
+
"Acc.adrenal_gland_left": 0.8395999908447266,
|
| 35 |
+
"Acc.duodenum": 0.7452999877929688,
|
| 36 |
+
"Acc.bladder": 0.9805000305175782,
|
| 37 |
+
"Acc.prostate_and_uterus": 0.9605000305175782,
|
| 38 |
+
"Dice.spleen": 0.9283000183105469,
|
| 39 |
+
"Dice.kidney_right": 0.9637000274658203,
|
| 40 |
+
"Dice.kidney_left": 0.9555999755859375,
|
| 41 |
+
"Dice.gallbladder": 0.8787000274658203,
|
| 42 |
+
"Dice.esophagus": 0.8645999908447266,
|
| 43 |
+
"Dice.liver": 0.9712000274658203,
|
| 44 |
+
"Dice.stomach": 0.9168000030517578,
|
| 45 |
+
"Dice.aorta": 0.9587000274658203,
|
| 46 |
+
"Dice.inferior_vena_cava": 0.8597000122070313,
|
| 47 |
+
"Dice.pancreas": 0.8937000274658203,
|
| 48 |
+
"Dice.adrenal_gland_right": 0.7831999969482422,
|
| 49 |
+
"Dice.adrenal_gland_left": 0.8856999969482422,
|
| 50 |
+
"Dice.duodenum": 0.7930999755859375,
|
| 51 |
+
"Dice.bladder": 0.9801000213623047,
|
| 52 |
+
"Dice.prostate_and_uterus": 0.9562999725341796
|
| 53 |
+
}
|
| 54 |
+
}
|
ckpts/vitp_amos_upernet_9060/vitp_amos_upernet.py
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_type = 'AMOS2022Dataset'
|
| 2 |
+
data_root = '/root/data-fs/twh/dataset/AMOS2022/mmseg_data'
|
| 3 |
+
img_norm_cfg = dict(
|
| 4 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
| 5 |
+
crop_size = (512, 512)
|
| 6 |
+
train_pipeline = [
|
| 7 |
+
dict(type='LoadImageFromFile'),
|
| 8 |
+
dict(type='LoadAnnotations', reduce_zero_label=True),
|
| 9 |
+
dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
|
| 10 |
+
dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
|
| 11 |
+
dict(type='RandomFlip', prob=0.5),
|
| 12 |
+
dict(type='PhotoMetricDistortion'),
|
| 13 |
+
dict(
|
| 14 |
+
type='Normalize',
|
| 15 |
+
mean=[123.675, 116.28, 103.53],
|
| 16 |
+
std=[58.395, 57.12, 57.375],
|
| 17 |
+
to_rgb=True),
|
| 18 |
+
dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
|
| 19 |
+
dict(type='DefaultFormatBundle'),
|
| 20 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 21 |
+
]
|
| 22 |
+
test_pipeline = [
|
| 23 |
+
dict(type='LoadImageFromFile'),
|
| 24 |
+
dict(
|
| 25 |
+
type='MultiScaleFlipAug',
|
| 26 |
+
img_scale=(512, 512),
|
| 27 |
+
flip=False,
|
| 28 |
+
transforms=[
|
| 29 |
+
dict(type='Resize', keep_ratio=True),
|
| 30 |
+
dict(
|
| 31 |
+
type='Normalize',
|
| 32 |
+
mean=[123.675, 116.28, 103.53],
|
| 33 |
+
std=[58.395, 57.12, 57.375],
|
| 34 |
+
to_rgb=True),
|
| 35 |
+
dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
|
| 36 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 37 |
+
dict(type='Collect', keys=['img'])
|
| 38 |
+
])
|
| 39 |
+
]
|
| 40 |
+
data = dict(
|
| 41 |
+
samples_per_gpu=2,
|
| 42 |
+
workers_per_gpu=4,
|
| 43 |
+
train=dict(
|
| 44 |
+
type='AMOS2022Dataset',
|
| 45 |
+
data_root='/root/data-fs/twh/dataset/AMOS2022/mmseg_data',
|
| 46 |
+
img_dir='img_dir/train',
|
| 47 |
+
ann_dir='ann_dir/train',
|
| 48 |
+
pipeline=[
|
| 49 |
+
dict(type='LoadImageFromFile'),
|
| 50 |
+
dict(type='LoadAnnotations', reduce_zero_label=True),
|
| 51 |
+
dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
|
| 52 |
+
dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
|
| 53 |
+
dict(type='RandomFlip', prob=0.5),
|
| 54 |
+
dict(type='PhotoMetricDistortion'),
|
| 55 |
+
dict(
|
| 56 |
+
type='Normalize',
|
| 57 |
+
mean=[123.675, 116.28, 103.53],
|
| 58 |
+
std=[58.395, 57.12, 57.375],
|
| 59 |
+
to_rgb=True),
|
| 60 |
+
dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
|
| 61 |
+
dict(type='DefaultFormatBundle'),
|
| 62 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 63 |
+
]),
|
| 64 |
+
val=dict(
|
| 65 |
+
type='AMOS2022Dataset',
|
| 66 |
+
data_root='/root/data-fs/twh/dataset/AMOS2022/mmseg_data',
|
| 67 |
+
img_dir='img_dir/val',
|
| 68 |
+
ann_dir='ann_dir/val',
|
| 69 |
+
pipeline=[
|
| 70 |
+
dict(type='LoadImageFromFile'),
|
| 71 |
+
dict(
|
| 72 |
+
type='MultiScaleFlipAug',
|
| 73 |
+
img_scale=(512, 512),
|
| 74 |
+
flip=False,
|
| 75 |
+
transforms=[
|
| 76 |
+
dict(type='Resize', keep_ratio=True),
|
| 77 |
+
dict(
|
| 78 |
+
type='Normalize',
|
| 79 |
+
mean=[123.675, 116.28, 103.53],
|
| 80 |
+
std=[58.395, 57.12, 57.375],
|
| 81 |
+
to_rgb=True),
|
| 82 |
+
dict(
|
| 83 |
+
type='Pad',
|
| 84 |
+
size=(512, 512),
|
| 85 |
+
pad_val=0,
|
| 86 |
+
seg_pad_val=255),
|
| 87 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 88 |
+
dict(type='Collect', keys=['img'])
|
| 89 |
+
])
|
| 90 |
+
]),
|
| 91 |
+
test=dict(
|
| 92 |
+
type='AMOS2022Dataset',
|
| 93 |
+
data_root='/root/data-fs/twh/dataset/AMOS2022/mmseg_data',
|
| 94 |
+
img_dir='img_dir/test',
|
| 95 |
+
ann_dir='ann_dir/test',
|
| 96 |
+
pipeline=[
|
| 97 |
+
dict(type='LoadImageFromFile'),
|
| 98 |
+
dict(
|
| 99 |
+
type='MultiScaleFlipAug',
|
| 100 |
+
img_scale=(512, 512),
|
| 101 |
+
flip=False,
|
| 102 |
+
transforms=[
|
| 103 |
+
dict(type='Resize', keep_ratio=True),
|
| 104 |
+
dict(
|
| 105 |
+
type='Normalize',
|
| 106 |
+
mean=[123.675, 116.28, 103.53],
|
| 107 |
+
std=[58.395, 57.12, 57.375],
|
| 108 |
+
to_rgb=True),
|
| 109 |
+
dict(
|
| 110 |
+
type='Pad',
|
| 111 |
+
size=(512, 512),
|
| 112 |
+
pad_val=0,
|
| 113 |
+
seg_pad_val=255),
|
| 114 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 115 |
+
dict(type='Collect', keys=['img'])
|
| 116 |
+
])
|
| 117 |
+
]))
|
| 118 |
+
log_config = dict(
|
| 119 |
+
interval=50,
|
| 120 |
+
hooks=[
|
| 121 |
+
dict(
|
| 122 |
+
type='MMSegWandbHook',
|
| 123 |
+
init_kwargs=dict(
|
| 124 |
+
project='ITAP_SEG', name='upernet_internvit_adp_160e_amos'),
|
| 125 |
+
interval=1,
|
| 126 |
+
num_eval_images=0)
|
| 127 |
+
])
|
| 128 |
+
dist_params = dict(backend='nccl')
|
| 129 |
+
log_level = 'INFO'
|
| 130 |
+
load_from = None
|
| 131 |
+
resume_from = None
|
| 132 |
+
workflow = [('train', 1)]
|
| 133 |
+
cudnn_benchmark = True
|
| 134 |
+
bs = 2
|
| 135 |
+
pretrained = 'pretrained/ViTP_ViT_L_300M_med.safetensors'
|
| 136 |
+
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
| 137 |
+
model = dict(
|
| 138 |
+
type='EncoderDecoder',
|
| 139 |
+
pretrained=None,
|
| 140 |
+
backbone=dict(
|
| 141 |
+
type='InternViTAdapter',
|
| 142 |
+
pretrain_size=448,
|
| 143 |
+
img_size=512,
|
| 144 |
+
patch_size=16,
|
| 145 |
+
embed_dim=1024,
|
| 146 |
+
depth=24,
|
| 147 |
+
num_heads=16,
|
| 148 |
+
mlp_ratio=4.0,
|
| 149 |
+
drop_path_rate=0.1,
|
| 150 |
+
init_values=0.1,
|
| 151 |
+
with_cp=True,
|
| 152 |
+
use_flash_attn=True,
|
| 153 |
+
qk_normalization=False,
|
| 154 |
+
layerscale_force_fp32=False,
|
| 155 |
+
with_fpn=False,
|
| 156 |
+
freeze_vit=False,
|
| 157 |
+
use_final_norm=True,
|
| 158 |
+
interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
|
| 159 |
+
cffn_ratio=0.25,
|
| 160 |
+
deform_ratio=0.25,
|
| 161 |
+
qkv_bias=True,
|
| 162 |
+
norm_type='layer_norm',
|
| 163 |
+
pretrained='pretrained/ViTP_ViT_L_300M_med.safetensors',
|
| 164 |
+
pretrained_type='full'),
|
| 165 |
+
decode_head=dict(
|
| 166 |
+
type='UPerHead',
|
| 167 |
+
in_index=[0, 1, 2, 3],
|
| 168 |
+
pool_scales=(1, 2, 3, 6),
|
| 169 |
+
dropout_ratio=0.1,
|
| 170 |
+
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
| 171 |
+
align_corners=False,
|
| 172 |
+
num_classes=15,
|
| 173 |
+
channels=1024,
|
| 174 |
+
ignore_index=255,
|
| 175 |
+
in_channels=[1024, 1024, 1024, 1024],
|
| 176 |
+
loss_decode=dict(
|
| 177 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
| 178 |
+
auxiliary_head=dict(
|
| 179 |
+
type='FCNHead',
|
| 180 |
+
in_channels=1024,
|
| 181 |
+
in_index=2,
|
| 182 |
+
channels=1024,
|
| 183 |
+
num_convs=1,
|
| 184 |
+
concat_input=False,
|
| 185 |
+
dropout_ratio=0.1,
|
| 186 |
+
num_classes=15,
|
| 187 |
+
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
| 188 |
+
align_corners=False,
|
| 189 |
+
loss_decode=dict(
|
| 190 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
| 191 |
+
train_cfg=dict(),
|
| 192 |
+
test_cfg=dict(mode='slide', crop_size=(512, 512), stride=(256, 256)))
|
| 193 |
+
optimizer = dict(
|
| 194 |
+
type='AdamW',
|
| 195 |
+
lr=2e-05,
|
| 196 |
+
betas=(0.9, 0.999),
|
| 197 |
+
weight_decay=0.05,
|
| 198 |
+
constructor='InternViTAdapterLayerDecayOptimizerConstructor',
|
| 199 |
+
paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.9))
|
| 200 |
+
optimizer_config = dict()
|
| 201 |
+
lr_config = dict(
|
| 202 |
+
policy='CosineAnnealing',
|
| 203 |
+
warmup='linear',
|
| 204 |
+
warmup_iters=1500,
|
| 205 |
+
warmup_ratio=1e-06,
|
| 206 |
+
min_lr=0.0)
|
| 207 |
+
runner = dict(type='EpochBasedRunner', max_epochs=160)
|
| 208 |
+
checkpoint_config = dict(interval=16, max_keep_ckpts=1)
|
| 209 |
+
evaluation = dict(interval=16, metric=['mIoU', 'mDice'], save_best='mDice')
|
| 210 |
+
fp16 = dict(loss_scale=dict(init_scale=512))
|
| 211 |
+
randomness = dict(seed=3407)
|
| 212 |
+
vis_backends = [
|
| 213 |
+
dict(type='LocalVisBackend'),
|
| 214 |
+
dict(type='TensorboardVisBackend')
|
| 215 |
+
]
|
| 216 |
+
work_dir = './work_dirs/vitp_amos_upernet'
|
| 217 |
+
gpu_ids = range(0, 8)
|
| 218 |
+
auto_resume = False
|
ckpts/vitp_brats_upernet_7211/20250907_130222.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ckpts/vitp_brats_upernet_7211/best_mDice.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ecfbb50e049d31ee649eddf0d5ffcf003464850edd46c2afa626d3d2c6cbdcec
|
| 3 |
+
size 1809319893
|
ckpts/vitp_brats_upernet_7211/eval_single_scale_20250908_054047.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"config": "./configs/internvit_new/upernet_30e_brats_ailab4_512_5e6.py",
|
| 3 |
+
"metric": {
|
| 4 |
+
"aAcc": 0.8059000000000001,
|
| 5 |
+
"mIoU": 0.5802,
|
| 6 |
+
"mAcc": 0.7034,
|
| 7 |
+
"mDice": 0.7211,
|
| 8 |
+
"IoU.necrotic_tumor_core": 0.8151999664306641,
|
| 9 |
+
"IoU.peritumoral_edema": 0.4456999969482422,
|
| 10 |
+
"IoU.enhancing_tumor": 0.4797999954223633,
|
| 11 |
+
"Acc.necrotic_tumor_core": 0.9245999908447265,
|
| 12 |
+
"Acc.peritumoral_edema": 0.530099983215332,
|
| 13 |
+
"Acc.enhancing_tumor": 0.6555000305175781,
|
| 14 |
+
"Dice.necrotic_tumor_core": 0.8981999969482422,
|
| 15 |
+
"Dice.peritumoral_edema": 0.6165999984741211,
|
| 16 |
+
"Dice.enhancing_tumor": 0.648499984741211
|
| 17 |
+
}
|
| 18 |
+
}
|
ckpts/vitp_brats_upernet_7211/vitp_brats_upernet.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_type = 'BraTS2021Dataset'
|
| 2 |
+
img_norm_cfg = dict(
|
| 3 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
| 4 |
+
crop_size = (512, 512)
|
| 5 |
+
train_pipeline = [
|
| 6 |
+
dict(type='LoadImageFromFile'),
|
| 7 |
+
dict(type='LoadAnnotations', reduce_zero_label=True),
|
| 8 |
+
dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
|
| 9 |
+
dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
|
| 10 |
+
dict(type='RandomFlip', prob=0.5),
|
| 11 |
+
dict(type='PhotoMetricDistortion'),
|
| 12 |
+
dict(
|
| 13 |
+
type='Normalize',
|
| 14 |
+
mean=[123.675, 116.28, 103.53],
|
| 15 |
+
std=[58.395, 57.12, 57.375],
|
| 16 |
+
to_rgb=True),
|
| 17 |
+
dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
|
| 18 |
+
dict(type='DefaultFormatBundle'),
|
| 19 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 20 |
+
]
|
| 21 |
+
test_pipeline = [
|
| 22 |
+
dict(type='LoadImageFromFile'),
|
| 23 |
+
dict(
|
| 24 |
+
type='MultiScaleFlipAug',
|
| 25 |
+
img_scale=(512, 512),
|
| 26 |
+
flip=False,
|
| 27 |
+
transforms=[
|
| 28 |
+
dict(type='Resize', keep_ratio=True),
|
| 29 |
+
dict(
|
| 30 |
+
type='Normalize',
|
| 31 |
+
mean=[123.675, 116.28, 103.53],
|
| 32 |
+
std=[58.395, 57.12, 57.375],
|
| 33 |
+
to_rgb=True),
|
| 34 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 35 |
+
dict(type='Collect', keys=['img'])
|
| 36 |
+
])
|
| 37 |
+
]
|
| 38 |
+
data_root = '/ailab/user/tangwenhao/data/seg/brats2021/mmseg_data'
|
| 39 |
+
data = dict(
|
| 40 |
+
samples_per_gpu=8,
|
| 41 |
+
workers_per_gpu=4,
|
| 42 |
+
train=dict(
|
| 43 |
+
type='BraTS2021Dataset',
|
| 44 |
+
data_root='/ailab/user/tangwenhao/data/seg/brats2021/mmseg_data',
|
| 45 |
+
img_dir='img_dir/train',
|
| 46 |
+
ann_dir='ann_dir/train',
|
| 47 |
+
pipeline=[
|
| 48 |
+
dict(type='LoadImageFromFile'),
|
| 49 |
+
dict(type='LoadAnnotations', reduce_zero_label=True),
|
| 50 |
+
dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
|
| 51 |
+
dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
|
| 52 |
+
dict(type='RandomFlip', prob=0.5),
|
| 53 |
+
dict(type='PhotoMetricDistortion'),
|
| 54 |
+
dict(
|
| 55 |
+
type='Normalize',
|
| 56 |
+
mean=[123.675, 116.28, 103.53],
|
| 57 |
+
std=[58.395, 57.12, 57.375],
|
| 58 |
+
to_rgb=True),
|
| 59 |
+
dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
|
| 60 |
+
dict(type='DefaultFormatBundle'),
|
| 61 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 62 |
+
]),
|
| 63 |
+
val=dict(
|
| 64 |
+
type='BraTS2021Dataset',
|
| 65 |
+
data_root='/ailab/user/tangwenhao/data/seg/brats2021/mmseg_data',
|
| 66 |
+
img_dir='img_dir/val',
|
| 67 |
+
ann_dir='ann_dir/val',
|
| 68 |
+
pipeline=[
|
| 69 |
+
dict(type='LoadImageFromFile'),
|
| 70 |
+
dict(
|
| 71 |
+
type='MultiScaleFlipAug',
|
| 72 |
+
img_scale=(512, 512),
|
| 73 |
+
flip=False,
|
| 74 |
+
transforms=[
|
| 75 |
+
dict(type='Resize', keep_ratio=True),
|
| 76 |
+
dict(
|
| 77 |
+
type='Normalize',
|
| 78 |
+
mean=[123.675, 116.28, 103.53],
|
| 79 |
+
std=[58.395, 57.12, 57.375],
|
| 80 |
+
to_rgb=True),
|
| 81 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 82 |
+
dict(type='Collect', keys=['img'])
|
| 83 |
+
])
|
| 84 |
+
]),
|
| 85 |
+
test=dict(
|
| 86 |
+
type='BraTS2021Dataset',
|
| 87 |
+
data_root='/ailab/user/tangwenhao/data/seg/brats2021/mmseg_data',
|
| 88 |
+
img_dir='img_dir/test',
|
| 89 |
+
ann_dir='ann_dir/test',
|
| 90 |
+
pipeline=[
|
| 91 |
+
dict(type='LoadImageFromFile'),
|
| 92 |
+
dict(
|
| 93 |
+
type='MultiScaleFlipAug',
|
| 94 |
+
img_scale=(512, 512),
|
| 95 |
+
flip=False,
|
| 96 |
+
transforms=[
|
| 97 |
+
dict(type='Resize', keep_ratio=True),
|
| 98 |
+
dict(
|
| 99 |
+
type='Normalize',
|
| 100 |
+
mean=[123.675, 116.28, 103.53],
|
| 101 |
+
std=[58.395, 57.12, 57.375],
|
| 102 |
+
to_rgb=True),
|
| 103 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 104 |
+
dict(type='Collect', keys=['img'])
|
| 105 |
+
])
|
| 106 |
+
]))
|
| 107 |
+
log_config = dict(
|
| 108 |
+
interval=50,
|
| 109 |
+
hooks=[
|
| 110 |
+
dict(
|
| 111 |
+
type='MMSegWandbHook',
|
| 112 |
+
init_kwargs=dict(
|
| 113 |
+
project='ITAP_SEG',
|
| 114 |
+
name='upernet_internvit_adp_30e_brats_512'),
|
| 115 |
+
interval=1,
|
| 116 |
+
num_eval_images=0)
|
| 117 |
+
])
|
| 118 |
+
dist_params = dict(backend='nccl')
|
| 119 |
+
log_level = 'INFO'
|
| 120 |
+
load_from = None
|
| 121 |
+
resume_from = None
|
| 122 |
+
workflow = [('train', 1)]
|
| 123 |
+
cudnn_benchmark = True
|
| 124 |
+
bs = 8
|
| 125 |
+
pretrained = 'pretrained/ViTP_ViT_L_300M_med.safetensors'
|
| 126 |
+
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
| 127 |
+
model = dict(
|
| 128 |
+
type='EncoderDecoder',
|
| 129 |
+
pretrained=None,
|
| 130 |
+
backbone=dict(
|
| 131 |
+
type='InternViTAdapter',
|
| 132 |
+
pretrain_size=448,
|
| 133 |
+
img_size=512,
|
| 134 |
+
patch_size=16,
|
| 135 |
+
embed_dim=1024,
|
| 136 |
+
depth=24,
|
| 137 |
+
num_heads=16,
|
| 138 |
+
mlp_ratio=4.0,
|
| 139 |
+
drop_path_rate=0.1,
|
| 140 |
+
init_values=0.1,
|
| 141 |
+
with_cp=True,
|
| 142 |
+
use_flash_attn=True,
|
| 143 |
+
qk_normalization=False,
|
| 144 |
+
layerscale_force_fp32=False,
|
| 145 |
+
with_fpn=False,
|
| 146 |
+
freeze_vit=False,
|
| 147 |
+
use_final_norm=True,
|
| 148 |
+
interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
|
| 149 |
+
cffn_ratio=0.25,
|
| 150 |
+
deform_ratio=0.25,
|
| 151 |
+
qkv_bias=True,
|
| 152 |
+
norm_type='layer_norm',
|
| 153 |
+
pretrained='pretrained/ViTP_ViT_L_300M_med.safetensors',
|
| 154 |
+
pretrained_type='full'),
|
| 155 |
+
decode_head=dict(
|
| 156 |
+
type='UPerHead',
|
| 157 |
+
in_index=[0, 1, 2, 3],
|
| 158 |
+
pool_scales=(1, 2, 3, 6),
|
| 159 |
+
dropout_ratio=0.1,
|
| 160 |
+
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
| 161 |
+
align_corners=False,
|
| 162 |
+
num_classes=3,
|
| 163 |
+
channels=1024,
|
| 164 |
+
in_channels=[1024, 1024, 1024, 1024],
|
| 165 |
+
loss_decode=dict(
|
| 166 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
| 167 |
+
auxiliary_head=dict(
|
| 168 |
+
type='FCNHead',
|
| 169 |
+
in_channels=1024,
|
| 170 |
+
in_index=2,
|
| 171 |
+
channels=1024,
|
| 172 |
+
num_convs=1,
|
| 173 |
+
concat_input=False,
|
| 174 |
+
dropout_ratio=0.1,
|
| 175 |
+
num_classes=3,
|
| 176 |
+
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
| 177 |
+
align_corners=False,
|
| 178 |
+
loss_decode=dict(
|
| 179 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
| 180 |
+
test_cfg=dict(mode='whole'))
|
| 181 |
+
optimizer = dict(type='AdamW', lr=5e-06, betas=(0.9, 0.999), weight_decay=0.05)
|
| 182 |
+
optimizer_config = dict()
|
| 183 |
+
lr_config = dict(
|
| 184 |
+
policy='CosineAnnealing',
|
| 185 |
+
warmup='linear',
|
| 186 |
+
warmup_iters=375,
|
| 187 |
+
warmup_ratio=1e-08,
|
| 188 |
+
min_lr=0,
|
| 189 |
+
by_epoch=False)
|
| 190 |
+
runner = dict(type='EpochBasedRunner', max_epochs=30)
|
| 191 |
+
checkpoint_config = dict(interval=3, max_keep_ckpts=1)
|
| 192 |
+
evaluation = dict(interval=3, metric=['mIoU', 'mDice'], save_best='mDice')
|
| 193 |
+
fp16 = dict(loss_scale=dict(init_scale=512))
|
| 194 |
+
randomness = dict(seed=3407)
|
| 195 |
+
vis_backends = [
|
| 196 |
+
dict(type='LocalVisBackend'),
|
| 197 |
+
dict(type='TensorboardVisBackend')
|
| 198 |
+
]
|
| 199 |
+
work_dir = './work_dirs/vitp_brats_upernet'
|
| 200 |
+
gpu_ids = range(0, 8)
|
| 201 |
+
auto_resume = False
|
ckpts/vitp_convid_upernet_9155/20250902_103001.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ckpts/vitp_convid_upernet_9155/best_mDice.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c81fb347d19a94011bd171fe4f1343c981bc786dbf2a7e60e4bb87b3b04d481a
|
| 3 |
+
size 1809310421
|
ckpts/vitp_convid_upernet_9155/eval_single_scale_20250902_233031.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"config": "./configs/internvit_new/upernet_internvit_adp_300e_convid_quex_v100yx_512_re_5e6_new.py",
|
| 3 |
+
"metric": {
|
| 4 |
+
"aAcc": 0.9301999999999999,
|
| 5 |
+
"mIoU": 0.8462000000000001,
|
| 6 |
+
"mAcc": 0.9157,
|
| 7 |
+
"mDice": 0.9155,
|
| 8 |
+
"IoU.lung": 0.9062000274658203,
|
| 9 |
+
"IoU.covid": 0.7862000274658203,
|
| 10 |
+
"Acc.lung": 0.9505000305175781,
|
| 11 |
+
"Acc.covid": 0.8808000183105469,
|
| 12 |
+
"Dice.lung": 0.9508000183105468,
|
| 13 |
+
"Dice.covid": 0.8802999877929687
|
| 14 |
+
}
|
| 15 |
+
}
|
ckpts/vitp_convid_upernet_9155/vitp_convid_upernet.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_type = 'ConvidQuexDataset'
|
| 2 |
+
data_root = '/root/data-fs/twh/dataset/convid_quex/seg_data'
|
| 3 |
+
img_norm_cfg = dict(
|
| 4 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
| 5 |
+
crop_size = (512, 512)
|
| 6 |
+
train_pipeline = [
|
| 7 |
+
dict(type='LoadImageFromFile'),
|
| 8 |
+
dict(type='LoadAnnotations', reduce_zero_label=True),
|
| 9 |
+
dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
|
| 10 |
+
dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
|
| 11 |
+
dict(type='RandomFlip', prob=0.5),
|
| 12 |
+
dict(type='PhotoMetricDistortion'),
|
| 13 |
+
dict(
|
| 14 |
+
type='Normalize',
|
| 15 |
+
mean=[123.675, 116.28, 103.53],
|
| 16 |
+
std=[58.395, 57.12, 57.375],
|
| 17 |
+
to_rgb=True),
|
| 18 |
+
dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
|
| 19 |
+
dict(type='DefaultFormatBundle'),
|
| 20 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 21 |
+
]
|
| 22 |
+
test_pipeline = [
|
| 23 |
+
dict(type='LoadImageFromFile'),
|
| 24 |
+
dict(
|
| 25 |
+
type='MultiScaleFlipAug',
|
| 26 |
+
img_scale=(512, 512),
|
| 27 |
+
flip=False,
|
| 28 |
+
transforms=[
|
| 29 |
+
dict(type='Resize', keep_ratio=True),
|
| 30 |
+
dict(
|
| 31 |
+
type='Normalize',
|
| 32 |
+
mean=[123.675, 116.28, 103.53],
|
| 33 |
+
std=[58.395, 57.12, 57.375],
|
| 34 |
+
to_rgb=True),
|
| 35 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 36 |
+
dict(type='Collect', keys=['img'])
|
| 37 |
+
])
|
| 38 |
+
]
|
| 39 |
+
data = dict(
|
| 40 |
+
samples_per_gpu=2,
|
| 41 |
+
workers_per_gpu=4,
|
| 42 |
+
train=dict(
|
| 43 |
+
type='ConvidQuexDataset',
|
| 44 |
+
data_root='/root/data-fs/twh/dataset/convid_quex/seg_data',
|
| 45 |
+
img_dir='image/train',
|
| 46 |
+
ann_dir='anno/train',
|
| 47 |
+
pipeline=[
|
| 48 |
+
dict(type='LoadImageFromFile'),
|
| 49 |
+
dict(type='LoadAnnotations', reduce_zero_label=True),
|
| 50 |
+
dict(type='Resize', img_scale=(512, 512), ratio_range=(0.5, 2.0)),
|
| 51 |
+
dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
|
| 52 |
+
dict(type='RandomFlip', prob=0.5),
|
| 53 |
+
dict(
|
| 54 |
+
type='Normalize',
|
| 55 |
+
mean=[123.675, 116.28, 103.53],
|
| 56 |
+
std=[58.395, 57.12, 57.375],
|
| 57 |
+
to_rgb=True),
|
| 58 |
+
dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
|
| 59 |
+
dict(type='DefaultFormatBundle'),
|
| 60 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 61 |
+
]),
|
| 62 |
+
val=dict(
|
| 63 |
+
type='ConvidQuexDataset',
|
| 64 |
+
data_root='/root/data-fs/twh/dataset/convid_quex/seg_data',
|
| 65 |
+
img_dir='image/val',
|
| 66 |
+
ann_dir='anno/val',
|
| 67 |
+
pipeline=[
|
| 68 |
+
dict(type='LoadImageFromFile'),
|
| 69 |
+
dict(
|
| 70 |
+
type='MultiScaleFlipAug',
|
| 71 |
+
img_scale=(512, 512),
|
| 72 |
+
flip=False,
|
| 73 |
+
transforms=[
|
| 74 |
+
dict(type='Resize', keep_ratio=True),
|
| 75 |
+
dict(
|
| 76 |
+
type='Normalize',
|
| 77 |
+
mean=[123.675, 116.28, 103.53],
|
| 78 |
+
std=[58.395, 57.12, 57.375],
|
| 79 |
+
to_rgb=True),
|
| 80 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 81 |
+
dict(type='Collect', keys=['img'])
|
| 82 |
+
])
|
| 83 |
+
]),
|
| 84 |
+
test=dict(
|
| 85 |
+
type='ConvidQuexDataset',
|
| 86 |
+
data_root='/root/data-fs/twh/dataset/convid_quex/seg_data',
|
| 87 |
+
img_dir='image/test',
|
| 88 |
+
ann_dir='anno/test',
|
| 89 |
+
pipeline=[
|
| 90 |
+
dict(type='LoadImageFromFile'),
|
| 91 |
+
dict(
|
| 92 |
+
type='MultiScaleFlipAug',
|
| 93 |
+
img_scale=(512, 512),
|
| 94 |
+
flip=False,
|
| 95 |
+
transforms=[
|
| 96 |
+
dict(type='Resize', keep_ratio=True),
|
| 97 |
+
dict(
|
| 98 |
+
type='Normalize',
|
| 99 |
+
mean=[123.675, 116.28, 103.53],
|
| 100 |
+
std=[58.395, 57.12, 57.375],
|
| 101 |
+
to_rgb=True),
|
| 102 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 103 |
+
dict(type='Collect', keys=['img'])
|
| 104 |
+
])
|
| 105 |
+
]))
|
| 106 |
+
log_config = dict(
|
| 107 |
+
interval=50,
|
| 108 |
+
hooks=[
|
| 109 |
+
dict(
|
| 110 |
+
type='MMSegWandbHook',
|
| 111 |
+
init_kwargs=dict(
|
| 112 |
+
project='ITAP_SEG',
|
| 113 |
+
name='upernet_internvit_adp_300e_convid_quex_re'),
|
| 114 |
+
interval=30,
|
| 115 |
+
num_eval_images=0)
|
| 116 |
+
])
|
| 117 |
+
dist_params = dict(backend='nccl')
|
| 118 |
+
log_level = 'INFO'
|
| 119 |
+
load_from = None
|
| 120 |
+
resume_from = None
|
| 121 |
+
workflow = [('train', 1)]
|
| 122 |
+
cudnn_benchmark = True
|
| 123 |
+
bs = 2
|
| 124 |
+
pretrained = 'pretrained/ViTP_ViT_L_300M_med.safetensors'
|
| 125 |
+
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
| 126 |
+
model = dict(
|
| 127 |
+
type='EncoderDecoder',
|
| 128 |
+
pretrained=None,
|
| 129 |
+
backbone=dict(
|
| 130 |
+
type='InternViTAdapter',
|
| 131 |
+
pretrain_size=448,
|
| 132 |
+
img_size=512,
|
| 133 |
+
patch_size=16,
|
| 134 |
+
embed_dim=1024,
|
| 135 |
+
depth=24,
|
| 136 |
+
num_heads=16,
|
| 137 |
+
mlp_ratio=4.0,
|
| 138 |
+
drop_path_rate=0.1,
|
| 139 |
+
init_values=0.1,
|
| 140 |
+
with_cp=True,
|
| 141 |
+
use_flash_attn=True,
|
| 142 |
+
qk_normalization=False,
|
| 143 |
+
layerscale_force_fp32=False,
|
| 144 |
+
with_fpn=False,
|
| 145 |
+
freeze_vit=False,
|
| 146 |
+
use_final_norm=True,
|
| 147 |
+
interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
|
| 148 |
+
cffn_ratio=0.25,
|
| 149 |
+
deform_ratio=0.25,
|
| 150 |
+
qkv_bias=True,
|
| 151 |
+
norm_type='layer_norm',
|
| 152 |
+
pretrained='pretrained/ViTP_ViT_L_300M_med.safetensors',
|
| 153 |
+
pretrained_type='full'),
|
| 154 |
+
decode_head=dict(
|
| 155 |
+
type='UPerHead',
|
| 156 |
+
in_index=[0, 1, 2, 3],
|
| 157 |
+
pool_scales=(1, 2, 3, 6),
|
| 158 |
+
dropout_ratio=0.1,
|
| 159 |
+
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
| 160 |
+
align_corners=False,
|
| 161 |
+
num_classes=2,
|
| 162 |
+
channels=1024,
|
| 163 |
+
in_channels=[1024, 1024, 1024, 1024],
|
| 164 |
+
loss_decode=dict(
|
| 165 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
| 166 |
+
auxiliary_head=dict(
|
| 167 |
+
type='FCNHead',
|
| 168 |
+
in_channels=1024,
|
| 169 |
+
in_index=2,
|
| 170 |
+
channels=1024,
|
| 171 |
+
num_convs=1,
|
| 172 |
+
concat_input=False,
|
| 173 |
+
dropout_ratio=0.1,
|
| 174 |
+
num_classes=2,
|
| 175 |
+
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
| 176 |
+
align_corners=False,
|
| 177 |
+
loss_decode=dict(
|
| 178 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.4)),
|
| 179 |
+
test_cfg=dict(mode='whole'))
|
| 180 |
+
optimizer = dict(
|
| 181 |
+
type='AdamW',
|
| 182 |
+
lr=5e-06,
|
| 183 |
+
betas=(0.9, 0.999),
|
| 184 |
+
weight_decay=0.05,
|
| 185 |
+
constructor='InternViTAdapterLayerDecayOptimizerConstructor',
|
| 186 |
+
paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.9))
|
| 187 |
+
optimizer_config = dict()
|
| 188 |
+
lr_config = dict(
|
| 189 |
+
policy='CosineAnnealing',
|
| 190 |
+
warmup='linear',
|
| 191 |
+
warmup_iters=1500,
|
| 192 |
+
warmup_ratio=1e-06,
|
| 193 |
+
min_lr=0.0,
|
| 194 |
+
by_epoch=False)
|
| 195 |
+
runner = dict(type='EpochBasedRunner', max_epochs=300)
|
| 196 |
+
checkpoint_config = dict(interval=30, max_keep_ckpts=1)
|
| 197 |
+
evaluation = dict(interval=30, metric=['mIoU', 'mDice'], save_best='mDice')
|
| 198 |
+
fp16 = dict(loss_scale=dict(init_scale=512))
|
| 199 |
+
randomness = dict(seed=3407)
|
| 200 |
+
vis_backends = [
|
| 201 |
+
dict(type='LocalVisBackend'),
|
| 202 |
+
dict(type='TensorboardVisBackend'),
|
| 203 |
+
dict(type='WandbVisBackend')
|
| 204 |
+
]
|
| 205 |
+
work_dir = './work_dirs/vitp_convid_upernet'
|
| 206 |
+
gpu_ids = range(0, 8)
|
| 207 |
+
auto_resume = False
|
ckpts/vitp_dior_cascade_rcnn_7960/20250730_223238.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ckpts/vitp_dior_cascade_rcnn_7960/20250730_223238.log.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"mmdet_version": "2.25.16fc0c4e", "CLASSES": ["airplane", "airport", "baseballfield", "basketballcourt", "bridge", "chimney", "dam", "Expressway-Service-area", "Expressway-toll-station", "golffield", "groundtrackfield", "harbor", "overpass", "ship", "stadium", "storagetank", "tenniscourt", "trainstation", "vehicle", "windmill"], "env_info": "sys.platform: linux\nPython: 3.10.16 (main, Dec 11 2024, 16:24:50) [GCC 11.2.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: NVIDIA GeForce RTX 3090\nCUDA_HOME: /mnt/petrelfs/share_data/liqingyun/cuda/cuda-12.4/\nGCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0\nPyTorch: 1.12.0\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2024.0-Product Build 20231011 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.3\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37\n - CuDNN 8.3.2 (built against CUDA 11.5)\n - Magma 2.5.2\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds 
-Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.12.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=OFF, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n\nTorchVision: 0.13.0\nOpenCV: 4.11.0\nMMCV: 1.6.1\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.4\nMMRotate: 0.3.4+6fc0c4e", "config": "dataset_type = 'DIORDataset'\ndata_root = '/defaultShare/pubdata/remote_sensing/DIOR/'\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\nimg_size = 800\nangle_version = 'le90'\ntrain_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', with_bbox=True),\n dict(type='Resize', img_scale=(800, 800), keep_ratio=False),\n dict(type='RandomFlip', flip_ratio=0.5),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(800, 800)),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\ntest_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(800, 800),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=False),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(800, 800)),\n dict(type='ImageToTensor', keys=['img']),\n 
dict(type='Collect', keys=['img'])\n ])\n]\ndata = dict(\n samples_per_gpu=1,\n workers_per_gpu=4,\n train=dict(\n type='DIORDataset',\n ann_file=\n '/defaultShare/pubdata/remote_sensing/DIOR/Annotations/train_val.json',\n img_prefix=\n '/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/trainval/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', with_bbox=True),\n dict(type='Resize', img_scale=(800, 800), keep_ratio=False),\n dict(type='RandomFlip', flip_ratio=0.5),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(800, 800)),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n ]),\n val=dict(\n type='DIORDataset',\n ann_file=\n '/defaultShare/pubdata/remote_sensing/DIOR/Annotations/test.json',\n img_prefix='/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/test/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(800, 800),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=False),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(800, 800)),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', keys=['img'])\n ])\n ]),\n test=dict(\n type='DIORDataset',\n ann_file=\n '/defaultShare/pubdata/remote_sensing/DIOR/Annotations/test.json',\n img_prefix='/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/test/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(800, 800),\n flip=False,\n transforms=[\n dict(type='Resize', keep_ratio=False),\n dict(type='RandomFlip'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size=(800, 800)),\n dict(type='ImageToTensor', keys=['img']),\n dict(type='Collect', 
keys=['img'])\n ])\n ]))\nevaluation = dict(interval=4, metric='mAP', classwise=True)\noptimizer = dict(\n type='AdamW',\n lr=2e-05,\n betas=(0.9, 0.999),\n weight_decay=0.05,\n constructor='InternViTAdapterLayerDecayOptimizerConstructor',\n paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.9))\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n policy='step',\n warmup='linear',\n warmup_iters=500,\n warmup_ratio=0.3333333333333333,\n step=[8, 11])\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\ncheckpoint_config = dict(interval=1, max_keep_ckpts=1)\nlog_config = dict(interval=500, hooks=[dict(type='TextLoggerHook')])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\nopencv_num_threads = 0\nmp_start_method = 'fork'\npretrained = 'pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors'\ngpu_number = 8\nnorm_cfg = dict(type='LN', requires_grad=True)\nnum_classes = 20\nmodel = dict(\n type='CascadeRCNN',\n backbone=dict(\n type='InternViTAdapter',\n pretrain_size=448,\n img_size=800,\n patch_size=16,\n embed_dim=1024,\n depth=24,\n num_heads=16,\n mlp_ratio=4.0,\n drop_path_rate=0.1,\n init_values=0.1,\n with_cp=True,\n use_flash_attn=True,\n qk_normalization=False,\n layerscale_force_fp32=False,\n with_fpn=False,\n freeze_vit=False,\n use_final_norm=True,\n interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],\n cffn_ratio=0.25,\n deform_ratio=0.25,\n qkv_bias=True,\n norm_type='layer_norm',\n pretrained=\n 'pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors',\n pretrained_type='full',\n only_feat_out=True),\n neck=dict(\n type='SimpleFPN',\n in_channels=[1024, 1024, 1024, 1024],\n out_channels=256,\n norm_cfg=dict(type='LN', requires_grad=True),\n use_residual=False,\n num_outs=5),\n rpn_head=dict(\n type='RPNHead',\n in_channels=256,\n feat_channels=256,\n anchor_generator=dict(\n 
type='AnchorGenerator',\n scales=[8],\n ratios=[0.5, 1.0, 2.0],\n strides=[4, 8, 16, 32, 64]),\n bbox_coder=dict(\n type='DeltaXYWHBBoxCoder',\n target_means=[0.0, 0.0, 0.0, 0.0],\n target_stds=[1.0, 1.0, 1.0, 1.0]),\n loss_cls=dict(\n type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n loss_bbox=dict(\n type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)),\n roi_head=dict(\n type='CascadeRoIHead',\n num_stages=3,\n stage_loss_weights=[1, 0.5, 0.25],\n bbox_roi_extractor=dict(\n type='SingleRoIExtractor',\n roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n out_channels=256,\n featmap_strides=[4, 8, 16, 32]),\n bbox_head=[\n dict(\n type='Shared2FCBBoxHead',\n in_channels=256,\n fc_out_channels=1024,\n roi_feat_size=7,\n num_classes=20,\n bbox_coder=dict(\n type='DeltaXYWHBBoxCoder',\n target_means=[0.0, 0.0, 0.0, 0.0],\n target_stds=[0.1, 0.1, 0.2, 0.2]),\n reg_class_agnostic=True,\n loss_cls=dict(\n type='CrossEntropyLoss',\n use_sigmoid=False,\n loss_weight=1.0),\n loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n loss_weight=1.0)),\n dict(\n type='Shared2FCBBoxHead',\n in_channels=256,\n fc_out_channels=1024,\n roi_feat_size=7,\n num_classes=20,\n bbox_coder=dict(\n type='DeltaXYWHBBoxCoder',\n target_means=[0.0, 0.0, 0.0, 0.0],\n target_stds=[0.05, 0.05, 0.1, 0.1]),\n reg_class_agnostic=True,\n loss_cls=dict(\n type='CrossEntropyLoss',\n use_sigmoid=False,\n loss_weight=1.0),\n loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n loss_weight=1.0)),\n dict(\n type='Shared2FCBBoxHead',\n in_channels=256,\n fc_out_channels=1024,\n roi_feat_size=7,\n num_classes=20,\n bbox_coder=dict(\n type='DeltaXYWHBBoxCoder',\n target_means=[0.0, 0.0, 0.0, 0.0],\n target_stds=[0.033, 0.033, 0.067, 0.067]),\n reg_class_agnostic=True,\n loss_cls=dict(\n type='CrossEntropyLoss',\n use_sigmoid=False,\n loss_weight=1.0),\n loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))\n ]),\n train_cfg=dict(\n rpn=dict(\n assigner=dict(\n 
type='MaxIoUAssigner',\n pos_iou_thr=0.7,\n neg_iou_thr=0.3,\n min_pos_iou=0.3,\n match_low_quality=True,\n ignore_iof_thr=-1),\n sampler=dict(\n type='RandomSampler',\n num=256,\n pos_fraction=0.5,\n neg_pos_ub=-1,\n add_gt_as_proposals=False),\n allowed_border=0,\n pos_weight=-1,\n debug=False),\n rpn_proposal=dict(\n nms_pre=2000,\n max_per_img=2000,\n nms=dict(type='nms', iou_threshold=0.7),\n min_bbox_size=0),\n rcnn=[\n dict(\n assigner=dict(\n type='MaxIoUAssigner',\n pos_iou_thr=0.5,\n neg_iou_thr=0.5,\n min_pos_iou=0.5,\n match_low_quality=False,\n ignore_iof_thr=-1),\n sampler=dict(\n type='RandomSampler',\n num=512,\n pos_fraction=0.25,\n neg_pos_ub=-1,\n add_gt_as_proposals=True),\n pos_weight=-1,\n debug=False),\n dict(\n assigner=dict(\n type='MaxIoUAssigner',\n pos_iou_thr=0.6,\n neg_iou_thr=0.6,\n min_pos_iou=0.6,\n match_low_quality=False,\n ignore_iof_thr=-1),\n sampler=dict(\n type='RandomSampler',\n num=512,\n pos_fraction=0.25,\n neg_pos_ub=-1,\n add_gt_as_proposals=True),\n pos_weight=-1,\n debug=False),\n dict(\n assigner=dict(\n type='MaxIoUAssigner',\n pos_iou_thr=0.7,\n neg_iou_thr=0.7,\n min_pos_iou=0.7,\n match_low_quality=False,\n ignore_iof_thr=-1),\n sampler=dict(\n type='RandomSampler',\n num=512,\n pos_fraction=0.25,\n neg_pos_ub=-1,\n add_gt_as_proposals=True),\n pos_weight=-1,\n debug=False)\n ]),\n test_cfg=dict(\n rpn=dict(\n nms_pre=1000,\n max_per_img=1000,\n nms=dict(type='nms', iou_threshold=0.7),\n min_bbox_size=0),\n rcnn=dict(\n score_thr=0.05,\n nms=dict(type='nms', iou_threshold=0.5),\n max_per_img=100)))\nfp16 = dict(loss_scale=dict(init_scale=512))\nwork_dir = './work_dirs/dior_inst_tun_TMAug75_8k'\nauto_resume = False\ngpu_ids = range(0, 8)\ndevice = 'cuda'\n", "seed": 0, "exp_name": "dior_inst_tun_TMAug75_8k.py", "fp16": {"loss_scaler": {"scale": 2048.0, "growth_factor": 2.0, "backoff_factor": 0.5, "growth_interval": 2000, "_growth_tracker": 1864}}, "epoch": 4, "iter": 5864, "mmcv_version": "1.6.1", "time": "Wed Jul 
30 18:25:23 2025", "hook_msgs": {"last_ckpt": "/nfs/liyuxuan/zhangyicheng/mmrotate/work_dirs/dior_inst_tun_TMAug75_8k/epoch_3.pth"}}
|
| 2 |
+
{"mode": "train", "epoch": 5, "iter": 500, "lr": 0.0, "memory": 12677, "data_time": 0.01045, "loss_rpn_cls": 0.00582, "loss_rpn_bbox": 0.00764, "s0.loss_cls": 0.05271, "s0.acc": 97.94438, "s0.loss_bbox": 0.03948, "s1.loss_cls": 0.02568, "s1.acc": 98.02153, "s1.loss_bbox": 0.0621, "s2.loss_cls": 0.01527, "s2.acc": 97.57696, "s2.loss_bbox": 0.05219, "loss": 0.26089, "grad_norm": Infinity, "time": 0.77073}
|
| 3 |
+
{"mode": "train", "epoch": 5, "iter": 1000, "lr": 0.0, "memory": 12677, "data_time": 0.00323, "loss_rpn_cls": 0.0059, "loss_rpn_bbox": 0.00788, "s0.loss_cls": 0.05248, "s0.acc": 97.94033, "s0.loss_bbox": 0.03961, "s1.loss_cls": 0.02571, "s1.acc": 98.00724, "s1.loss_bbox": 0.06156, "s2.loss_cls": 0.01496, "s2.acc": 97.60852, "s2.loss_bbox": 0.05159, "loss": 0.25968, "grad_norm": 9.70959, "time": 0.75422}
|
| 4 |
+
{"mode": "train", "epoch": 6, "iter": 500, "lr": 0.0, "memory": 12680, "data_time": 0.00791, "loss_rpn_cls": 0.00518, "loss_rpn_bbox": 0.00718, "s0.loss_cls": 0.04721, "s0.acc": 98.17241, "s0.loss_bbox": 0.03518, "s1.loss_cls": 0.02243, "s1.acc": 98.26773, "s1.loss_bbox": 0.05572, "s2.loss_cls": 0.01297, "s2.acc": 97.95815, "s2.loss_bbox": 0.04768, "loss": 0.23354, "grad_norm": 9.22684, "time": 0.75987}
|
| 5 |
+
{"mode": "train", "epoch": 6, "iter": 1000, "lr": 0.0, "memory": 12680, "data_time": 0.00303, "loss_rpn_cls": 0.00488, "loss_rpn_bbox": 0.00711, "s0.loss_cls": 0.0468, "s0.acc": 98.16084, "s0.loss_bbox": 0.03546, "s1.loss_cls": 0.02189, "s1.acc": 98.29857, "s1.loss_bbox": 0.05547, "s2.loss_cls": 0.01293, "s2.acc": 97.95175, "s2.loss_bbox": 0.04742, "loss": 0.23196, "grad_norm": Infinity, "time": 0.75293}
|
| 6 |
+
{"mode": "train", "epoch": 7, "iter": 500, "lr": 0.0, "memory": 12680, "data_time": 0.00782, "loss_rpn_cls": 0.00395, "loss_rpn_bbox": 0.00645, "s0.loss_cls": 0.04222, "s0.acc": 98.33569, "s0.loss_bbox": 0.03149, "s1.loss_cls": 0.01936, "s1.acc": 98.49516, "s1.loss_bbox": 0.0514, "s2.loss_cls": 0.01139, "s2.acc": 98.21969, "s2.loss_bbox": 0.04539, "loss": 0.21164, "grad_norm": 9.5548, "time": 0.75796}
|
| 7 |
+
{"mode": "train", "epoch": 7, "iter": 1000, "lr": 0.0, "memory": 12680, "data_time": 0.00291, "loss_rpn_cls": 0.00449, "loss_rpn_bbox": 0.00649, "s0.loss_cls": 0.04345, "s0.acc": 98.29019, "s0.loss_bbox": 0.03286, "s1.loss_cls": 0.02, "s1.acc": 98.45613, "s1.loss_bbox": 0.0523, "s2.loss_cls": 0.01178, "s2.acc": 98.15007, "s2.loss_bbox": 0.04496, "loss": 0.21633, "grad_norm": 9.5179, "time": 0.75234}
|
| 8 |
+
{"mode": "train", "epoch": 8, "iter": 500, "lr": 0.0, "memory": 12680, "data_time": 0.00785, "loss_rpn_cls": 0.00372, "loss_rpn_bbox": 0.00585, "s0.loss_cls": 0.03913, "s0.acc": 98.46299, "s0.loss_bbox": 0.02886, "s1.loss_cls": 0.01705, "s1.acc": 98.68055, "s1.loss_bbox": 0.04656, "s2.loss_cls": 0.00996, "s2.acc": 98.43402, "s2.loss_bbox": 0.04085, "loss": 0.19199, "grad_norm": 8.43056, "time": 0.75836}
|
| 9 |
+
{"mode": "train", "epoch": 8, "iter": 1000, "lr": 0.0, "memory": 12680, "data_time": 0.00285, "loss_rpn_cls": 0.00367, "loss_rpn_bbox": 0.0061, "s0.loss_cls": 0.0383, "s0.acc": 98.48643, "s0.loss_bbox": 0.02924, "s1.loss_cls": 0.01657, "s1.acc": 98.71499, "s1.loss_bbox": 0.04741, "s2.loss_cls": 0.00973, "s2.acc": 98.48204, "s2.loss_bbox": 0.04206, "loss": 0.19309, "grad_norm": 8.75413, "time": 0.75288}
|
| 10 |
+
{"mode": "val", "epoch": 8, "iter": 1468, "lr": 0.0, "bbox_mAP": 0.552, "bbox_mAP_50": 0.797, "bbox_mAP_75": 0.601, "bbox_mAP_s": 0.171, "bbox_mAP_m": 0.464, "bbox_mAP_l": 0.738, "bbox_mAP_copypaste": "0.552 0.797 0.601 0.171 0.464 0.738"}
|
| 11 |
+
{"mode": "train", "epoch": 9, "iter": 500, "lr": 0.0, "memory": 12680, "data_time": 0.00814, "loss_rpn_cls": 0.00287, "loss_rpn_bbox": 0.00487, "s0.loss_cls": 0.03275, "s0.acc": 98.70068, "s0.loss_bbox": 0.02321, "s1.loss_cls": 0.01318, "s1.acc": 98.98104, "s1.loss_bbox": 0.03895, "s2.loss_cls": 0.00756, "s2.acc": 98.83655, "s2.loss_bbox": 0.03562, "loss": 0.159, "grad_norm": 7.53158, "time": 0.76159}
|
| 12 |
+
{"mode": "train", "epoch": 9, "iter": 1000, "lr": 0.0, "memory": 12680, "data_time": 0.003, "loss_rpn_cls": 0.00298, "loss_rpn_bbox": 0.00475, "s0.loss_cls": 0.03326, "s0.acc": 98.70029, "s0.loss_bbox": 0.02426, "s1.loss_cls": 0.01377, "s1.acc": 98.94669, "s1.loss_bbox": 0.04039, "s2.loss_cls": 0.00799, "s2.acc": 98.7764, "s2.loss_bbox": 0.0367, "loss": 0.16409, "grad_norm": 7.60728, "time": 0.75893}
|
| 13 |
+
{"mode": "train", "epoch": 10, "iter": 500, "lr": 0.0, "memory": 12680, "data_time": 0.0081, "loss_rpn_cls": 0.00275, "loss_rpn_bbox": 0.00454, "s0.loss_cls": 0.03061, "s0.acc": 98.79683, "s0.loss_bbox": 0.02215, "s1.loss_cls": 0.01231, "s1.acc": 99.03177, "s1.loss_bbox": 0.03735, "s2.loss_cls": 0.00687, "s2.acc": 98.9413, "s2.loss_bbox": 0.03433, "loss": 0.15092, "grad_norm": 7.38586, "time": 0.76107}
|
| 14 |
+
{"mode": "train", "epoch": 10, "iter": 1000, "lr": 0.0, "memory": 12680, "data_time": 0.00312, "loss_rpn_cls": 0.00261, "loss_rpn_bbox": 0.00469, "s0.loss_cls": 0.03044, "s0.acc": 98.81895, "s0.loss_bbox": 0.02166, "s1.loss_cls": 0.01218, "s1.acc": 99.06392, "s1.loss_bbox": 0.03619, "s2.loss_cls": 0.00683, "s2.acc": 98.95856, "s2.loss_bbox": 0.03315, "loss": 0.14775, "grad_norm": 6.83723, "time": 0.75739}
|
| 15 |
+
{"mode": "train", "epoch": 11, "iter": 500, "lr": 0.0, "memory": 12680, "data_time": 0.00774, "loss_rpn_cls": 0.00255, "loss_rpn_bbox": 0.00446, "s0.loss_cls": 0.02944, "s0.acc": 98.8356, "s0.loss_bbox": 0.02092, "s1.loss_cls": 0.01161, "s1.acc": 99.09921, "s1.loss_bbox": 0.03464, "s2.loss_cls": 0.00648, "s2.acc": 98.98859, "s2.loss_bbox": 0.03173, "loss": 0.14183, "grad_norm": 6.81652, "time": 0.7619}
|
| 16 |
+
{"mode": "train", "epoch": 11, "iter": 1000, "lr": 0.0, "memory": 12683, "data_time": 0.00296, "loss_rpn_cls": 0.00247, "loss_rpn_bbox": 0.00473, "s0.loss_cls": 0.02998, "s0.acc": 98.82202, "s0.loss_bbox": 0.02142, "s1.loss_cls": 0.01177, "s1.acc": 99.08769, "s1.loss_bbox": 0.03574, "s2.loss_cls": 0.00655, "s2.acc": 98.99902, "s2.loss_bbox": 0.03296, "loss": 0.14561, "grad_norm": 7.11457, "time": 0.7583}
|
| 17 |
+
{"mode": "train", "epoch": 12, "iter": 500, "lr": 0.0, "memory": 12683, "data_time": 0.00791, "loss_rpn_cls": 0.00221, "loss_rpn_bbox": 0.00392, "s0.loss_cls": 0.02771, "s0.acc": 98.91255, "s0.loss_bbox": 0.01946, "s1.loss_cls": 0.01073, "s1.acc": 99.16979, "s1.loss_bbox": 0.0328, "s2.loss_cls": 0.00588, "s2.acc": 99.10159, "s2.loss_bbox": 0.03082, "loss": 0.13353, "grad_norm": 6.75006, "time": 0.76188}
|
| 18 |
+
{"mode": "train", "epoch": 12, "iter": 1000, "lr": 0.0, "memory": 12683, "data_time": 0.00304, "loss_rpn_cls": 0.00255, "loss_rpn_bbox": 0.00466, "s0.loss_cls": 0.02867, "s0.acc": 98.86948, "s0.loss_bbox": 0.02018, "s1.loss_cls": 0.01103, "s1.acc": 99.1422, "s1.loss_bbox": 0.0331, "s2.loss_cls": 0.00617, "s2.acc": 99.05892, "s2.loss_bbox": 0.03062, "loss": 0.13698, "grad_norm": Infinity, "time": 0.7584}
|
| 19 |
+
{"mode": "val", "epoch": 12, "iter": 1468, "lr": 0.0, "bbox_mAP": 0.557, "bbox_mAP_50": 0.796, "bbox_mAP_75": 0.61, "bbox_mAP_s": 0.175, "bbox_mAP_m": 0.474, "bbox_mAP_l": 0.747, "bbox_mAP_copypaste": "0.557 0.796 0.610 0.175 0.474 0.747"}
|
ckpts/vitp_dior_cascade_rcnn_7960/epoch_12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb6fd409f17f1ac0c98c7ee418c5c126c482c8753d4db467cfacc65e5fddfd61
|
| 3 |
+
size 1478110605
|
ckpts/vitp_dior_cascade_rcnn_7960/vitp_dior_cascade_rcnn.py
ADDED
|
@@ -0,0 +1,308 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_type = 'DIORDataset'
|
| 2 |
+
data_root = '/defaultShare/pubdata/remote_sensing/DIOR/'
|
| 3 |
+
img_norm_cfg = dict(
|
| 4 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
| 5 |
+
img_size = 800
|
| 6 |
+
angle_version = 'le90'
|
| 7 |
+
|
| 8 |
+
data = dict(
|
| 9 |
+
samples_per_gpu=1,
|
| 10 |
+
workers_per_gpu=4,
|
| 11 |
+
train=dict(
|
| 12 |
+
type='DIORDataset',
|
| 13 |
+
ann_file=
|
| 14 |
+
'/defaultShare/pubdata/remote_sensing/DIOR/Annotations/train_val.json',
|
| 15 |
+
img_prefix=
|
| 16 |
+
'/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/trainval/',
|
| 17 |
+
pipeline=[
|
| 18 |
+
dict(type='LoadImageFromFile'),
|
| 19 |
+
dict(type='LoadAnnotations', with_bbox=True),
|
| 20 |
+
dict(type='Resize', img_scale=(800, 800), keep_ratio=False),
|
| 21 |
+
dict(type='RandomFlip', flip_ratio=0.5),
|
| 22 |
+
dict(
|
| 23 |
+
type='Normalize',
|
| 24 |
+
mean=[123.675, 116.28, 103.53],
|
| 25 |
+
std=[58.395, 57.12, 57.375],
|
| 26 |
+
to_rgb=True),
|
| 27 |
+
dict(type='Pad', size=(800, 800)),
|
| 28 |
+
dict(type='DefaultFormatBundle'),
|
| 29 |
+
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
|
| 30 |
+
]),
|
| 31 |
+
val=dict(
|
| 32 |
+
type='DIORDataset',
|
| 33 |
+
ann_file=
|
| 34 |
+
'/defaultShare/pubdata/remote_sensing/DIOR/Annotations/test.json',
|
| 35 |
+
img_prefix='/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/test/',
|
| 36 |
+
pipeline=[
|
| 37 |
+
dict(type='LoadImageFromFile'),
|
| 38 |
+
dict(
|
| 39 |
+
type='MultiScaleFlipAug',
|
| 40 |
+
img_scale=(800, 800),
|
| 41 |
+
flip=False,
|
| 42 |
+
transforms=[
|
| 43 |
+
dict(type='Resize', keep_ratio=False),
|
| 44 |
+
dict(type='RandomFlip'),
|
| 45 |
+
dict(
|
| 46 |
+
type='Normalize',
|
| 47 |
+
mean=[123.675, 116.28, 103.53],
|
| 48 |
+
std=[58.395, 57.12, 57.375],
|
| 49 |
+
to_rgb=True),
|
| 50 |
+
dict(type='Pad', size=(800, 800)),
|
| 51 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 52 |
+
dict(type='Collect', keys=['img'])
|
| 53 |
+
])
|
| 54 |
+
]),
|
| 55 |
+
test=dict(
|
| 56 |
+
type='DIORDataset',
|
| 57 |
+
ann_file=
|
| 58 |
+
'/defaultShare/pubdata/remote_sensing/DIOR/Annotations/test.json',
|
| 59 |
+
img_prefix='/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/test/',
|
| 60 |
+
pipeline=[
|
| 61 |
+
dict(type='LoadImageFromFile'),
|
| 62 |
+
dict(
|
| 63 |
+
type='MultiScaleFlipAug',
|
| 64 |
+
img_scale=(800, 800),
|
| 65 |
+
flip=False,
|
| 66 |
+
transforms=[
|
| 67 |
+
dict(type='Resize', keep_ratio=False),
|
| 68 |
+
dict(type='RandomFlip'),
|
| 69 |
+
dict(
|
| 70 |
+
type='Normalize',
|
| 71 |
+
mean=[123.675, 116.28, 103.53],
|
| 72 |
+
std=[58.395, 57.12, 57.375],
|
| 73 |
+
to_rgb=True),
|
| 74 |
+
dict(type='Pad', size=(800, 800)),
|
| 75 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 76 |
+
dict(type='Collect', keys=['img'])
|
| 77 |
+
])
|
| 78 |
+
]))
|
| 79 |
+
evaluation = dict(interval=4, metric='bbox', classwise=True)
|
| 80 |
+
optimizer = dict(
|
| 81 |
+
type='AdamW',
|
| 82 |
+
lr=2e-05,
|
| 83 |
+
betas=(0.9, 0.999),
|
| 84 |
+
weight_decay=0.05,
|
| 85 |
+
constructor='InternViTAdapterLayerDecayOptimizerConstructor',
|
| 86 |
+
paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.9))
|
| 87 |
+
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
|
| 88 |
+
lr_config = dict(
|
| 89 |
+
policy='step',
|
| 90 |
+
warmup='linear',
|
| 91 |
+
warmup_iters=500,
|
| 92 |
+
warmup_ratio=0.3333333333333333,
|
| 93 |
+
step=[8, 11])
|
| 94 |
+
runner = dict(type='EpochBasedRunner', max_epochs=12)
|
| 95 |
+
checkpoint_config = dict(interval=1, max_keep_ckpts=1)
|
| 96 |
+
log_config = dict(interval=500, hooks=[dict(type='TextLoggerHook')])
|
| 97 |
+
dist_params = dict(backend='nccl')
|
| 98 |
+
log_level = 'INFO'
|
| 99 |
+
load_from = None
|
| 100 |
+
resume_from = None
|
| 101 |
+
workflow = [('train', 1)]
|
| 102 |
+
opencv_num_threads = 0
|
| 103 |
+
mp_start_method = 'fork'
|
| 104 |
+
pretrained = 'pretrained/ViTP_ViT_L_300M_rs.safetensors'
|
| 105 |
+
gpu_number = 8
|
| 106 |
+
norm_cfg = dict(type='LN', requires_grad=True)
|
| 107 |
+
num_classes = 20
|
| 108 |
+
model = dict(
|
| 109 |
+
type='CascadeRCNN',
|
| 110 |
+
backbone=dict(
|
| 111 |
+
type='InternViTAdapter',
|
| 112 |
+
pretrain_size=448,
|
| 113 |
+
img_size=800,
|
| 114 |
+
patch_size=16,
|
| 115 |
+
embed_dim=1024,
|
| 116 |
+
depth=24,
|
| 117 |
+
num_heads=16,
|
| 118 |
+
mlp_ratio=4.0,
|
| 119 |
+
drop_path_rate=0.1,
|
| 120 |
+
init_values=0.1,
|
| 121 |
+
with_cp=True,
|
| 122 |
+
use_flash_attn=True,
|
| 123 |
+
qk_normalization=False,
|
| 124 |
+
layerscale_force_fp32=False,
|
| 125 |
+
with_fpn=False,
|
| 126 |
+
freeze_vit=False,
|
| 127 |
+
use_final_norm=True,
|
| 128 |
+
interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
|
| 129 |
+
cffn_ratio=0.25,
|
| 130 |
+
deform_ratio=0.25,
|
| 131 |
+
qkv_bias=True,
|
| 132 |
+
norm_type='layer_norm',
|
| 133 |
+
pretrained='pretrained/ViTP_ViT_L_300M_rs.safetensors',
|
| 134 |
+
pretrained_type='full',
|
| 135 |
+
only_feat_out=True),
|
| 136 |
+
neck=dict(
|
| 137 |
+
type='SimpleFPN',
|
| 138 |
+
in_channels=[1024, 1024, 1024, 1024],
|
| 139 |
+
out_channels=256,
|
| 140 |
+
norm_cfg=dict(type='LN', requires_grad=True),
|
| 141 |
+
use_residual=False,
|
| 142 |
+
num_outs=5),
|
| 143 |
+
rpn_head=dict(
|
| 144 |
+
type='RPNHead',
|
| 145 |
+
in_channels=256,
|
| 146 |
+
feat_channels=256,
|
| 147 |
+
anchor_generator=dict(
|
| 148 |
+
type='AnchorGenerator',
|
| 149 |
+
scales=[8],
|
| 150 |
+
ratios=[0.5, 1.0, 2.0],
|
| 151 |
+
strides=[4, 8, 16, 32, 64]),
|
| 152 |
+
bbox_coder=dict(
|
| 153 |
+
type='DeltaXYWHBBoxCoder',
|
| 154 |
+
target_means=[0.0, 0.0, 0.0, 0.0],
|
| 155 |
+
target_stds=[1.0, 1.0, 1.0, 1.0]),
|
| 156 |
+
loss_cls=dict(
|
| 157 |
+
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
|
| 158 |
+
loss_bbox=dict(
|
| 159 |
+
type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)),
|
| 160 |
+
roi_head=dict(
|
| 161 |
+
type='CascadeRoIHead',
|
| 162 |
+
num_stages=3,
|
| 163 |
+
stage_loss_weights=[1, 0.5, 0.25],
|
| 164 |
+
bbox_roi_extractor=dict(
|
| 165 |
+
type='SingleRoIExtractor',
|
| 166 |
+
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
|
| 167 |
+
out_channels=256,
|
| 168 |
+
featmap_strides=[4, 8, 16, 32]),
|
| 169 |
+
bbox_head=[
|
| 170 |
+
dict(
|
| 171 |
+
type='Shared2FCBBoxHead',
|
| 172 |
+
in_channels=256,
|
| 173 |
+
fc_out_channels=1024,
|
| 174 |
+
roi_feat_size=7,
|
| 175 |
+
num_classes=20,
|
| 176 |
+
bbox_coder=dict(
|
| 177 |
+
type='DeltaXYWHBBoxCoder',
|
| 178 |
+
target_means=[0.0, 0.0, 0.0, 0.0],
|
| 179 |
+
target_stds=[0.1, 0.1, 0.2, 0.2]),
|
| 180 |
+
reg_class_agnostic=True,
|
| 181 |
+
loss_cls=dict(
|
| 182 |
+
type='CrossEntropyLoss',
|
| 183 |
+
use_sigmoid=False,
|
| 184 |
+
loss_weight=1.0),
|
| 185 |
+
loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
|
| 186 |
+
loss_weight=1.0)),
|
| 187 |
+
dict(
|
| 188 |
+
type='Shared2FCBBoxHead',
|
| 189 |
+
in_channels=256,
|
| 190 |
+
fc_out_channels=1024,
|
| 191 |
+
roi_feat_size=7,
|
| 192 |
+
num_classes=20,
|
| 193 |
+
bbox_coder=dict(
|
| 194 |
+
type='DeltaXYWHBBoxCoder',
|
| 195 |
+
target_means=[0.0, 0.0, 0.0, 0.0],
|
| 196 |
+
target_stds=[0.05, 0.05, 0.1, 0.1]),
|
| 197 |
+
reg_class_agnostic=True,
|
| 198 |
+
loss_cls=dict(
|
| 199 |
+
type='CrossEntropyLoss',
|
| 200 |
+
use_sigmoid=False,
|
| 201 |
+
loss_weight=1.0),
|
| 202 |
+
loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
|
| 203 |
+
loss_weight=1.0)),
|
| 204 |
+
dict(
|
| 205 |
+
type='Shared2FCBBoxHead',
|
| 206 |
+
in_channels=256,
|
| 207 |
+
fc_out_channels=1024,
|
| 208 |
+
roi_feat_size=7,
|
| 209 |
+
num_classes=20,
|
| 210 |
+
bbox_coder=dict(
|
| 211 |
+
type='DeltaXYWHBBoxCoder',
|
| 212 |
+
target_means=[0.0, 0.0, 0.0, 0.0],
|
| 213 |
+
target_stds=[0.033, 0.033, 0.067, 0.067]),
|
| 214 |
+
reg_class_agnostic=True,
|
| 215 |
+
loss_cls=dict(
|
| 216 |
+
type='CrossEntropyLoss',
|
| 217 |
+
use_sigmoid=False,
|
| 218 |
+
loss_weight=1.0),
|
| 219 |
+
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
|
| 220 |
+
]),
|
| 221 |
+
train_cfg=dict(
|
| 222 |
+
rpn=dict(
|
| 223 |
+
assigner=dict(
|
| 224 |
+
type='MaxIoUAssigner',
|
| 225 |
+
pos_iou_thr=0.7,
|
| 226 |
+
neg_iou_thr=0.3,
|
| 227 |
+
min_pos_iou=0.3,
|
| 228 |
+
match_low_quality=True,
|
| 229 |
+
ignore_iof_thr=-1),
|
| 230 |
+
sampler=dict(
|
| 231 |
+
type='RandomSampler',
|
| 232 |
+
num=256,
|
| 233 |
+
pos_fraction=0.5,
|
| 234 |
+
neg_pos_ub=-1,
|
| 235 |
+
add_gt_as_proposals=False),
|
| 236 |
+
allowed_border=0,
|
| 237 |
+
pos_weight=-1,
|
| 238 |
+
debug=False),
|
| 239 |
+
rpn_proposal=dict(
|
| 240 |
+
nms_pre=2000,
|
| 241 |
+
max_per_img=2000,
|
| 242 |
+
nms=dict(type='nms', iou_threshold=0.7),
|
| 243 |
+
min_bbox_size=0),
|
| 244 |
+
rcnn=[
|
| 245 |
+
dict(
|
| 246 |
+
assigner=dict(
|
| 247 |
+
type='MaxIoUAssigner',
|
| 248 |
+
pos_iou_thr=0.5,
|
| 249 |
+
neg_iou_thr=0.5,
|
| 250 |
+
min_pos_iou=0.5,
|
| 251 |
+
match_low_quality=False,
|
| 252 |
+
ignore_iof_thr=-1),
|
| 253 |
+
sampler=dict(
|
| 254 |
+
type='RandomSampler',
|
| 255 |
+
num=512,
|
| 256 |
+
pos_fraction=0.25,
|
| 257 |
+
neg_pos_ub=-1,
|
| 258 |
+
add_gt_as_proposals=True),
|
| 259 |
+
pos_weight=-1,
|
| 260 |
+
debug=False),
|
| 261 |
+
dict(
|
| 262 |
+
assigner=dict(
|
| 263 |
+
type='MaxIoUAssigner',
|
| 264 |
+
pos_iou_thr=0.6,
|
| 265 |
+
neg_iou_thr=0.6,
|
| 266 |
+
min_pos_iou=0.6,
|
| 267 |
+
match_low_quality=False,
|
| 268 |
+
ignore_iof_thr=-1),
|
| 269 |
+
sampler=dict(
|
| 270 |
+
type='RandomSampler',
|
| 271 |
+
num=512,
|
| 272 |
+
pos_fraction=0.25,
|
| 273 |
+
neg_pos_ub=-1,
|
| 274 |
+
add_gt_as_proposals=True),
|
| 275 |
+
pos_weight=-1,
|
| 276 |
+
debug=False),
|
| 277 |
+
dict(
|
| 278 |
+
assigner=dict(
|
| 279 |
+
type='MaxIoUAssigner',
|
| 280 |
+
pos_iou_thr=0.7,
|
| 281 |
+
neg_iou_thr=0.7,
|
| 282 |
+
min_pos_iou=0.7,
|
| 283 |
+
match_low_quality=False,
|
| 284 |
+
ignore_iof_thr=-1),
|
| 285 |
+
sampler=dict(
|
| 286 |
+
type='RandomSampler',
|
| 287 |
+
num=512,
|
| 288 |
+
pos_fraction=0.25,
|
| 289 |
+
neg_pos_ub=-1,
|
| 290 |
+
add_gt_as_proposals=True),
|
| 291 |
+
pos_weight=-1,
|
| 292 |
+
debug=False)
|
| 293 |
+
]),
|
| 294 |
+
test_cfg=dict(
|
| 295 |
+
rpn=dict(
|
| 296 |
+
nms_pre=1000,
|
| 297 |
+
max_per_img=1000,
|
| 298 |
+
nms=dict(type='nms', iou_threshold=0.7),
|
| 299 |
+
min_bbox_size=0),
|
| 300 |
+
rcnn=dict(
|
| 301 |
+
score_thr=0.05,
|
| 302 |
+
nms=dict(type='nms', iou_threshold=0.5),
|
| 303 |
+
max_per_img=100)))
|
| 304 |
+
fp16 = dict(loss_scale=dict(init_scale=512))
|
| 305 |
+
work_dir = './work_dirs/dior_inst_tun_TMAug75_8k'
|
| 306 |
+
auto_resume = True
|
| 307 |
+
gpu_ids = range(0, 8)
|
| 308 |
+
device = 'cuda'
|
ckpts/vitp_diorr_orcnn_7508/20250918_082138.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ckpts/vitp_diorr_orcnn_7508/epoch_12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:efb778e73bb38524df174933dcb8ec40e778ed101f571810556942946b917148
|
| 3 |
+
size 1373279149
|
ckpts/vitp_diorr_orcnn_7508/vitp_diorr_orcnn.py
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_type = 'DIORRDataset'
|
| 2 |
+
data_root = '/defaultShare/pubdata/remote_sensing/DIOR/'
|
| 3 |
+
angle_version = 'le90'
|
| 4 |
+
img_norm_cfg = dict(
|
| 5 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
| 6 |
+
train_pipeline = [
|
| 7 |
+
dict(type='LoadImageFromFile'),
|
| 8 |
+
dict(type='LoadAnnotations', with_bbox=True),
|
| 9 |
+
dict(type='RResize', img_scale=(1024, 1024)),
|
| 10 |
+
dict(
|
| 11 |
+
type='RRandomFlip',
|
| 12 |
+
flip_ratio=[0.25, 0.25, 0.25],
|
| 13 |
+
direction=['horizontal', 'vertical', 'diagonal'],
|
| 14 |
+
version='le90'),
|
| 15 |
+
dict(
|
| 16 |
+
type='PolyRandomRotate',
|
| 17 |
+
rotate_ratio=0.5,
|
| 18 |
+
angles_range=180,
|
| 19 |
+
auto_bound=False,
|
| 20 |
+
rect_classes=[5, 15, 19],
|
| 21 |
+
version='le90'),
|
| 22 |
+
dict(
|
| 23 |
+
type='Normalize',
|
| 24 |
+
mean=[123.675, 116.28, 103.53],
|
| 25 |
+
std=[58.395, 57.12, 57.375],
|
| 26 |
+
to_rgb=True),
|
| 27 |
+
dict(type='Pad', size_divisor=32),
|
| 28 |
+
dict(type='DefaultFormatBundle'),
|
| 29 |
+
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
|
| 30 |
+
]
|
| 31 |
+
test_pipeline = [
|
| 32 |
+
dict(type='LoadImageFromFile'),
|
| 33 |
+
dict(
|
| 34 |
+
type='MultiScaleFlipAug',
|
| 35 |
+
img_scale=(1024, 1024),
|
| 36 |
+
flip=False,
|
| 37 |
+
transforms=[
|
| 38 |
+
dict(type='RResize'),
|
| 39 |
+
dict(
|
| 40 |
+
type='Normalize',
|
| 41 |
+
mean=[123.675, 116.28, 103.53],
|
| 42 |
+
std=[58.395, 57.12, 57.375],
|
| 43 |
+
to_rgb=True),
|
| 44 |
+
dict(type='Pad', size_divisor=32),
|
| 45 |
+
dict(type='DefaultFormatBundle'),
|
| 46 |
+
dict(type='Collect', keys=['img'])
|
| 47 |
+
])
|
| 48 |
+
]
|
| 49 |
+
data = dict(
|
| 50 |
+
samples_per_gpu=1,
|
| 51 |
+
workers_per_gpu=4,
|
| 52 |
+
train=dict(
|
| 53 |
+
type='DIORRDataset',
|
| 54 |
+
ann_file=[
|
| 55 |
+
'/defaultShare/pubdata/remote_sensing/DIOR/ImageSets/train.txt',
|
| 56 |
+
'/defaultShare/pubdata/remote_sensing/DIOR/ImageSets/val.txt'
|
| 57 |
+
],
|
| 58 |
+
ann_subdir=
|
| 59 |
+
'/defaultShare/pubdata/remote_sensing/DIOR/Annotations/Oriented Bounding Boxes/',
|
| 60 |
+
img_subdir=
|
| 61 |
+
'/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/trainval/',
|
| 62 |
+
img_prefix=
|
| 63 |
+
'/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/trainval/',
|
| 64 |
+
pipeline=[
|
| 65 |
+
dict(type='LoadImageFromFile'),
|
| 66 |
+
dict(type='LoadAnnotations', with_bbox=True),
|
| 67 |
+
dict(type='RResize', img_scale=(1024, 1024)),
|
| 68 |
+
dict(
|
| 69 |
+
type='RRandomFlip',
|
| 70 |
+
flip_ratio=[0.25, 0.25, 0.25],
|
| 71 |
+
direction=['horizontal', 'vertical', 'diagonal'],
|
| 72 |
+
version='le90'),
|
| 73 |
+
dict(
|
| 74 |
+
type='PolyRandomRotate',
|
| 75 |
+
rotate_ratio=0.5,
|
| 76 |
+
angles_range=180,
|
| 77 |
+
auto_bound=False,
|
| 78 |
+
rect_classes=[5, 15, 19],
|
| 79 |
+
version='le90'),
|
| 80 |
+
dict(
|
| 81 |
+
type='Normalize',
|
| 82 |
+
mean=[123.675, 116.28, 103.53],
|
| 83 |
+
std=[58.395, 57.12, 57.375],
|
| 84 |
+
to_rgb=True),
|
| 85 |
+
dict(type='Pad', size_divisor=32),
|
| 86 |
+
dict(type='DefaultFormatBundle'),
|
| 87 |
+
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
|
| 88 |
+
],
|
| 89 |
+
version='le90'),
|
| 90 |
+
val=dict(
|
| 91 |
+
type='DIORRDataset',
|
| 92 |
+
ann_file='/defaultShare/pubdata/remote_sensing/DIOR/ImageSets/test.txt',
|
| 93 |
+
ann_subdir=
|
| 94 |
+
'/defaultShare/pubdata/remote_sensing/DIOR/Annotations/Oriented Bounding Boxes/',
|
| 95 |
+
img_subdir='/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/test/',
|
| 96 |
+
img_prefix='/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/test/',
|
| 97 |
+
pipeline=[
|
| 98 |
+
dict(type='LoadImageFromFile'),
|
| 99 |
+
dict(
|
| 100 |
+
type='MultiScaleFlipAug',
|
| 101 |
+
img_scale=(1024, 1024),
|
| 102 |
+
flip=False,
|
| 103 |
+
transforms=[
|
| 104 |
+
dict(type='RResize'),
|
| 105 |
+
dict(
|
| 106 |
+
type='Normalize',
|
| 107 |
+
mean=[123.675, 116.28, 103.53],
|
| 108 |
+
std=[58.395, 57.12, 57.375],
|
| 109 |
+
to_rgb=True),
|
| 110 |
+
dict(type='Pad', size_divisor=32),
|
| 111 |
+
dict(type='DefaultFormatBundle'),
|
| 112 |
+
dict(type='Collect', keys=['img'])
|
| 113 |
+
])
|
| 114 |
+
],
|
| 115 |
+
version='le90'),
|
| 116 |
+
test=dict(
|
| 117 |
+
type='DIORRDataset',
|
| 118 |
+
ann_file='/defaultShare/pubdata/remote_sensing/DIOR/ImageSets/test.txt',
|
| 119 |
+
ann_subdir=
|
| 120 |
+
'/defaultShare/pubdata/remote_sensing/DIOR/Annotations/Oriented Bounding Boxes/',
|
| 121 |
+
img_subdir='/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/test/',
|
| 122 |
+
img_prefix='/defaultShare/pubdata/remote_sensing/DIOR/JPEGImages/test/',
|
| 123 |
+
pipeline=[
|
| 124 |
+
dict(type='LoadImageFromFile'),
|
| 125 |
+
dict(
|
| 126 |
+
type='MultiScaleFlipAug',
|
| 127 |
+
img_scale=(1024, 1024),
|
| 128 |
+
flip=False,
|
| 129 |
+
transforms=[
|
| 130 |
+
dict(type='RResize'),
|
| 131 |
+
dict(
|
| 132 |
+
type='Normalize',
|
| 133 |
+
mean=[123.675, 116.28, 103.53],
|
| 134 |
+
std=[58.395, 57.12, 57.375],
|
| 135 |
+
to_rgb=True),
|
| 136 |
+
dict(type='Pad', size_divisor=32),
|
| 137 |
+
dict(type='DefaultFormatBundle'),
|
| 138 |
+
dict(type='Collect', keys=['img'])
|
| 139 |
+
])
|
| 140 |
+
],
|
| 141 |
+
version='le90'))
|
| 142 |
+
evaluation = dict(interval=1, metric='mAP')
|
| 143 |
+
optimizer = dict(
|
| 144 |
+
type='AdamW',
|
| 145 |
+
lr=2.5e-05,
|
| 146 |
+
betas=(0.9, 0.999),
|
| 147 |
+
weight_decay=0.05,
|
| 148 |
+
constructor='InternViTAdapterLayerDecayOptimizerConstructor',
|
| 149 |
+
paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.95))
|
| 150 |
+
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
|
| 151 |
+
lr_config = dict(
|
| 152 |
+
policy='step',
|
| 153 |
+
warmup='linear',
|
| 154 |
+
warmup_iters=500,
|
| 155 |
+
warmup_ratio=0.3333333333333333,
|
| 156 |
+
step=[8, 11])
|
| 157 |
+
runner = dict(type='EpochBasedRunner', max_epochs=12)
|
| 158 |
+
checkpoint_config = dict(interval=1, max_keep_ckpts=1)
|
| 159 |
+
log_config = dict(interval=500, hooks=[dict(type='TextLoggerHook')])
|
| 160 |
+
dist_params = dict(backend='nccl')
|
| 161 |
+
log_level = 'INFO'
|
| 162 |
+
load_from = None
|
| 163 |
+
resume_from = None
|
| 164 |
+
workflow = [('train', 1)]
|
| 165 |
+
opencv_num_threads = 0
|
| 166 |
+
mp_start_method = 'fork'
|
| 167 |
+
pretrained = 'pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors'
|
| 168 |
+
norm_cfg = dict(type='LN', requires_grad=True)
|
| 169 |
+
model = dict(
|
| 170 |
+
type='OrientedRCNN',
|
| 171 |
+
backbone=dict(
|
| 172 |
+
type='InternViTAdapter',
|
| 173 |
+
pretrain_size=448,
|
| 174 |
+
img_size=1024,
|
| 175 |
+
patch_size=16,
|
| 176 |
+
embed_dim=1024,
|
| 177 |
+
depth=24,
|
| 178 |
+
num_heads=16,
|
| 179 |
+
mlp_ratio=4.0,
|
| 180 |
+
drop_path_rate=0.1,
|
| 181 |
+
init_values=0.1,
|
| 182 |
+
with_cp=True,
|
| 183 |
+
use_flash_attn=True,
|
| 184 |
+
qk_normalization=False,
|
| 185 |
+
layerscale_force_fp32=False,
|
| 186 |
+
with_fpn=False,
|
| 187 |
+
freeze_vit=False,
|
| 188 |
+
use_final_norm=True,
|
| 189 |
+
interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
|
| 190 |
+
cffn_ratio=0.25,
|
| 191 |
+
deform_ratio=0.25,
|
| 192 |
+
qkv_bias=True,
|
| 193 |
+
norm_type='layer_norm',
|
| 194 |
+
pretrained=
|
| 195 |
+
'pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors',
|
| 196 |
+
pretrained_type='full',
|
| 197 |
+
only_feat_out=True),
|
| 198 |
+
neck=dict(
|
| 199 |
+
type='SimpleFPN',
|
| 200 |
+
in_channels=[1024, 1024, 1024, 1024],
|
| 201 |
+
out_channels=256,
|
| 202 |
+
norm_cfg=dict(type='LN', requires_grad=True),
|
| 203 |
+
use_residual=False,
|
| 204 |
+
num_outs=5),
|
| 205 |
+
rpn_head=dict(
|
| 206 |
+
type='OrientedRPNHead',
|
| 207 |
+
in_channels=256,
|
| 208 |
+
feat_channels=256,
|
| 209 |
+
version='le90',
|
| 210 |
+
anchor_generator=dict(
|
| 211 |
+
type='AnchorGenerator',
|
| 212 |
+
scales=[8],
|
| 213 |
+
ratios=[0.5, 1.0, 2.0],
|
| 214 |
+
strides=[4, 8, 16, 32, 64]),
|
| 215 |
+
bbox_coder=dict(
|
| 216 |
+
type='MidpointOffsetCoder',
|
| 217 |
+
angle_range='le90',
|
| 218 |
+
target_means=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
|
| 219 |
+
target_stds=[1.0, 1.0, 1.0, 1.0, 0.5, 0.5]),
|
| 220 |
+
loss_cls=dict(
|
| 221 |
+
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
|
| 222 |
+
loss_bbox=dict(
|
| 223 |
+
type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)),
|
| 224 |
+
roi_head=dict(
|
| 225 |
+
type='OrientedStandardRoIHead',
|
| 226 |
+
bbox_roi_extractor=dict(
|
| 227 |
+
type='RotatedSingleRoIExtractor',
|
| 228 |
+
roi_layer=dict(
|
| 229 |
+
type='RoIAlignRotated',
|
| 230 |
+
out_size=7,
|
| 231 |
+
sample_num=2,
|
| 232 |
+
clockwise=True),
|
| 233 |
+
out_channels=256,
|
| 234 |
+
featmap_strides=[4, 8, 16, 32]),
|
| 235 |
+
bbox_head=dict(
|
| 236 |
+
type='RotatedShared2FCBBoxHead',
|
| 237 |
+
in_channels=256,
|
| 238 |
+
fc_out_channels=1024,
|
| 239 |
+
roi_feat_size=7,
|
| 240 |
+
num_classes=20,
|
| 241 |
+
bbox_coder=dict(
|
| 242 |
+
type='DeltaXYWHAOBBoxCoder',
|
| 243 |
+
angle_range='le90',
|
| 244 |
+
norm_factor=None,
|
| 245 |
+
edge_swap=True,
|
| 246 |
+
proj_xy=True,
|
| 247 |
+
target_means=(0.0, 0.0, 0.0, 0.0, 0.0),
|
| 248 |
+
target_stds=(0.1, 0.1, 0.2, 0.2, 0.1)),
|
| 249 |
+
reg_class_agnostic=True,
|
| 250 |
+
loss_cls=dict(
|
| 251 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
|
| 252 |
+
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),
|
| 253 |
+
train_cfg=dict(
|
| 254 |
+
rpn=dict(
|
| 255 |
+
assigner=dict(
|
| 256 |
+
type='MaxIoUAssigner',
|
| 257 |
+
pos_iou_thr=0.7,
|
| 258 |
+
neg_iou_thr=0.3,
|
| 259 |
+
min_pos_iou=0.3,
|
| 260 |
+
match_low_quality=True,
|
| 261 |
+
gpu_assign_thr=800,
|
| 262 |
+
ignore_iof_thr=-1),
|
| 263 |
+
sampler=dict(
|
| 264 |
+
type='RandomSampler',
|
| 265 |
+
num=256,
|
| 266 |
+
pos_fraction=0.5,
|
| 267 |
+
neg_pos_ub=-1,
|
| 268 |
+
add_gt_as_proposals=False),
|
| 269 |
+
allowed_border=0,
|
| 270 |
+
pos_weight=-1,
|
| 271 |
+
debug=False),
|
| 272 |
+
rpn_proposal=dict(
|
| 273 |
+
nms_pre=2000,
|
| 274 |
+
max_per_img=2000,
|
| 275 |
+
nms=dict(type='nms', iou_threshold=0.8),
|
| 276 |
+
min_bbox_size=0),
|
| 277 |
+
rcnn=dict(
|
| 278 |
+
assigner=dict(
|
| 279 |
+
type='MaxIoUAssigner',
|
| 280 |
+
pos_iou_thr=0.5,
|
| 281 |
+
neg_iou_thr=0.5,
|
| 282 |
+
min_pos_iou=0.5,
|
| 283 |
+
match_low_quality=False,
|
| 284 |
+
gpu_assign_thr=800,
|
| 285 |
+
iou_calculator=dict(type='RBboxOverlaps2D'),
|
| 286 |
+
ignore_iof_thr=-1),
|
| 287 |
+
sampler=dict(
|
| 288 |
+
type='RRandomSampler',
|
| 289 |
+
num=512,
|
| 290 |
+
pos_fraction=0.25,
|
| 291 |
+
neg_pos_ub=-1,
|
| 292 |
+
add_gt_as_proposals=True),
|
| 293 |
+
pos_weight=-1,
|
| 294 |
+
debug=False)),
|
| 295 |
+
test_cfg=dict(
|
| 296 |
+
rpn=dict(
|
| 297 |
+
nms_pre=2000,
|
| 298 |
+
max_per_img=2000,
|
| 299 |
+
nms=dict(type='nms', iou_threshold=0.8),
|
| 300 |
+
min_bbox_size=0),
|
| 301 |
+
rcnn=dict(
|
| 302 |
+
nms_pre=2000,
|
| 303 |
+
min_bbox_size=0,
|
| 304 |
+
score_thr=0.05,
|
| 305 |
+
nms=dict(iou_thr=0.1),
|
| 306 |
+
max_per_img=2000)))
|
| 307 |
+
fp16 = dict(loss_scale=dict(init_scale=512))
|
| 308 |
+
work_dir = './work_dirs/diorr_inst_tun_TMAug75_8k'
|
| 309 |
+
auto_resume = False
|
| 310 |
+
gpu_ids = range(0, 8)
|
| 311 |
+
device = 'cuda'
|
ckpts/vitp_dotav2_orcnn_6073/20250726_012424.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ckpts/vitp_dotav2_orcnn_6073/20250726_012424.log.json
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"env_info": "sys.platform: linux\nPython: 3.10.16 (main, Dec 11 2024, 16:24:50) [GCC 11.2.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: NVIDIA GeForce RTX 3090\nCUDA_HOME: /usr/local/cuda-11\nNVCC: Cuda compilation tools, release 11.4, V11.4.120\nGCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0\nPyTorch: 1.12.0\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2024.0-Product Build 20231011 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.3\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37\n - CuDNN 8.3.2 (built against CUDA 11.5)\n - Magma 2.5.2\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always 
-faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.12.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=OFF, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n\nTorchVision: 0.13.0\nOpenCV: 4.11.0\nMMCV: 1.6.1\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.4\nMMRotate: 0.3.4+6fc0c4e", "config": "dataset_type = 'DOTAv2Dataset'\ndata_root = '/defaultShare/pubdata/remote_sensing/dota_v2/'\nangle_version = 'le90'\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', with_bbox=True),\n dict(type='RResize', img_scale=(1024, 1024)),\n dict(\n type='RRandomFlip',\n flip_ratio=[0.25, 0.25, 0.25],\n direction=['horizontal', 'vertical', 'diagonal'],\n version='le90'),\n dict(\n type='PolyRandomRotate',\n rotate_ratio=0.5,\n angles_range=180,\n auto_bound=False,\n rect_classes=[9, 11, 16],\n version='le90'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\ntest_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(1024, 1024),\n flip=False,\n transforms=[\n dict(type='RResize'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img'])\n ])\n]\ndata = dict(\n samples_per_gpu=1,\n workers_per_gpu=4,\n train=dict(\n type='DOTAv2Dataset',\n ann_file=\n 
'/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/trainval/annfiles/',\n img_prefix=\n '/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/trainval/images/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', with_bbox=True),\n dict(type='RResize', img_scale=(1024, 1024)),\n dict(\n type='RRandomFlip',\n flip_ratio=[0.25, 0.25, 0.25],\n direction=['horizontal', 'vertical', 'diagonal'],\n version='le90'),\n dict(\n type='PolyRandomRotate',\n rotate_ratio=0.5,\n angles_range=180,\n auto_bound=False,\n rect_classes=[9, 11, 16],\n version='le90'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n ],\n version='le90'),\n val=dict(\n type='DOTAv2Dataset',\n ann_file=\n '/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/val/annfiles/',\n img_prefix=\n '/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/val/images/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(1024, 1024),\n flip=False,\n transforms=[\n dict(type='RResize'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img'])\n ])\n ],\n version='le90'),\n test=dict(\n type='DOTAv2Dataset',\n ann_file=\n '/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/test/images/',\n img_prefix=\n '/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/test/images/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(1024, 1024),\n flip=False,\n transforms=[\n dict(type='RResize'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', 
size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img'])\n ])\n ],\n version='le90'))\nevaluation = dict(interval=1, metric='mAP')\noptimizer = dict(\n type='AdamW',\n lr=2.5e-05,\n betas=(0.9, 0.999),\n weight_decay=0.05,\n constructor='InternViTAdapterLayerDecayOptimizerConstructor',\n paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.95))\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n policy='step',\n warmup='linear',\n warmup_iters=500,\n warmup_ratio=0.3333333333333333,\n step=[8, 11])\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\ncheckpoint_config = dict(interval=1, max_keep_ckpts=1)\nlog_config = dict(interval=500, hooks=[dict(type='TextLoggerHook')])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\nopencv_num_threads = 0\nmp_start_method = 'fork'\npretrained = 'pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors'\nnorm_cfg = dict(type='LN', requires_grad=True)\nmodel = dict(\n type='OrientedRCNN',\n backbone=dict(\n type='InternViTAdapter',\n pretrain_size=448,\n img_size=1024,\n patch_size=16,\n embed_dim=1024,\n depth=24,\n num_heads=16,\n mlp_ratio=4.0,\n drop_path_rate=0.1,\n init_values=0.1,\n with_cp=True,\n use_flash_attn=True,\n qk_normalization=False,\n layerscale_force_fp32=False,\n with_fpn=False,\n freeze_vit=False,\n use_final_norm=True,\n interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],\n cffn_ratio=0.25,\n deform_ratio=0.25,\n qkv_bias=True,\n norm_type='layer_norm',\n pretrained=\n 'pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors',\n pretrained_type='full',\n only_feat_out=True),\n neck=dict(\n type='SimpleFPN',\n in_channels=[1024, 1024, 1024, 1024],\n out_channels=256,\n norm_cfg=dict(type='LN', requires_grad=True),\n use_residual=False,\n num_outs=5),\n rpn_head=dict(\n type='OrientedRPNHead',\n in_channels=256,\n 
feat_channels=256,\n version='le90',\n anchor_generator=dict(\n type='AnchorGenerator',\n scales=[8],\n ratios=[0.5, 1.0, 2.0],\n strides=[4, 8, 16, 32, 64]),\n bbox_coder=dict(\n type='MidpointOffsetCoder',\n angle_range='le90',\n target_means=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0],\n target_stds=[1.0, 1.0, 1.0, 1.0, 0.5, 0.5]),\n loss_cls=dict(\n type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n loss_bbox=dict(\n type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)),\n roi_head=dict(\n type='OrientedStandardRoIHead',\n bbox_roi_extractor=dict(\n type='RotatedSingleRoIExtractor',\n roi_layer=dict(\n type='RoIAlignRotated',\n out_size=7,\n sample_num=2,\n clockwise=True),\n out_channels=256,\n featmap_strides=[4, 8, 16, 32]),\n bbox_head=dict(\n type='RotatedShared2FCBBoxHead',\n in_channels=256,\n fc_out_channels=1024,\n roi_feat_size=7,\n num_classes=18,\n bbox_coder=dict(\n type='DeltaXYWHAOBBoxCoder',\n angle_range='le90',\n norm_factor=None,\n edge_swap=True,\n proj_xy=True,\n target_means=(0.0, 0.0, 0.0, 0.0, 0.0),\n target_stds=(0.1, 0.1, 0.2, 0.2, 0.1)),\n reg_class_agnostic=True,\n loss_cls=dict(\n type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),\n train_cfg=dict(\n rpn=dict(\n assigner=dict(\n type='MaxIoUAssigner',\n pos_iou_thr=0.7,\n neg_iou_thr=0.3,\n min_pos_iou=0.3,\n match_low_quality=True,\n gpu_assign_thr=1000,\n ignore_iof_thr=-1),\n sampler=dict(\n type='RandomSampler',\n num=256,\n pos_fraction=0.5,\n neg_pos_ub=-1,\n add_gt_as_proposals=False),\n allowed_border=0,\n pos_weight=-1,\n debug=False),\n rpn_proposal=dict(\n nms_pre=2000,\n max_per_img=2000,\n nms=dict(type='nms', iou_threshold=0.8),\n min_bbox_size=0),\n rcnn=dict(\n assigner=dict(\n type='MaxIoUAssigner',\n pos_iou_thr=0.5,\n neg_iou_thr=0.5,\n min_pos_iou=0.5,\n match_low_quality=False,\n gpu_assign_thr=1000,\n iou_calculator=dict(type='RBboxOverlaps2D'),\n ignore_iof_thr=-1),\n 
sampler=dict(\n type='RRandomSampler',\n num=512,\n pos_fraction=0.25,\n neg_pos_ub=-1,\n add_gt_as_proposals=True),\n pos_weight=-1,\n debug=False)),\n test_cfg=dict(\n rpn=dict(\n nms_pre=2000,\n max_per_img=2000,\n nms=dict(type='nms', iou_threshold=0.8),\n min_bbox_size=0),\n rcnn=dict(\n nms_pre=2000,\n min_bbox_size=0,\n score_thr=0.05,\n nms=dict(iou_thr=0.1),\n max_per_img=2000)))\nfp16 = dict(loss_scale=dict(init_scale=512))\nwork_dir = './work_dirs/dotav2_ss_inst_tun_TMAug75_orcnn_8k_submit_fixed'\nauto_resume = False\ngpu_ids = range(0, 8)\ndevice = 'cuda'\n", "seed": 0, "exp_name": "dotav2_ss_inst_tun_TMAug75_orcnn_8k_submit_fixed.py"}
|
| 2 |
+
{"mode": "train", "epoch": 1, "iter": 500, "lr": 1e-05, "memory": 17876, "data_time": 0.01321, "loss_rpn_cls": 0.22962, "loss_rpn_bbox": 0.23782, "loss_cls": 0.2992, "acc": 91.19155, "loss_bbox": 0.36975, "loss": 1.13638, "grad_norm": 6.84031, "time": 0.89729}
|
| 3 |
+
{"mode": "train", "epoch": 1, "iter": 1000, "lr": 1e-05, "memory": 18434, "data_time": 0.00449, "loss_rpn_cls": 0.06197, "loss_rpn_bbox": 0.13979, "loss_cls": 0.27737, "acc": 89.68506, "loss_bbox": 0.32214, "loss": 0.80126, "grad_norm": 8.01017, "time": 0.92367}
|
| 4 |
+
{"mode": "train", "epoch": 1, "iter": 1500, "lr": 1e-05, "memory": 18674, "data_time": 0.00473, "loss_rpn_cls": 0.04664, "loss_rpn_bbox": 0.10544, "loss_cls": 0.25047, "acc": 90.28662, "loss_bbox": 0.28368, "loss": 0.68623, "grad_norm": 7.65981, "time": 0.93148}
|
| 5 |
+
{"mode": "train", "epoch": 1, "iter": 2000, "lr": 1e-05, "memory": 18674, "data_time": 0.0046, "loss_rpn_cls": 0.04112, "loss_rpn_bbox": 0.09466, "loss_cls": 0.23303, "acc": 90.85869, "loss_bbox": 0.2537, "loss": 0.62252, "grad_norm": 7.42095, "time": 0.88799}
|
| 6 |
+
{"mode": "val", "epoch": 1, "iter": 507, "lr": 1e-05, "mAP": 0.55798}
|
| 7 |
+
{"mode": "train", "epoch": 2, "iter": 500, "lr": 1e-05, "memory": 18674, "data_time": 0.01054, "loss_rpn_cls": 0.03452, "loss_rpn_bbox": 0.08228, "loss_cls": 0.21041, "acc": 91.63521, "loss_bbox": 0.22821, "loss": 0.55542, "grad_norm": 6.93511, "time": 0.86113}
|
| 8 |
+
{"mode": "train", "epoch": 2, "iter": 1000, "lr": 1e-05, "memory": 18674, "data_time": 0.00488, "loss_rpn_cls": 0.0323, "loss_rpn_bbox": 0.07856, "loss_cls": 0.20609, "acc": 91.78198, "loss_bbox": 0.21681, "loss": 0.53376, "grad_norm": 6.61931, "time": 0.96748}
|
| 9 |
+
{"mode": "train", "epoch": 2, "iter": 1500, "lr": 1e-05, "memory": 18674, "data_time": 0.00467, "loss_rpn_cls": 0.03153, "loss_rpn_bbox": 0.07904, "loss_cls": 0.19671, "acc": 92.2127, "loss_bbox": 0.20355, "loss": 0.51084, "grad_norm": 6.5069, "time": 0.88819}
|
| 10 |
+
{"mode": "train", "epoch": 2, "iter": 2000, "lr": 1e-05, "memory": 18674, "data_time": 0.0045, "loss_rpn_cls": 0.02838, "loss_rpn_bbox": 0.07284, "loss_cls": 0.18933, "acc": 92.44087, "loss_bbox": 0.20238, "loss": 0.49292, "grad_norm": 6.43785, "time": 0.88837}
|
| 11 |
+
{"mode": "val", "epoch": 2, "iter": 507, "lr": 1e-05, "mAP": 0.67134}
|
| 12 |
+
{"mode": "train", "epoch": 3, "iter": 500, "lr": 1e-05, "memory": 18674, "data_time": 0.01028, "loss_rpn_cls": 0.02596, "loss_rpn_bbox": 0.06967, "loss_cls": 0.17774, "acc": 92.91035, "loss_bbox": 0.19083, "loss": 0.46419, "grad_norm": 6.16322, "time": 0.93415}
|
| 13 |
+
{"mode": "train", "epoch": 3, "iter": 1000, "lr": 1e-05, "memory": 18674, "data_time": 0.0049, "loss_rpn_cls": 0.0243, "loss_rpn_bbox": 0.07143, "loss_cls": 0.17614, "acc": 92.94189, "loss_bbox": 0.18816, "loss": 0.46003, "grad_norm": 6.08296, "time": 0.85458}
|
| 14 |
+
{"mode": "train", "epoch": 3, "iter": 1500, "lr": 1e-05, "memory": 18674, "data_time": 0.00555, "loss_rpn_cls": 0.02326, "loss_rpn_bbox": 0.07019, "loss_cls": 0.17277, "acc": 93.06279, "loss_bbox": 0.18304, "loss": 0.44925, "grad_norm": Infinity, "time": 0.93356}
|
| 15 |
+
{"mode": "train", "epoch": 3, "iter": 2000, "lr": 1e-05, "memory": 18674, "data_time": 0.00419, "loss_rpn_cls": 0.02175, "loss_rpn_bbox": 0.06569, "loss_cls": 0.16608, "acc": 93.34463, "loss_bbox": 0.17497, "loss": 0.42848, "grad_norm": 5.86902, "time": 0.93196}
|
| 16 |
+
{"mode": "val", "epoch": 3, "iter": 507, "lr": 1e-05, "mAP": 0.73974}
|
| 17 |
+
{"mode": "train", "epoch": 4, "iter": 500, "lr": 1e-05, "memory": 18674, "data_time": 0.01021, "loss_rpn_cls": 0.0207, "loss_rpn_bbox": 0.06478, "loss_cls": 0.1601, "acc": 93.57822, "loss_bbox": 0.1722, "loss": 0.41777, "grad_norm": 5.68575, "time": 0.83728}
|
| 18 |
+
{"mode": "train", "epoch": 4, "iter": 1000, "lr": 1e-05, "memory": 18674, "data_time": 0.00434, "loss_rpn_cls": 0.01994, "loss_rpn_bbox": 0.06668, "loss_cls": 0.15932, "acc": 93.60884, "loss_bbox": 0.16885, "loss": 0.41479, "grad_norm": 5.81053, "time": 0.88715}
|
| 19 |
+
{"mode": "train", "epoch": 4, "iter": 1500, "lr": 1e-05, "memory": 18674, "data_time": 0.00427, "loss_rpn_cls": 0.01936, "loss_rpn_bbox": 0.06142, "loss_cls": 0.15656, "acc": 93.70991, "loss_bbox": 0.16712, "loss": 0.40445, "grad_norm": 5.80572, "time": 0.92355}
|
| 20 |
+
{"mode": "train", "epoch": 4, "iter": 2000, "lr": 1e-05, "memory": 18674, "data_time": 0.0045, "loss_rpn_cls": 0.01946, "loss_rpn_bbox": 0.06579, "loss_cls": 0.15377, "acc": 93.8209, "loss_bbox": 0.16848, "loss": 0.4075, "grad_norm": 5.65134, "time": 0.91218}
|
| 21 |
+
{"mode": "val", "epoch": 4, "iter": 507, "lr": 1e-05, "mAP": 0.77918}
|
| 22 |
+
{"mode": "train", "epoch": 5, "iter": 500, "lr": 1e-05, "memory": 18674, "data_time": 0.01137, "loss_rpn_cls": 0.01661, "loss_rpn_bbox": 0.06183, "loss_cls": 0.14519, "acc": 94.13652, "loss_bbox": 0.16133, "loss": 0.38495, "grad_norm": 5.58271, "time": 0.83983}
|
| 23 |
+
{"mode": "train", "epoch": 5, "iter": 1000, "lr": 1e-05, "memory": 18674, "data_time": 0.00575, "loss_rpn_cls": 0.01661, "loss_rpn_bbox": 0.05994, "loss_cls": 0.14637, "acc": 94.13779, "loss_bbox": 0.15861, "loss": 0.38153, "grad_norm": 5.50105, "time": 0.90623}
|
| 24 |
+
{"mode": "train", "epoch": 5, "iter": 1500, "lr": 1e-05, "memory": 18684, "data_time": 0.00576, "loss_rpn_cls": 0.01698, "loss_rpn_bbox": 0.05849, "loss_cls": 0.1446, "acc": 94.17349, "loss_bbox": 0.1583, "loss": 0.37836, "grad_norm": 5.34356, "time": 0.88473}
|
| 25 |
+
{"mode": "train", "epoch": 5, "iter": 2000, "lr": 1e-05, "memory": 18684, "data_time": 0.00543, "loss_rpn_cls": 0.01642, "loss_rpn_bbox": 0.05949, "loss_cls": 0.14323, "acc": 94.2292, "loss_bbox": 0.1564, "loss": 0.37555, "grad_norm": 5.26925, "time": 0.93525}
|
| 26 |
+
{"mode": "val", "epoch": 5, "iter": 507, "lr": 1e-05, "mAP": 0.77819}
|
| 27 |
+
{"mode": "train", "epoch": 6, "iter": 500, "lr": 1e-05, "memory": 18684, "data_time": 0.01102, "loss_rpn_cls": 0.01521, "loss_rpn_bbox": 0.05975, "loss_cls": 0.14136, "acc": 94.25312, "loss_bbox": 0.15807, "loss": 0.37439, "grad_norm": 5.27262, "time": 0.84039}
|
| 28 |
+
{"mode": "train", "epoch": 6, "iter": 1000, "lr": 1e-05, "memory": 18684, "data_time": 0.00541, "loss_rpn_cls": 0.01487, "loss_rpn_bbox": 0.05635, "loss_cls": 0.13433, "acc": 94.55645, "loss_bbox": 0.14917, "loss": 0.35473, "grad_norm": Infinity, "time": 0.94292}
|
| 29 |
+
{"mode": "train", "epoch": 6, "iter": 1500, "lr": 1e-05, "memory": 18684, "data_time": 0.00517, "loss_rpn_cls": 0.01424, "loss_rpn_bbox": 0.05669, "loss_cls": 0.13325, "acc": 94.6062, "loss_bbox": 0.14822, "loss": 0.35239, "grad_norm": 5.11383, "time": 0.94904}
|
| 30 |
+
{"mode": "train", "epoch": 6, "iter": 2000, "lr": 1e-05, "memory": 18684, "data_time": 0.00494, "loss_rpn_cls": 0.01532, "loss_rpn_bbox": 0.05812, "loss_cls": 0.1349, "acc": 94.57983, "loss_bbox": 0.14951, "loss": 0.35784, "grad_norm": 5.09458, "time": 0.8966}
|
| 31 |
+
{"mode": "val", "epoch": 6, "iter": 507, "lr": 1e-05, "mAP": 0.78846}
|
| 32 |
+
{"mode": "train", "epoch": 7, "iter": 500, "lr": 1e-05, "memory": 18684, "data_time": 0.0115, "loss_rpn_cls": 0.01331, "loss_rpn_bbox": 0.05755, "loss_cls": 0.12985, "acc": 94.76611, "loss_bbox": 0.14636, "loss": 0.34707, "grad_norm": 5.24194, "time": 0.86415}
|
| 33 |
+
{"mode": "train", "epoch": 7, "iter": 1000, "lr": 1e-05, "memory": 18684, "data_time": 0.00517, "loss_rpn_cls": 0.01366, "loss_rpn_bbox": 0.05277, "loss_cls": 0.12833, "acc": 94.75073, "loss_bbox": 0.14497, "loss": 0.33974, "grad_norm": 5.13276, "time": 0.88887}
|
| 34 |
+
{"mode": "train", "epoch": 7, "iter": 1500, "lr": 1e-05, "memory": 18684, "data_time": 0.00541, "loss_rpn_cls": 0.01313, "loss_rpn_bbox": 0.05564, "loss_cls": 0.12753, "acc": 94.81455, "loss_bbox": 0.14402, "loss": 0.34032, "grad_norm": Infinity, "time": 0.93935}
|
| 35 |
+
{"mode": "train", "epoch": 7, "iter": 2000, "lr": 1e-05, "memory": 18684, "data_time": 0.00511, "loss_rpn_cls": 0.01303, "loss_rpn_bbox": 0.05475, "loss_cls": 0.13166, "acc": 94.66523, "loss_bbox": 0.14399, "loss": 0.34343, "grad_norm": 5.21322, "time": 0.8627}
|
| 36 |
+
{"mode": "val", "epoch": 7, "iter": 507, "lr": 1e-05, "mAP": 0.81797}
|
| 37 |
+
{"mode": "train", "epoch": 8, "iter": 500, "lr": 1e-05, "memory": 18684, "data_time": 0.01038, "loss_rpn_cls": 0.01266, "loss_rpn_bbox": 0.05059, "loss_cls": 0.12186, "acc": 95.03833, "loss_bbox": 0.14016, "loss": 0.32527, "grad_norm": 4.9427, "time": 0.9485}
|
| 38 |
+
{"mode": "train", "epoch": 8, "iter": 1000, "lr": 1e-05, "memory": 18684, "data_time": 0.005, "loss_rpn_cls": 0.01225, "loss_rpn_bbox": 0.05314, "loss_cls": 0.1225, "acc": 94.99727, "loss_bbox": 0.1376, "loss": 0.32549, "grad_norm": 4.87507, "time": 0.90573}
|
| 39 |
+
{"mode": "train", "epoch": 8, "iter": 1500, "lr": 1e-05, "memory": 18684, "data_time": 0.00499, "loss_rpn_cls": 0.01257, "loss_rpn_bbox": 0.052, "loss_cls": 0.12513, "acc": 94.90845, "loss_bbox": 0.1408, "loss": 0.3305, "grad_norm": 4.96171, "time": 0.86173}
|
| 40 |
+
{"mode": "train", "epoch": 8, "iter": 2000, "lr": 1e-05, "memory": 18684, "data_time": 0.00506, "loss_rpn_cls": 0.01301, "loss_rpn_bbox": 0.05485, "loss_cls": 0.12459, "acc": 94.92754, "loss_bbox": 0.14242, "loss": 0.33486, "grad_norm": 4.92717, "time": 0.95854}
|
| 41 |
+
{"mode": "val", "epoch": 8, "iter": 507, "lr": 1e-05, "mAP": 0.82038}
|
| 42 |
+
{"mode": "train", "epoch": 9, "iter": 500, "lr": 0.0, "memory": 18684, "data_time": 0.01025, "loss_rpn_cls": 0.01003, "loss_rpn_bbox": 0.04765, "loss_cls": 0.11069, "acc": 95.49946, "loss_bbox": 0.12562, "loss": 0.29398, "grad_norm": 4.3049, "time": 0.91349}
|
| 43 |
+
{"mode": "train", "epoch": 9, "iter": 1000, "lr": 0.0, "memory": 18684, "data_time": 0.00495, "loss_rpn_cls": 0.01021, "loss_rpn_bbox": 0.04601, "loss_cls": 0.10919, "acc": 95.51021, "loss_bbox": 0.12339, "loss": 0.2888, "grad_norm": 4.20204, "time": 0.8631}
|
| 44 |
+
{"mode": "train", "epoch": 9, "iter": 1500, "lr": 0.0, "memory": 18684, "data_time": 0.00576, "loss_rpn_cls": 0.00961, "loss_rpn_bbox": 0.04675, "loss_cls": 0.10608, "acc": 95.65625, "loss_bbox": 0.1228, "loss": 0.28525, "grad_norm": 4.11367, "time": 0.89445}
|
| 45 |
+
{"mode": "train", "epoch": 9, "iter": 2000, "lr": 0.0, "memory": 18684, "data_time": 0.00502, "loss_rpn_cls": 0.00968, "loss_rpn_bbox": 0.04612, "loss_cls": 0.10711, "acc": 95.61128, "loss_bbox": 0.12204, "loss": 0.28495, "grad_norm": 4.12107, "time": 0.93494}
|
| 46 |
+
{"mode": "val", "epoch": 9, "iter": 507, "lr": 0.0, "mAP": 0.83246}
|
| 47 |
+
{"mode": "train", "epoch": 10, "iter": 500, "lr": 0.0, "memory": 18722, "data_time": 0.0103, "loss_rpn_cls": 0.009, "loss_rpn_bbox": 0.04503, "loss_cls": 0.1037, "acc": 95.71821, "loss_bbox": 0.11915, "loss": 0.27688, "grad_norm": NaN, "time": 0.86487}
|
| 48 |
+
{"mode": "train", "epoch": 10, "iter": 1000, "lr": 0.0, "memory": 18722, "data_time": 0.00489, "loss_rpn_cls": 0.00899, "loss_rpn_bbox": 0.04443, "loss_cls": 0.10421, "acc": 95.70728, "loss_bbox": 0.12051, "loss": 0.27814, "grad_norm": 4.01737, "time": 0.92096}
|
| 49 |
+
{"mode": "train", "epoch": 10, "iter": 1500, "lr": 0.0, "memory": 18722, "data_time": 0.00489, "loss_rpn_cls": 0.00916, "loss_rpn_bbox": 0.04545, "loss_cls": 0.10351, "acc": 95.72437, "loss_bbox": 0.11887, "loss": 0.27699, "grad_norm": 4.11703, "time": 0.93527}
|
| 50 |
+
{"mode": "train", "epoch": 10, "iter": 2000, "lr": 0.0, "memory": 18722, "data_time": 0.00452, "loss_rpn_cls": 0.00918, "loss_rpn_bbox": 0.04553, "loss_cls": 0.10346, "acc": 95.7481, "loss_bbox": 0.11914, "loss": 0.27731, "grad_norm": 4.26776, "time": 0.9228}
|
| 51 |
+
{"mode": "val", "epoch": 10, "iter": 507, "lr": 0.0, "mAP": 0.83925}
|
| 52 |
+
{"mode": "train", "epoch": 11, "iter": 500, "lr": 0.0, "memory": 18722, "data_time": 0.0105, "loss_rpn_cls": 0.00882, "loss_rpn_bbox": 0.04445, "loss_cls": 0.09972, "acc": 95.8978, "loss_bbox": 0.11679, "loss": 0.26977, "grad_norm": 4.12779, "time": 0.93442}
|
| 53 |
+
{"mode": "train", "epoch": 11, "iter": 1000, "lr": 0.0, "memory": 18722, "data_time": 0.0052, "loss_rpn_cls": 0.00876, "loss_rpn_bbox": 0.04516, "loss_cls": 0.10168, "acc": 95.81157, "loss_bbox": 0.11813, "loss": 0.27373, "grad_norm": 4.12723, "time": 0.89153}
|
| 54 |
+
{"mode": "train", "epoch": 11, "iter": 1500, "lr": 0.0, "memory": 18722, "data_time": 0.00459, "loss_rpn_cls": 0.00864, "loss_rpn_bbox": 0.04431, "loss_cls": 0.09949, "acc": 95.87734, "loss_bbox": 0.11514, "loss": 0.26758, "grad_norm": 4.13996, "time": 0.90387}
|
| 55 |
+
{"mode": "train", "epoch": 11, "iter": 2000, "lr": 0.0, "memory": 18722, "data_time": 0.00435, "loss_rpn_cls": 0.00881, "loss_rpn_bbox": 0.04572, "loss_cls": 0.1016, "acc": 95.80464, "loss_bbox": 0.11718, "loss": 0.27331, "grad_norm": 4.13149, "time": 0.86303}
|
| 56 |
+
{"mode": "val", "epoch": 11, "iter": 507, "lr": 0.0, "mAP": 0.84054}
|
| 57 |
+
{"mode": "train", "epoch": 12, "iter": 500, "lr": 0.0, "memory": 18722, "data_time": 0.01022, "loss_rpn_cls": 0.00828, "loss_rpn_bbox": 0.0456, "loss_cls": 0.09908, "acc": 95.91694, "loss_bbox": 0.11592, "loss": 0.26888, "grad_norm": 4.12893, "time": 0.93153}
|
| 58 |
+
{"mode": "train", "epoch": 12, "iter": 1000, "lr": 0.0, "memory": 18722, "data_time": 0.00509, "loss_rpn_cls": 0.00866, "loss_rpn_bbox": 0.04128, "loss_cls": 0.09904, "acc": 95.91704, "loss_bbox": 0.11454, "loss": 0.26352, "grad_norm": Infinity, "time": 0.90641}
|
| 59 |
+
{"mode": "train", "epoch": 12, "iter": 1500, "lr": 0.0, "memory": 18722, "data_time": 0.0044, "loss_rpn_cls": 0.00815, "loss_rpn_bbox": 0.0453, "loss_cls": 0.09877, "acc": 95.94561, "loss_bbox": 0.11316, "loss": 0.26537, "grad_norm": 3.98564, "time": 0.88759}
|
| 60 |
+
{"mode": "train", "epoch": 12, "iter": 2000, "lr": 0.0, "memory": 18722, "data_time": 0.00443, "loss_rpn_cls": 0.0083, "loss_rpn_bbox": 0.04348, "loss_cls": 0.09853, "acc": 95.96401, "loss_bbox": 0.11658, "loss": 0.26689, "grad_norm": 4.07181, "time": 0.88259}
|
| 61 |
+
{"mode": "val", "epoch": 12, "iter": 507, "lr": 0.0, "mAP": 0.83731}
|
ckpts/vitp_dotav2_orcnn_6073/epoch_12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8aa7c0b74b0b90c9e1560a679407b8fdaa6e35449c65d00ac99151d7c62ba075
|
| 3 |
+
size 1373270509
|
ckpts/vitp_dotav2_orcnn_6073/vitp_dotav2_orcnn.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_type = 'DOTAv2Dataset'
|
| 2 |
+
data_root = '/defaultShare/pubdata/remote_sensing/dota_v2/'
|
| 3 |
+
angle_version = 'le90'
|
| 4 |
+
img_norm_cfg = dict(
|
| 5 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
| 6 |
+
train_pipeline = [
|
| 7 |
+
dict(type='LoadImageFromFile'),
|
| 8 |
+
dict(type='LoadAnnotations', with_bbox=True),
|
| 9 |
+
dict(type='RResize', img_scale=(1024, 1024)),
|
| 10 |
+
dict(
|
| 11 |
+
type='RRandomFlip',
|
| 12 |
+
flip_ratio=[0.25, 0.25, 0.25],
|
| 13 |
+
direction=['horizontal', 'vertical', 'diagonal'],
|
| 14 |
+
version='le90'),
|
| 15 |
+
dict(
|
| 16 |
+
type='PolyRandomRotate',
|
| 17 |
+
rotate_ratio=0.5,
|
| 18 |
+
angles_range=180,
|
| 19 |
+
auto_bound=False,
|
| 20 |
+
rect_classes=[9, 11, 16],
|
| 21 |
+
version='le90'),
|
| 22 |
+
dict(
|
| 23 |
+
type='Normalize',
|
| 24 |
+
mean=[123.675, 116.28, 103.53],
|
| 25 |
+
std=[58.395, 57.12, 57.375],
|
| 26 |
+
to_rgb=True),
|
| 27 |
+
dict(type='Pad', size_divisor=32),
|
| 28 |
+
dict(type='DefaultFormatBundle'),
|
| 29 |
+
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
|
| 30 |
+
]
|
| 31 |
+
test_pipeline = [
|
| 32 |
+
dict(type='LoadImageFromFile'),
|
| 33 |
+
dict(
|
| 34 |
+
type='MultiScaleFlipAug',
|
| 35 |
+
img_scale=(1024, 1024),
|
| 36 |
+
flip=False,
|
| 37 |
+
transforms=[
|
| 38 |
+
dict(type='RResize'),
|
| 39 |
+
dict(
|
| 40 |
+
type='Normalize',
|
| 41 |
+
mean=[123.675, 116.28, 103.53],
|
| 42 |
+
std=[58.395, 57.12, 57.375],
|
| 43 |
+
to_rgb=True),
|
| 44 |
+
dict(type='Pad', size_divisor=32),
|
| 45 |
+
dict(type='DefaultFormatBundle'),
|
| 46 |
+
dict(type='Collect', keys=['img'])
|
| 47 |
+
])
|
| 48 |
+
]
|
| 49 |
+
data = dict(
|
| 50 |
+
samples_per_gpu=1,
|
| 51 |
+
workers_per_gpu=4,
|
| 52 |
+
train=dict(
|
| 53 |
+
type='DOTAv2Dataset',
|
| 54 |
+
ann_file=
|
| 55 |
+
'/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/trainval/annfiles/',
|
| 56 |
+
img_prefix=
|
| 57 |
+
'/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/trainval/images/',
|
| 58 |
+
pipeline=[
|
| 59 |
+
dict(type='LoadImageFromFile'),
|
| 60 |
+
dict(type='LoadAnnotations', with_bbox=True),
|
| 61 |
+
dict(type='RResize', img_scale=(1024, 1024)),
|
| 62 |
+
dict(
|
| 63 |
+
type='RRandomFlip',
|
| 64 |
+
flip_ratio=[0.25, 0.25, 0.25],
|
| 65 |
+
direction=['horizontal', 'vertical', 'diagonal'],
|
| 66 |
+
version='le90'),
|
| 67 |
+
dict(
|
| 68 |
+
type='PolyRandomRotate',
|
| 69 |
+
rotate_ratio=0.5,
|
| 70 |
+
angles_range=180,
|
| 71 |
+
auto_bound=False,
|
| 72 |
+
rect_classes=[9, 11, 16],
|
| 73 |
+
version='le90'),
|
| 74 |
+
dict(
|
| 75 |
+
type='Normalize',
|
| 76 |
+
mean=[123.675, 116.28, 103.53],
|
| 77 |
+
std=[58.395, 57.12, 57.375],
|
| 78 |
+
to_rgb=True),
|
| 79 |
+
dict(type='Pad', size_divisor=32),
|
| 80 |
+
dict(type='DefaultFormatBundle'),
|
| 81 |
+
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
|
| 82 |
+
],
|
| 83 |
+
version='le90'),
|
| 84 |
+
val=dict(
|
| 85 |
+
type='DOTAv2Dataset',
|
| 86 |
+
ann_file=
|
| 87 |
+
'/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/val/annfiles/',
|
| 88 |
+
img_prefix=
|
| 89 |
+
'/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/val/images/',
|
| 90 |
+
pipeline=[
|
| 91 |
+
dict(type='LoadImageFromFile'),
|
| 92 |
+
dict(
|
| 93 |
+
type='MultiScaleFlipAug',
|
| 94 |
+
img_scale=(1024, 1024),
|
| 95 |
+
flip=False,
|
| 96 |
+
transforms=[
|
| 97 |
+
dict(type='RResize'),
|
| 98 |
+
dict(
|
| 99 |
+
type='Normalize',
|
| 100 |
+
mean=[123.675, 116.28, 103.53],
|
| 101 |
+
std=[58.395, 57.12, 57.375],
|
| 102 |
+
to_rgb=True),
|
| 103 |
+
dict(type='Pad', size_divisor=32),
|
| 104 |
+
dict(type='DefaultFormatBundle'),
|
| 105 |
+
dict(type='Collect', keys=['img'])
|
| 106 |
+
])
|
| 107 |
+
],
|
| 108 |
+
version='le90'),
|
| 109 |
+
test=dict(
|
| 110 |
+
type='DOTAv2Dataset',
|
| 111 |
+
ann_file=
|
| 112 |
+
'/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/test/images/',
|
| 113 |
+
img_prefix=
|
| 114 |
+
'/defaultShare/pubdata/remote_sensing/dota_v2/split_ss_dota/test/images/',
|
| 115 |
+
pipeline=[
|
| 116 |
+
dict(type='LoadImageFromFile'),
|
| 117 |
+
dict(
|
| 118 |
+
type='MultiScaleFlipAug',
|
| 119 |
+
img_scale=(1024, 1024),
|
| 120 |
+
flip=False,
|
| 121 |
+
transforms=[
|
| 122 |
+
dict(type='RResize'),
|
| 123 |
+
dict(
|
| 124 |
+
type='Normalize',
|
| 125 |
+
mean=[123.675, 116.28, 103.53],
|
| 126 |
+
std=[58.395, 57.12, 57.375],
|
| 127 |
+
to_rgb=True),
|
| 128 |
+
dict(type='Pad', size_divisor=32),
|
| 129 |
+
dict(type='DefaultFormatBundle'),
|
| 130 |
+
dict(type='Collect', keys=['img'])
|
| 131 |
+
])
|
| 132 |
+
],
|
| 133 |
+
version='le90'))
|
| 134 |
+
evaluation = dict(interval=1, metric='mAP')
|
| 135 |
+
optimizer = dict(
|
| 136 |
+
type='AdamW',
|
| 137 |
+
lr=2.5e-05,
|
| 138 |
+
betas=(0.9, 0.999),
|
| 139 |
+
weight_decay=0.05,
|
| 140 |
+
constructor='InternViTAdapterLayerDecayOptimizerConstructor',
|
| 141 |
+
paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.95))
|
| 142 |
+
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
|
| 143 |
+
lr_config = dict(
|
| 144 |
+
policy='step',
|
| 145 |
+
warmup='linear',
|
| 146 |
+
warmup_iters=500,
|
| 147 |
+
warmup_ratio=0.3333333333333333,
|
| 148 |
+
step=[8, 11])
|
| 149 |
+
runner = dict(type='EpochBasedRunner', max_epochs=12)
|
| 150 |
+
checkpoint_config = dict(interval=1, max_keep_ckpts=1)
|
| 151 |
+
log_config = dict(interval=500, hooks=[dict(type='TextLoggerHook')])
|
| 152 |
+
dist_params = dict(backend='nccl')
|
| 153 |
+
log_level = 'INFO'
|
| 154 |
+
load_from = None
|
| 155 |
+
resume_from = None
|
| 156 |
+
workflow = [('train', 1)]
|
| 157 |
+
opencv_num_threads = 0
|
| 158 |
+
mp_start_method = 'fork'
|
| 159 |
+
pretrained = 'pretrained/ViTP_ViT_L_300M_rs.safetensors'
|
| 160 |
+
norm_cfg = dict(type='LN', requires_grad=True)
|
| 161 |
+
model = dict(
|
| 162 |
+
type='OrientedRCNN',
|
| 163 |
+
backbone=dict(
|
| 164 |
+
type='InternViTAdapter',
|
| 165 |
+
pretrain_size=448,
|
| 166 |
+
img_size=1024,
|
| 167 |
+
patch_size=16,
|
| 168 |
+
embed_dim=1024,
|
| 169 |
+
depth=24,
|
| 170 |
+
num_heads=16,
|
| 171 |
+
mlp_ratio=4.0,
|
| 172 |
+
drop_path_rate=0.1,
|
| 173 |
+
init_values=0.1,
|
| 174 |
+
with_cp=True,
|
| 175 |
+
use_flash_attn=True,
|
| 176 |
+
qk_normalization=False,
|
| 177 |
+
layerscale_force_fp32=False,
|
| 178 |
+
with_fpn=False,
|
| 179 |
+
freeze_vit=False,
|
| 180 |
+
use_final_norm=True,
|
| 181 |
+
interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
|
| 182 |
+
cffn_ratio=0.25,
|
| 183 |
+
deform_ratio=0.25,
|
| 184 |
+
qkv_bias=True,
|
| 185 |
+
norm_type='layer_norm',
|
| 186 |
+
pretrained='pretrained/ViTP_ViT_L_300M_rs.safetensors',
|
| 187 |
+
pretrained_type='full',
|
| 188 |
+
only_feat_out=True),
|
| 189 |
+
neck=dict(
|
| 190 |
+
type='SimpleFPN',
|
| 191 |
+
in_channels=[1024, 1024, 1024, 1024],
|
| 192 |
+
out_channels=256,
|
| 193 |
+
norm_cfg=dict(type='LN', requires_grad=True),
|
| 194 |
+
use_residual=False,
|
| 195 |
+
num_outs=5),
|
| 196 |
+
rpn_head=dict(
|
| 197 |
+
type='OrientedRPNHead',
|
| 198 |
+
in_channels=256,
|
| 199 |
+
feat_channels=256,
|
| 200 |
+
version='le90',
|
| 201 |
+
anchor_generator=dict(
|
| 202 |
+
type='AnchorGenerator',
|
| 203 |
+
scales=[8],
|
| 204 |
+
ratios=[0.5, 1.0, 2.0],
|
| 205 |
+
strides=[4, 8, 16, 32, 64]),
|
| 206 |
+
bbox_coder=dict(
|
| 207 |
+
type='MidpointOffsetCoder',
|
| 208 |
+
angle_range='le90',
|
| 209 |
+
target_means=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
|
| 210 |
+
target_stds=[1.0, 1.0, 1.0, 1.0, 0.5, 0.5]),
|
| 211 |
+
loss_cls=dict(
|
| 212 |
+
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
|
| 213 |
+
loss_bbox=dict(
|
| 214 |
+
type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)),
|
| 215 |
+
roi_head=dict(
|
| 216 |
+
type='OrientedStandardRoIHead',
|
| 217 |
+
bbox_roi_extractor=dict(
|
| 218 |
+
type='RotatedSingleRoIExtractor',
|
| 219 |
+
roi_layer=dict(
|
| 220 |
+
type='RoIAlignRotated',
|
| 221 |
+
out_size=7,
|
| 222 |
+
sample_num=2,
|
| 223 |
+
clockwise=True),
|
| 224 |
+
out_channels=256,
|
| 225 |
+
featmap_strides=[4, 8, 16, 32]),
|
| 226 |
+
bbox_head=dict(
|
| 227 |
+
type='RotatedShared2FCBBoxHead',
|
| 228 |
+
in_channels=256,
|
| 229 |
+
fc_out_channels=1024,
|
| 230 |
+
roi_feat_size=7,
|
| 231 |
+
num_classes=18,
|
| 232 |
+
bbox_coder=dict(
|
| 233 |
+
type='DeltaXYWHAOBBoxCoder',
|
| 234 |
+
angle_range='le90',
|
| 235 |
+
norm_factor=None,
|
| 236 |
+
edge_swap=True,
|
| 237 |
+
proj_xy=True,
|
| 238 |
+
target_means=(0.0, 0.0, 0.0, 0.0, 0.0),
|
| 239 |
+
target_stds=(0.1, 0.1, 0.2, 0.2, 0.1)),
|
| 240 |
+
reg_class_agnostic=True,
|
| 241 |
+
loss_cls=dict(
|
| 242 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
|
| 243 |
+
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),
|
| 244 |
+
train_cfg=dict(
|
| 245 |
+
rpn=dict(
|
| 246 |
+
assigner=dict(
|
| 247 |
+
type='MaxIoUAssigner',
|
| 248 |
+
pos_iou_thr=0.7,
|
| 249 |
+
neg_iou_thr=0.3,
|
| 250 |
+
min_pos_iou=0.3,
|
| 251 |
+
match_low_quality=True,
|
| 252 |
+
gpu_assign_thr=1000,
|
| 253 |
+
ignore_iof_thr=-1),
|
| 254 |
+
sampler=dict(
|
| 255 |
+
type='RandomSampler',
|
| 256 |
+
num=256,
|
| 257 |
+
pos_fraction=0.5,
|
| 258 |
+
neg_pos_ub=-1,
|
| 259 |
+
add_gt_as_proposals=False),
|
| 260 |
+
allowed_border=0,
|
| 261 |
+
pos_weight=-1,
|
| 262 |
+
debug=False),
|
| 263 |
+
rpn_proposal=dict(
|
| 264 |
+
nms_pre=2000,
|
| 265 |
+
max_per_img=2000,
|
| 266 |
+
nms=dict(type='nms', iou_threshold=0.8),
|
| 267 |
+
min_bbox_size=0),
|
| 268 |
+
rcnn=dict(
|
| 269 |
+
assigner=dict(
|
| 270 |
+
type='MaxIoUAssigner',
|
| 271 |
+
pos_iou_thr=0.5,
|
| 272 |
+
neg_iou_thr=0.5,
|
| 273 |
+
min_pos_iou=0.5,
|
| 274 |
+
match_low_quality=False,
|
| 275 |
+
gpu_assign_thr=1000,
|
| 276 |
+
iou_calculator=dict(type='RBboxOverlaps2D'),
|
| 277 |
+
ignore_iof_thr=-1),
|
| 278 |
+
sampler=dict(
|
| 279 |
+
type='RRandomSampler',
|
| 280 |
+
num=512,
|
| 281 |
+
pos_fraction=0.25,
|
| 282 |
+
neg_pos_ub=-1,
|
| 283 |
+
add_gt_as_proposals=True),
|
| 284 |
+
pos_weight=-1,
|
| 285 |
+
debug=False)),
|
| 286 |
+
test_cfg=dict(
|
| 287 |
+
rpn=dict(
|
| 288 |
+
nms_pre=2000,
|
| 289 |
+
max_per_img=2000,
|
| 290 |
+
nms=dict(type='nms', iou_threshold=0.8),
|
| 291 |
+
min_bbox_size=0),
|
| 292 |
+
rcnn=dict(
|
| 293 |
+
nms_pre=2000,
|
| 294 |
+
min_bbox_size=0,
|
| 295 |
+
score_thr=0.05,
|
| 296 |
+
nms=dict(iou_thr=0.1),
|
| 297 |
+
max_per_img=2000)))
|
| 298 |
+
fp16 = dict(loss_scale=dict(init_scale=512))
|
| 299 |
+
work_dir = './work_dirs/vitp_dotav2_orcnn'
|
| 300 |
+
auto_resume = False
|
| 301 |
+
gpu_ids = range(0, 8)
|
| 302 |
+
device = 'cuda'
|
ckpts/vitp_isaid_upernet_7114/20250803_154801.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ckpts/vitp_isaid_upernet_7114/20250803_154801.log.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ckpts/vitp_isaid_upernet_7114/ViTP_isaid_upernet.py
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_type = 'iSAIDDataset'
|
| 2 |
+
data_root = '/defaultShare/pubdata/remote_sensing/iSAID'
|
| 3 |
+
img_norm_cfg = dict(
|
| 4 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
| 5 |
+
crop_size = (896, 896)
|
| 6 |
+
data = dict(
|
| 7 |
+
samples_per_gpu=2,
|
| 8 |
+
workers_per_gpu=4,
|
| 9 |
+
train=dict(
|
| 10 |
+
type='iSAIDDataset',
|
| 11 |
+
data_root='/defaultShare/pubdata/remote_sensing/iSAID',
|
| 12 |
+
img_dir='img_dir/train',
|
| 13 |
+
ann_dir='ann_dir_old/train',
|
| 14 |
+
pipeline=[
|
| 15 |
+
dict(type='LoadImageFromFile'),
|
| 16 |
+
dict(type='LoadAnnotations', reduce_zero_label=False),
|
| 17 |
+
dict(
|
| 18 |
+
type='Resize',
|
| 19 |
+
img_scale=(896, 896),
|
| 20 |
+
ratio_range=None,
|
| 21 |
+
keep_ratio=True),
|
| 22 |
+
dict(type='RandomCrop', crop_size=(896, 896)),
|
| 23 |
+
dict(type='RandomFlip', prob=0.5),
|
| 24 |
+
dict(type='PhotoMetricDistortion'),
|
| 25 |
+
dict(
|
| 26 |
+
type='Normalize',
|
| 27 |
+
mean=[123.675, 116.28, 103.53],
|
| 28 |
+
std=[58.395, 57.12, 57.375],
|
| 29 |
+
to_rgb=True),
|
| 30 |
+
dict(type='Pad', size=(896, 896), pad_val=0, seg_pad_val=255),
|
| 31 |
+
dict(type='DefaultFormatBundle'),
|
| 32 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 33 |
+
]),
|
| 34 |
+
val=dict(
|
| 35 |
+
type='iSAIDDataset',
|
| 36 |
+
data_root='/defaultShare/pubdata/remote_sensing/iSAID',
|
| 37 |
+
img_dir='img_dir/val',
|
| 38 |
+
ann_dir='ann_dir_old/val',
|
| 39 |
+
pipeline=[
|
| 40 |
+
dict(type='LoadImageFromFile'),
|
| 41 |
+
dict(
|
| 42 |
+
type='MultiScaleFlipAug',
|
| 43 |
+
img_scale=(896, 896),
|
| 44 |
+
flip=True,
|
| 45 |
+
img_ratios=[0.75,1.0,1.5],
|
| 46 |
+
transforms=[
|
| 47 |
+
dict(type='Resize', keep_ratio=True),
|
| 48 |
+
dict(type='RandomFlip', prob=1.0),
|
| 49 |
+
dict(
|
| 50 |
+
type='Pad',
|
| 51 |
+
size=(896, 896),
|
| 52 |
+
pad_val=0,
|
| 53 |
+
seg_pad_val=255),
|
| 54 |
+
dict(
|
| 55 |
+
type='Normalize',
|
| 56 |
+
mean=[123.675, 116.28, 103.53],
|
| 57 |
+
std=[58.395, 57.12, 57.375],
|
| 58 |
+
to_rgb=True),
|
| 59 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 60 |
+
dict(type='Collect', keys=['img'])
|
| 61 |
+
])
|
| 62 |
+
]),
|
| 63 |
+
test=dict(
|
| 64 |
+
type='iSAIDDataset',
|
| 65 |
+
data_root='/defaultShare/pubdata/remote_sensing/iSAID',
|
| 66 |
+
img_dir='img_dir/val',
|
| 67 |
+
ann_dir='ann_dir_old/val',
|
| 68 |
+
pipeline=[
|
| 69 |
+
dict(type='LoadImageFromFile'),
|
| 70 |
+
dict(
|
| 71 |
+
type='MultiScaleFlipAug',
|
| 72 |
+
img_scale=(896, 896),
|
| 73 |
+
flip=True,
|
| 74 |
+
img_ratios=[0.75,1.0,1.5],
|
| 75 |
+
transforms=[
|
| 76 |
+
dict(type='Resize', keep_ratio=True),
|
| 77 |
+
dict(type='RandomFlip', prob=1.0),
|
| 78 |
+
dict(
|
| 79 |
+
type='Pad',
|
| 80 |
+
size=(896, 896),
|
| 81 |
+
pad_val=0,
|
| 82 |
+
seg_pad_val=255),
|
| 83 |
+
dict(
|
| 84 |
+
type='Normalize',
|
| 85 |
+
mean=[123.675, 116.28, 103.53],
|
| 86 |
+
std=[58.395, 57.12, 57.375],
|
| 87 |
+
to_rgb=True),
|
| 88 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 89 |
+
dict(type='Collect', keys=['img'])
|
| 90 |
+
])
|
| 91 |
+
]))
|
| 92 |
+
log_config = dict(
|
| 93 |
+
interval=50,
|
| 94 |
+
hooks=[
|
| 95 |
+
dict(type='TextLoggerHook', by_epoch=False),
|
| 96 |
+
dict(type='TensorboardLoggerHook')
|
| 97 |
+
])
|
| 98 |
+
dist_params = dict(backend='nccl')
|
| 99 |
+
log_level = 'INFO'
|
| 100 |
+
load_from = None
|
| 101 |
+
resume_from = None
|
| 102 |
+
workflow = [('train', 1)]
|
| 103 |
+
cudnn_benchmark = True
|
| 104 |
+
optimizer = dict(
|
| 105 |
+
type='AdamW',
|
| 106 |
+
lr=1.5e-05,
|
| 107 |
+
betas=(0.9, 0.999),
|
| 108 |
+
weight_decay=0.05,
|
| 109 |
+
constructor='InternViTAdapterLayerDecayOptimizerConstructor',
|
| 110 |
+
paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.9))
|
| 111 |
+
optimizer_config = dict()
|
| 112 |
+
lr_config = dict(
|
| 113 |
+
policy='CosineAnnealing',
|
| 114 |
+
warmup='linear',
|
| 115 |
+
warmup_iters=1500,
|
| 116 |
+
warmup_ratio=1e-06,
|
| 117 |
+
min_lr=0.0)
|
| 118 |
+
runner = dict(type='IterBasedRunner', max_iters=80000)
|
| 119 |
+
checkpoint_config = dict(by_epoch=False, interval=8000, max_keep_ckpts=3)
|
| 120 |
+
evaluation = dict(interval=4000, metric='mIoU', pre_eval=True, metrics='mIoU')
|
| 121 |
+
val_pipeline = [
|
| 122 |
+
dict(type='LoadImageFromFile'),
|
| 123 |
+
dict(
|
| 124 |
+
type='MultiScaleFlipAug',
|
| 125 |
+
img_scale=(896, 896),
|
| 126 |
+
flip=True,
|
| 127 |
+
transforms=[
|
| 128 |
+
dict(type='Resize', keep_ratio=True),
|
| 129 |
+
dict(type='RandomFlip', prob=1.0),
|
| 130 |
+
dict(type='Pad', size=(896, 896), pad_val=0, seg_pad_val=255),
|
| 131 |
+
dict(
|
| 132 |
+
type='Normalize',
|
| 133 |
+
mean=[123.675, 116.28, 103.53],
|
| 134 |
+
std=[58.395, 57.12, 57.375],
|
| 135 |
+
to_rgb=True),
|
| 136 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 137 |
+
dict(type='Collect', keys=['img'])
|
| 138 |
+
])
|
| 139 |
+
]
|
| 140 |
+
default_hooks = dict(
|
| 141 |
+
timer=dict(type='IterTimerHook'),
|
| 142 |
+
logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
|
| 143 |
+
sampler_seed=dict(type='DistSamplerSeedHook'))
|
| 144 |
+
pretrained = 'pretrained/ViTP_ViT_L_300M_rs.safetensors'
|
| 145 |
+
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
| 146 |
+
model = dict(
|
| 147 |
+
type='EncoderDecoder',
|
| 148 |
+
backbone=dict(
|
| 149 |
+
type='InternViTAdapter',
|
| 150 |
+
pretrain_size=448,
|
| 151 |
+
img_size=896,
|
| 152 |
+
patch_size=16,
|
| 153 |
+
embed_dim=1024,
|
| 154 |
+
depth=24,
|
| 155 |
+
num_heads=16,
|
| 156 |
+
mlp_ratio=4.0,
|
| 157 |
+
drop_path_rate=0.1,
|
| 158 |
+
init_values=0.1,
|
| 159 |
+
with_cp=True,
|
| 160 |
+
use_flash_attn=True,
|
| 161 |
+
qk_normalization=False,
|
| 162 |
+
layerscale_force_fp32=False,
|
| 163 |
+
with_fpn=False,
|
| 164 |
+
freeze_vit=False,
|
| 165 |
+
use_final_norm=True,
|
| 166 |
+
interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
|
| 167 |
+
cffn_ratio=0.25,
|
| 168 |
+
deform_ratio=0.25,
|
| 169 |
+
qkv_bias=True,
|
| 170 |
+
norm_type='layer_norm',
|
| 171 |
+
pretrained='pretrained/ViTP_ViT_L_300M_rs.safetensors',
|
| 172 |
+
pretrained_type='full'),
|
| 173 |
+
decode_head=dict(
|
| 174 |
+
type='UPerHead',
|
| 175 |
+
in_channels=[1024, 1024, 1024, 1024],
|
| 176 |
+
num_classes=16,
|
| 177 |
+
ignore_index=255,
|
| 178 |
+
in_index=[0, 1, 2, 3],
|
| 179 |
+
pool_scales=(1, 2, 3, 6),
|
| 180 |
+
channels=512,
|
| 181 |
+
dropout_ratio=0.1,
|
| 182 |
+
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
| 183 |
+
align_corners=False,
|
| 184 |
+
loss_decode=dict(
|
| 185 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
| 186 |
+
train_cfg=dict(),
|
| 187 |
+
test_cfg=dict(mode='slide', crop_size=crop_size, stride=(crop_size[0]//2, crop_size[1]//2)))
|
| 188 |
+
fp16 = dict(loss_scale=dict(init_scale=512))
|
| 189 |
+
randomness = dict(seed=3407)
|
| 190 |
+
work_dir = './work_dirs/ViTP_isaid_upernet'
|
| 191 |
+
gpu_ids = range(0, 8)
|
| 192 |
+
auto_resume = False
|
ckpts/vitp_isaid_upernet_7114/eval_20250921_141413.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"config": "./configs/internvit/upernet_internvit_adp_80k_isaid_cos_ldr90.py",
|
| 3 |
+
"metric": {
|
| 4 |
+
"aAcc": 0.9912000000000001,
|
| 5 |
+
"mIoU": 0.7114,
|
| 6 |
+
"mAcc": 0.7913,
|
| 7 |
+
"IoU.background": 0.9919000244140626,
|
| 8 |
+
"IoU.ship": 0.7715000152587891,
|
| 9 |
+
"IoU.store_tank": 0.7601000213623047,
|
| 10 |
+
"IoU.baseball_diamond": 0.8194000244140625,
|
| 11 |
+
"IoU.tennis_court": 0.9094000244140625,
|
| 12 |
+
"IoU.basketball_court": 0.7026000213623047,
|
| 13 |
+
"IoU.Ground_Track_Field": 0.65,
|
| 14 |
+
"IoU.Bridge": 0.494900016784668,
|
| 15 |
+
"IoU.Large_Vehicle": 0.7023999786376953,
|
| 16 |
+
"IoU.Small_Vehicle": 0.560099983215332,
|
| 17 |
+
"IoU.Helicopter": 0.44459999084472657,
|
| 18 |
+
"IoU.Swimming_pool": 0.5159999847412109,
|
| 19 |
+
"IoU.Roundabout": 0.7868000030517578,
|
| 20 |
+
"IoU.Soccer_ball_field": 0.7841999816894532,
|
| 21 |
+
"IoU.plane": 0.8708999633789063,
|
| 22 |
+
"IoU.Harbor": 0.6168999862670899,
|
| 23 |
+
"Acc.background": 0.9976000213623046,
|
| 24 |
+
"Acc.ship": 0.856500015258789,
|
| 25 |
+
"Acc.store_tank": 0.8390000152587891,
|
| 26 |
+
"Acc.baseball_diamond": 0.8686000061035156,
|
| 27 |
+
"Acc.tennis_court": 0.9333000183105469,
|
| 28 |
+
"Acc.basketball_court": 0.8120999908447266,
|
| 29 |
+
"Acc.Ground_Track_Field": 0.7269999694824218,
|
| 30 |
+
"Acc.Bridge": 0.5695000076293946,
|
| 31 |
+
"Acc.Large_Vehicle": 0.7880999755859375,
|
| 32 |
+
"Acc.Small_Vehicle": 0.6397000122070312,
|
| 33 |
+
"Acc.Helicopter": 0.7390000152587891,
|
| 34 |
+
"Acc.Swimming_pool": 0.5818000030517578,
|
| 35 |
+
"Acc.Roundabout": 0.8454000091552735,
|
| 36 |
+
"Acc.Soccer_ball_field": 0.8494999694824219,
|
| 37 |
+
"Acc.plane": 0.9266000366210938,
|
| 38 |
+
"Acc.Harbor": 0.6873999786376953
|
| 39 |
+
}
|
| 40 |
+
}
|
ckpts/vitp_isaid_upernet_7114/iter_80000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71e964b20d73596832f96920fa676404294b52981ecc04824546f0522120e82d
|
| 3 |
+
size 1435132133
|
ckpts/vitp_levir_upernet_7268/20250919_030132/20250919_030132.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ckpts/vitp_levir_upernet_7268/20250919_030132/20250921_105914.log
ADDED
|
@@ -0,0 +1,485 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025/09/21 10:59:18 - mmengine - INFO -
|
| 2 |
+
------------------------------------------------------------
|
| 3 |
+
System environment:
|
| 4 |
+
sys.platform: linux
|
| 5 |
+
Python: 3.10.16 (main, Dec 11 2024, 16:24:50) [GCC 11.2.0]
|
| 6 |
+
CUDA available: True
|
| 7 |
+
MUSA available: False
|
| 8 |
+
numpy_random_seed: 908216666
|
| 9 |
+
GPU 0,1,2,3,4,5,6,7: NVIDIA GeForce RTX 3090
|
| 10 |
+
CUDA_HOME: /mnt/petrelfs/share_data/liqingyun/cuda/cuda-12.4/
|
| 11 |
+
GCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0
|
| 12 |
+
PyTorch: 1.12.0
|
| 13 |
+
PyTorch compiling details: PyTorch built with:
|
| 14 |
+
- GCC 9.3
|
| 15 |
+
- C++ Version: 201402
|
| 16 |
+
- Intel(R) oneAPI Math Kernel Library Version 2024.0-Product Build 20231011 for Intel(R) 64 architecture applications
|
| 17 |
+
- Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)
|
| 18 |
+
- OpenMP 201511 (a.k.a. OpenMP 4.5)
|
| 19 |
+
- LAPACK is enabled (usually provided by MKL)
|
| 20 |
+
- NNPACK is enabled
|
| 21 |
+
- CPU capability usage: AVX2
|
| 22 |
+
- CUDA Runtime 11.3
|
| 23 |
+
- NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37
|
| 24 |
+
- CuDNN 8.3.2 (built against CUDA 11.5)
|
| 25 |
+
- Magma 2.5.2
|
| 26 |
+
- Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.12.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=OFF, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF,
|
| 27 |
+
|
| 28 |
+
TorchVision: 0.13.0
|
| 29 |
+
OpenCV: 4.11.0
|
| 30 |
+
MMEngine: 0.10.7
|
| 31 |
+
|
| 32 |
+
Runtime environment:
|
| 33 |
+
cudnn_benchmark: True
|
| 34 |
+
mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0}
|
| 35 |
+
dist_cfg: {'backend': 'nccl'}
|
| 36 |
+
seed: 908216666
|
| 37 |
+
Distributed launcher: pytorch
|
| 38 |
+
Distributed training: True
|
| 39 |
+
GPU number: 8
|
| 40 |
+
------------------------------------------------------------
|
| 41 |
+
|
| 42 |
+
2025/09/21 10:59:19 - mmengine - INFO - Config:
|
| 43 |
+
crop_size = (
|
| 44 |
+
256,
|
| 45 |
+
256,
|
| 46 |
+
)
|
| 47 |
+
data_preprocessor = dict(
|
| 48 |
+
bgr_to_rgb=True,
|
| 49 |
+
mean=[
|
| 50 |
+
123.675,
|
| 51 |
+
116.28,
|
| 52 |
+
103.53,
|
| 53 |
+
123.675,
|
| 54 |
+
116.28,
|
| 55 |
+
103.53,
|
| 56 |
+
],
|
| 57 |
+
pad_val=0,
|
| 58 |
+
seg_pad_val=255,
|
| 59 |
+
size_divisor=32,
|
| 60 |
+
std=[
|
| 61 |
+
58.395,
|
| 62 |
+
57.12,
|
| 63 |
+
57.375,
|
| 64 |
+
58.395,
|
| 65 |
+
57.12,
|
| 66 |
+
57.375,
|
| 67 |
+
],
|
| 68 |
+
test_cfg=dict(size_divisor=32),
|
| 69 |
+
type='DualInputSegDataPreProcessor')
|
| 70 |
+
data_root = '/defaultShare/pubdata/remote_sensing/LEVIR-CD-256'
|
| 71 |
+
dataset_type = 'LEVIR_CD_Dataset'
|
| 72 |
+
default_hooks = dict(
|
| 73 |
+
checkpoint=dict(by_epoch=False, interval=8000, type='CheckpointHook'),
|
| 74 |
+
logger=dict(interval=500, log_metric_by_epoch=False, type='LoggerHook'),
|
| 75 |
+
param_scheduler=dict(type='ParamSchedulerHook'),
|
| 76 |
+
sampler_seed=dict(type='DistSamplerSeedHook'),
|
| 77 |
+
timer=dict(type='IterTimerHook'),
|
| 78 |
+
visualization=dict(interval=1, type='CDVisualizationHook'))
|
| 79 |
+
default_scope = 'opencd'
|
| 80 |
+
env_cfg = dict(
|
| 81 |
+
cudnn_benchmark=True,
|
| 82 |
+
dist_cfg=dict(backend='nccl'),
|
| 83 |
+
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
|
| 84 |
+
fp16 = dict(loss_scale=dict(init_scale=512))
|
| 85 |
+
img_ratios = [
|
| 86 |
+
0.75,
|
| 87 |
+
1.0,
|
| 88 |
+
1.25,
|
| 89 |
+
]
|
| 90 |
+
launcher = 'pytorch'
|
| 91 |
+
load_from = './work_dirs/upernet_internvit_adp_levir/iter_80000.pth'
|
| 92 |
+
log_level = 'INFO'
|
| 93 |
+
log_processor = dict(by_epoch=False)
|
| 94 |
+
model = dict(
|
| 95 |
+
backbone=dict(
|
| 96 |
+
cffn_ratio=0.25,
|
| 97 |
+
deform_ratio=0.25,
|
| 98 |
+
depth=24,
|
| 99 |
+
drop_path_rate=0.1,
|
| 100 |
+
embed_dim=1024,
|
| 101 |
+
freeze_vit=False,
|
| 102 |
+
img_size=256,
|
| 103 |
+
init_values=0.1,
|
| 104 |
+
interaction_indexes=[
|
| 105 |
+
[
|
| 106 |
+
0,
|
| 107 |
+
7,
|
| 108 |
+
],
|
| 109 |
+
[
|
| 110 |
+
8,
|
| 111 |
+
11,
|
| 112 |
+
],
|
| 113 |
+
[
|
| 114 |
+
12,
|
| 115 |
+
15,
|
| 116 |
+
],
|
| 117 |
+
[
|
| 118 |
+
16,
|
| 119 |
+
23,
|
| 120 |
+
],
|
| 121 |
+
],
|
| 122 |
+
layerscale_force_fp32=False,
|
| 123 |
+
mlp_ratio=4.0,
|
| 124 |
+
norm_type='layer_norm',
|
| 125 |
+
num_heads=16,
|
| 126 |
+
patch_size=16,
|
| 127 |
+
pretrain_size=448,
|
| 128 |
+
pretrained=
|
| 129 |
+
'/nfs/liyuxuan/zhangyicheng/mmrotate/pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors',
|
| 130 |
+
pretrained_type='full',
|
| 131 |
+
qk_normalization=False,
|
| 132 |
+
qkv_bias=True,
|
| 133 |
+
type='InternViTAdapter',
|
| 134 |
+
use_final_norm=True,
|
| 135 |
+
use_flash_attn=False,
|
| 136 |
+
with_cp=True,
|
| 137 |
+
with_fpn=False),
|
| 138 |
+
data_preprocessor=dict(
|
| 139 |
+
bgr_to_rgb=True,
|
| 140 |
+
mean=[
|
| 141 |
+
123.675,
|
| 142 |
+
116.28,
|
| 143 |
+
103.53,
|
| 144 |
+
123.675,
|
| 145 |
+
116.28,
|
| 146 |
+
103.53,
|
| 147 |
+
],
|
| 148 |
+
pad_val=0,
|
| 149 |
+
seg_pad_val=255,
|
| 150 |
+
size_divisor=32,
|
| 151 |
+
std=[
|
| 152 |
+
58.395,
|
| 153 |
+
57.12,
|
| 154 |
+
57.375,
|
| 155 |
+
58.395,
|
| 156 |
+
57.12,
|
| 157 |
+
57.375,
|
| 158 |
+
],
|
| 159 |
+
test_cfg=dict(size_divisor=32),
|
| 160 |
+
type='DualInputSegDataPreProcessor'),
|
| 161 |
+
decode_head=dict(
|
| 162 |
+
align_corners=False,
|
| 163 |
+
channels=1024,
|
| 164 |
+
dropout_ratio=0.1,
|
| 165 |
+
in_channels=[
|
| 166 |
+
2048,
|
| 167 |
+
2048,
|
| 168 |
+
2048,
|
| 169 |
+
2048,
|
| 170 |
+
],
|
| 171 |
+
in_index=[
|
| 172 |
+
0,
|
| 173 |
+
1,
|
| 174 |
+
2,
|
| 175 |
+
3,
|
| 176 |
+
],
|
| 177 |
+
loss_decode=dict(
|
| 178 |
+
loss_weight=1.0, type='mmseg.CrossEntropyLoss', use_sigmoid=False),
|
| 179 |
+
norm_cfg=dict(requires_grad=True, type='SyncBN'),
|
| 180 |
+
num_classes=2,
|
| 181 |
+
pool_scales=(
|
| 182 |
+
1,
|
| 183 |
+
2,
|
| 184 |
+
3,
|
| 185 |
+
6,
|
| 186 |
+
),
|
| 187 |
+
type='mmseg.UPerHead'),
|
| 188 |
+
neck=dict(policy='concat', type='FeatureFusionNeck'),
|
| 189 |
+
test_cfg=dict(crop_size=(
|
| 190 |
+
256,
|
| 191 |
+
256,
|
| 192 |
+
), mode='slide', stride=(
|
| 193 |
+
128,
|
| 194 |
+
128,
|
| 195 |
+
)),
|
| 196 |
+
train_cfg=dict(),
|
| 197 |
+
type='SiamEncoderDecoder')
|
| 198 |
+
norm_cfg = dict(requires_grad=True, type='SyncBN')
|
| 199 |
+
optim_wrapper = dict(
|
| 200 |
+
clip_grad=None,
|
| 201 |
+
constructor='InternViTAdapterLayerDecayOptimizerConstructor',
|
| 202 |
+
optimizer=dict(
|
| 203 |
+
betas=(
|
| 204 |
+
0.9,
|
| 205 |
+
0.999,
|
| 206 |
+
), lr=0.0001, type='AdamW', weight_decay=0.05),
|
| 207 |
+
paramwise_cfg=dict(layer_decay_rate=0.9, num_layers=24),
|
| 208 |
+
type='OptimWrapper')
|
| 209 |
+
optimizer = dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005)
|
| 210 |
+
param_scheduler = [
|
| 211 |
+
dict(
|
| 212 |
+
begin=0, by_epoch=False, end=1500, start_factor=1e-06,
|
| 213 |
+
type='LinearLR'),
|
| 214 |
+
dict(
|
| 215 |
+
T_max=78500,
|
| 216 |
+
begin=1500,
|
| 217 |
+
by_epoch=False,
|
| 218 |
+
end=80000,
|
| 219 |
+
eta_min=0.0,
|
| 220 |
+
type='CosineAnnealingLR'),
|
| 221 |
+
]
|
| 222 |
+
pretrained = '/nfs/liyuxuan/zhangyicheng/mmrotate/pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors'
|
| 223 |
+
resume = False
|
| 224 |
+
test_cfg = dict(type='TestLoop')
|
| 225 |
+
test_dataloader = dict(
|
| 226 |
+
batch_size=1,
|
| 227 |
+
dataset=dict(
|
| 228 |
+
data_prefix=dict(
|
| 229 |
+
img_path_from='A', img_path_to='B', seg_map_path='label'),
|
| 230 |
+
data_root='/defaultShare/pubdata/remote_sensing/LEVIR_CD/test',
|
| 231 |
+
pipeline=[
|
| 232 |
+
dict(type='MultiImgLoadImageFromFile'),
|
| 233 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 234 |
+
dict(type='MultiImgPackSegInputs'),
|
| 235 |
+
],
|
| 236 |
+
type='LEVIR_CD_Dataset'),
|
| 237 |
+
num_workers=8,
|
| 238 |
+
persistent_workers=True,
|
| 239 |
+
sampler=dict(shuffle=False, type='DefaultSampler'))
|
| 240 |
+
test_evaluator = dict(
|
| 241 |
+
iou_metrics=[
|
| 242 |
+
'mFscore',
|
| 243 |
+
'mIoU',
|
| 244 |
+
], type='mmseg.IoUMetric')
|
| 245 |
+
test_pipeline = [
|
| 246 |
+
dict(type='MultiImgLoadImageFromFile'),
|
| 247 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 248 |
+
dict(type='MultiImgPackSegInputs'),
|
| 249 |
+
]
|
| 250 |
+
train_cfg = dict(max_iters=80000, type='IterBasedTrainLoop', val_interval=8000)
|
| 251 |
+
train_dataloader = dict(
|
| 252 |
+
batch_size=4,
|
| 253 |
+
dataset=dict(
|
| 254 |
+
ann_file='list/train.txt',
|
| 255 |
+
data_prefix=dict(
|
| 256 |
+
img_path_from='A', img_path_to='B', seg_map_path='label'),
|
| 257 |
+
data_root='/defaultShare/pubdata/remote_sensing/LEVIR-CD-256',
|
| 258 |
+
pipeline=[
|
| 259 |
+
dict(type='MultiImgLoadImageFromFile'),
|
| 260 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 261 |
+
dict(degree=180, prob=0.5, type='MultiImgRandomRotate'),
|
| 262 |
+
dict(
|
| 263 |
+
cat_max_ratio=0.75,
|
| 264 |
+
crop_size=(
|
| 265 |
+
256,
|
| 266 |
+
256,
|
| 267 |
+
),
|
| 268 |
+
type='MultiImgRandomCrop'),
|
| 269 |
+
dict(direction='horizontal', prob=0.5, type='MultiImgRandomFlip'),
|
| 270 |
+
dict(direction='vertical', prob=0.5, type='MultiImgRandomFlip'),
|
| 271 |
+
dict(prob=0.5, type='MultiImgExchangeTime'),
|
| 272 |
+
dict(
|
| 273 |
+
brightness_delta=10,
|
| 274 |
+
contrast_range=(
|
| 275 |
+
0.8,
|
| 276 |
+
1.2,
|
| 277 |
+
),
|
| 278 |
+
hue_delta=10,
|
| 279 |
+
saturation_range=(
|
| 280 |
+
0.8,
|
| 281 |
+
1.2,
|
| 282 |
+
),
|
| 283 |
+
type='MultiImgPhotoMetricDistortion'),
|
| 284 |
+
dict(type='MultiImgPackSegInputs'),
|
| 285 |
+
],
|
| 286 |
+
type='LEVIR_CD_Dataset'),
|
| 287 |
+
num_workers=8,
|
| 288 |
+
persistent_workers=True,
|
| 289 |
+
sampler=dict(shuffle=True, type='DefaultSampler'))
|
| 290 |
+
train_pipeline = [
|
| 291 |
+
dict(type='MultiImgLoadImageFromFile'),
|
| 292 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 293 |
+
dict(degree=180, prob=0.5, type='MultiImgRandomRotate'),
|
| 294 |
+
dict(
|
| 295 |
+
cat_max_ratio=0.75, crop_size=(
|
| 296 |
+
256,
|
| 297 |
+
256,
|
| 298 |
+
), type='MultiImgRandomCrop'),
|
| 299 |
+
dict(direction='horizontal', prob=0.5, type='MultiImgRandomFlip'),
|
| 300 |
+
dict(direction='vertical', prob=0.5, type='MultiImgRandomFlip'),
|
| 301 |
+
dict(prob=0.5, type='MultiImgExchangeTime'),
|
| 302 |
+
dict(
|
| 303 |
+
brightness_delta=10,
|
| 304 |
+
contrast_range=(
|
| 305 |
+
0.8,
|
| 306 |
+
1.2,
|
| 307 |
+
),
|
| 308 |
+
hue_delta=10,
|
| 309 |
+
saturation_range=(
|
| 310 |
+
0.8,
|
| 311 |
+
1.2,
|
| 312 |
+
),
|
| 313 |
+
type='MultiImgPhotoMetricDistortion'),
|
| 314 |
+
dict(type='MultiImgPackSegInputs'),
|
| 315 |
+
]
|
| 316 |
+
tta_model = dict(type='mmseg.SegTTAModel')
|
| 317 |
+
tta_pipeline = [
|
| 318 |
+
dict(backend_args=None, type='MultiImgLoadImageFromFile'),
|
| 319 |
+
dict(
|
| 320 |
+
transforms=[
|
| 321 |
+
[
|
| 322 |
+
dict(
|
| 323 |
+
keep_ratio=True, scale_factor=0.75, type='MultiImgResize'),
|
| 324 |
+
dict(keep_ratio=True, scale_factor=1.0, type='MultiImgResize'),
|
| 325 |
+
dict(
|
| 326 |
+
keep_ratio=True, scale_factor=1.25, type='MultiImgResize'),
|
| 327 |
+
],
|
| 328 |
+
[
|
| 329 |
+
dict(
|
| 330 |
+
direction='horizontal',
|
| 331 |
+
prob=0.0,
|
| 332 |
+
type='MultiImgRandomFlip'),
|
| 333 |
+
dict(
|
| 334 |
+
direction='horizontal',
|
| 335 |
+
prob=1.0,
|
| 336 |
+
type='MultiImgRandomFlip'),
|
| 337 |
+
],
|
| 338 |
+
[
|
| 339 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 340 |
+
],
|
| 341 |
+
[
|
| 342 |
+
dict(type='MultiImgPackSegInputs'),
|
| 343 |
+
],
|
| 344 |
+
],
|
| 345 |
+
type='TestTimeAug'),
|
| 346 |
+
]
|
| 347 |
+
val_cfg = dict(type='ValLoop')
|
| 348 |
+
val_dataloader = dict(
|
| 349 |
+
batch_size=1,
|
| 350 |
+
dataset=dict(
|
| 351 |
+
ann_file='list/test.txt',
|
| 352 |
+
data_prefix=dict(
|
| 353 |
+
img_path_from='A', img_path_to='B', seg_map_path='label'),
|
| 354 |
+
data_root='/defaultShare/pubdata/remote_sensing/LEVIR-CD-256',
|
| 355 |
+
pipeline=[
|
| 356 |
+
dict(type='MultiImgLoadImageFromFile'),
|
| 357 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 358 |
+
dict(type='MultiImgPackSegInputs'),
|
| 359 |
+
],
|
| 360 |
+
type='LEVIR_CD_Dataset'),
|
| 361 |
+
num_workers=8,
|
| 362 |
+
persistent_workers=True,
|
| 363 |
+
sampler=dict(shuffle=False, type='DefaultSampler'))
|
| 364 |
+
val_evaluator = dict(
|
| 365 |
+
iou_metrics=[
|
| 366 |
+
'mFscore',
|
| 367 |
+
'mIoU',
|
| 368 |
+
], type='mmseg.IoUMetric')
|
| 369 |
+
val_pipeline = [
|
| 370 |
+
dict(type='MultiImgLoadImageFromFile'),
|
| 371 |
+
dict(keep_ratio=True, scale=(
|
| 372 |
+
256,
|
| 373 |
+
256,
|
| 374 |
+
), type='MultiImgResize'),
|
| 375 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 376 |
+
dict(type='MultiImgPackSegInputs'),
|
| 377 |
+
]
|
| 378 |
+
vis_backends = [
|
| 379 |
+
dict(type='CDLocalVisBackend'),
|
| 380 |
+
]
|
| 381 |
+
visualizer = dict(
|
| 382 |
+
alpha=1.0,
|
| 383 |
+
name='visualizer',
|
| 384 |
+
type='CDLocalVisualizer',
|
| 385 |
+
vis_backends=[
|
| 386 |
+
dict(type='CDLocalVisBackend'),
|
| 387 |
+
])
|
| 388 |
+
work_dir = './work_dirs/upernet_internvit_adp_levir'
|
| 389 |
+
|
| 390 |
+
2025/09/21 10:59:27 - mmengine - INFO - Hooks will be executed in the following order:
|
| 391 |
+
before_run:
|
| 392 |
+
(VERY_HIGH ) RuntimeInfoHook
|
| 393 |
+
(BELOW_NORMAL) LoggerHook
|
| 394 |
+
--------------------
|
| 395 |
+
before_train:
|
| 396 |
+
(VERY_HIGH ) RuntimeInfoHook
|
| 397 |
+
(NORMAL ) IterTimerHook
|
| 398 |
+
(VERY_LOW ) CheckpointHook
|
| 399 |
+
--------------------
|
| 400 |
+
before_train_epoch:
|
| 401 |
+
(VERY_HIGH ) RuntimeInfoHook
|
| 402 |
+
(NORMAL ) IterTimerHook
|
| 403 |
+
(NORMAL ) DistSamplerSeedHook
|
| 404 |
+
--------------------
|
| 405 |
+
before_train_iter:
|
| 406 |
+
(VERY_HIGH ) RuntimeInfoHook
|
| 407 |
+
(NORMAL ) IterTimerHook
|
| 408 |
+
--------------------
|
| 409 |
+
after_train_iter:
|
| 410 |
+
(VERY_HIGH ) RuntimeInfoHook
|
| 411 |
+
(NORMAL ) IterTimerHook
|
| 412 |
+
(NORMAL ) CDVisualizationHook
|
| 413 |
+
(BELOW_NORMAL) LoggerHook
|
| 414 |
+
(LOW ) ParamSchedulerHook
|
| 415 |
+
(VERY_LOW ) CheckpointHook
|
| 416 |
+
--------------------
|
| 417 |
+
after_train_epoch:
|
| 418 |
+
(NORMAL ) IterTimerHook
|
| 419 |
+
(LOW ) ParamSchedulerHook
|
| 420 |
+
(VERY_LOW ) CheckpointHook
|
| 421 |
+
--------------------
|
| 422 |
+
before_val:
|
| 423 |
+
(VERY_HIGH ) RuntimeInfoHook
|
| 424 |
+
--------------------
|
| 425 |
+
before_val_epoch:
|
| 426 |
+
(NORMAL ) IterTimerHook
|
| 427 |
+
--------------------
|
| 428 |
+
before_val_iter:
|
| 429 |
+
(NORMAL ) IterTimerHook
|
| 430 |
+
--------------------
|
| 431 |
+
after_val_iter:
|
| 432 |
+
(NORMAL ) IterTimerHook
|
| 433 |
+
(NORMAL ) CDVisualizationHook
|
| 434 |
+
(BELOW_NORMAL) LoggerHook
|
| 435 |
+
--------------------
|
| 436 |
+
after_val_epoch:
|
| 437 |
+
(VERY_HIGH ) RuntimeInfoHook
|
| 438 |
+
(NORMAL ) IterTimerHook
|
| 439 |
+
(BELOW_NORMAL) LoggerHook
|
| 440 |
+
(LOW ) ParamSchedulerHook
|
| 441 |
+
(VERY_LOW ) CheckpointHook
|
| 442 |
+
--------------------
|
| 443 |
+
after_val:
|
| 444 |
+
(VERY_HIGH ) RuntimeInfoHook
|
| 445 |
+
--------------------
|
| 446 |
+
after_train:
|
| 447 |
+
(VERY_HIGH ) RuntimeInfoHook
|
| 448 |
+
(VERY_LOW ) CheckpointHook
|
| 449 |
+
--------------------
|
| 450 |
+
before_test:
|
| 451 |
+
(VERY_HIGH ) RuntimeInfoHook
|
| 452 |
+
--------------------
|
| 453 |
+
before_test_epoch:
|
| 454 |
+
(NORMAL ) IterTimerHook
|
| 455 |
+
--------------------
|
| 456 |
+
before_test_iter:
|
| 457 |
+
(NORMAL ) IterTimerHook
|
| 458 |
+
--------------------
|
| 459 |
+
after_test_iter:
|
| 460 |
+
(NORMAL ) IterTimerHook
|
| 461 |
+
(NORMAL ) CDVisualizationHook
|
| 462 |
+
(BELOW_NORMAL) LoggerHook
|
| 463 |
+
--------------------
|
| 464 |
+
after_test_epoch:
|
| 465 |
+
(VERY_HIGH ) RuntimeInfoHook
|
| 466 |
+
(NORMAL ) IterTimerHook
|
| 467 |
+
(BELOW_NORMAL) LoggerHook
|
| 468 |
+
--------------------
|
| 469 |
+
after_test:
|
| 470 |
+
(VERY_HIGH ) RuntimeInfoHook
|
| 471 |
+
--------------------
|
| 472 |
+
after_run:
|
| 473 |
+
(BELOW_NORMAL) LoggerHook
|
| 474 |
+
--------------------
|
| 475 |
+
2025/09/21 10:59:27 - mmengine - WARNING - The prefix is not set in metric class IoUMetric.
|
| 476 |
+
2025/09/21 10:59:34 - mmengine - INFO - Load checkpoint from ./work_dirs/upernet_internvit_adp_levir/iter_80000.pth
|
| 477 |
+
2025/09/21 11:00:38 - mmengine - INFO - per class results:
|
| 478 |
+
2025/09/21 11:00:38 - mmengine - INFO -
|
| 479 |
+
+-----------+--------+-----------+--------+-------+-------+
|
| 480 |
+
| Class | Fscore | Precision | Recall | IoU | Acc |
|
| 481 |
+
+-----------+--------+-----------+--------+-------+-------+
|
| 482 |
+
| unchanged | 99.61 | 99.54 | 99.68 | 99.23 | 99.68 |
|
| 483 |
+
| changed | 92.67 | 93.92 | 91.45 | 86.34 | 91.45 |
|
| 484 |
+
+-----------+--------+-----------+--------+-------+-------+
|
| 485 |
+
2025/09/21 11:00:38 - mmengine - INFO - Iter(test) [16/16] aAcc: 99.2500 mFscore: 96.0900 mPrecision: 96.7300 mRecall: 95.4700 mIoU: 92.7000 mAcc: 95.4700 data_time: 0.0690 time: 3.9244
|
ckpts/vitp_levir_upernet_7268/iter_80000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54ac1c9895902db02909169fc1aeb19b7c8732b777a42ad2a3d16876fc53da31
|
| 3 |
+
size 4569761364
|
ckpts/vitp_levir_upernet_7268/upernet_internvit_adp_levir.py
ADDED
|
@@ -0,0 +1,344 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
crop_size = (
|
| 2 |
+
256,
|
| 3 |
+
256,
|
| 4 |
+
)
|
| 5 |
+
data_preprocessor = dict(
|
| 6 |
+
bgr_to_rgb=True,
|
| 7 |
+
mean=[
|
| 8 |
+
123.675,
|
| 9 |
+
116.28,
|
| 10 |
+
103.53,
|
| 11 |
+
123.675,
|
| 12 |
+
116.28,
|
| 13 |
+
103.53,
|
| 14 |
+
],
|
| 15 |
+
pad_val=0,
|
| 16 |
+
seg_pad_val=255,
|
| 17 |
+
size_divisor=32,
|
| 18 |
+
std=[
|
| 19 |
+
58.395,
|
| 20 |
+
57.12,
|
| 21 |
+
57.375,
|
| 22 |
+
58.395,
|
| 23 |
+
57.12,
|
| 24 |
+
57.375,
|
| 25 |
+
],
|
| 26 |
+
test_cfg=dict(size_divisor=32),
|
| 27 |
+
type='DualInputSegDataPreProcessor')
|
| 28 |
+
data_root = '/defaultShare/pubdata/remote_sensing/LEVIR-CD-256'
|
| 29 |
+
dataset_type = 'LEVIR_CD_Dataset'
|
| 30 |
+
default_hooks = dict(
|
| 31 |
+
checkpoint=dict(by_epoch=False, interval=8000, type='CheckpointHook'),
|
| 32 |
+
logger=dict(interval=500, log_metric_by_epoch=False, type='LoggerHook'),
|
| 33 |
+
param_scheduler=dict(type='ParamSchedulerHook'),
|
| 34 |
+
sampler_seed=dict(type='DistSamplerSeedHook'),
|
| 35 |
+
timer=dict(type='IterTimerHook'),
|
| 36 |
+
visualization=dict(interval=1, type='CDVisualizationHook'))
|
| 37 |
+
default_scope = 'opencd'
|
| 38 |
+
env_cfg = dict(
|
| 39 |
+
cudnn_benchmark=True,
|
| 40 |
+
dist_cfg=dict(backend='nccl'),
|
| 41 |
+
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
|
| 42 |
+
fp16 = dict(loss_scale=dict(init_scale=512))
|
| 43 |
+
img_ratios = [
|
| 44 |
+
0.75,
|
| 45 |
+
1.0,
|
| 46 |
+
1.25,
|
| 47 |
+
]
|
| 48 |
+
launcher = 'pytorch'
|
| 49 |
+
load_from = './work_dirs/upernet_internvit_adp_levir_cos/iter_80000.pth'
|
| 50 |
+
log_level = 'INFO'
|
| 51 |
+
log_processor = dict(by_epoch=False)
|
| 52 |
+
model = dict(
|
| 53 |
+
backbone=dict(
|
| 54 |
+
cffn_ratio=0.25,
|
| 55 |
+
deform_ratio=0.25,
|
| 56 |
+
depth=24,
|
| 57 |
+
drop_path_rate=0.1,
|
| 58 |
+
embed_dim=1024,
|
| 59 |
+
freeze_vit=False,
|
| 60 |
+
img_size=256,
|
| 61 |
+
init_values=0.1,
|
| 62 |
+
interaction_indexes=[
|
| 63 |
+
[
|
| 64 |
+
0,
|
| 65 |
+
7,
|
| 66 |
+
],
|
| 67 |
+
[
|
| 68 |
+
8,
|
| 69 |
+
11,
|
| 70 |
+
],
|
| 71 |
+
[
|
| 72 |
+
12,
|
| 73 |
+
15,
|
| 74 |
+
],
|
| 75 |
+
[
|
| 76 |
+
16,
|
| 77 |
+
23,
|
| 78 |
+
],
|
| 79 |
+
],
|
| 80 |
+
layerscale_force_fp32=False,
|
| 81 |
+
mlp_ratio=4.0,
|
| 82 |
+
norm_type='layer_norm',
|
| 83 |
+
num_heads=16,
|
| 84 |
+
patch_size=16,
|
| 85 |
+
pretrain_size=448,
|
| 86 |
+
pretrained='pretrained/ViTP_ViT_L_300M_rs.safetensors',
|
| 87 |
+
pretrained_type='full',
|
| 88 |
+
qk_normalization=False,
|
| 89 |
+
qkv_bias=True,
|
| 90 |
+
type='InternViTAdapter',
|
| 91 |
+
use_final_norm=True,
|
| 92 |
+
use_flash_attn=False,
|
| 93 |
+
with_cp=True,
|
| 94 |
+
with_fpn=False),
|
| 95 |
+
data_preprocessor=dict(
|
| 96 |
+
bgr_to_rgb=True,
|
| 97 |
+
mean=[
|
| 98 |
+
123.675,
|
| 99 |
+
116.28,
|
| 100 |
+
103.53,
|
| 101 |
+
123.675,
|
| 102 |
+
116.28,
|
| 103 |
+
103.53,
|
| 104 |
+
],
|
| 105 |
+
pad_val=0,
|
| 106 |
+
seg_pad_val=255,
|
| 107 |
+
size_divisor=32,
|
| 108 |
+
std=[
|
| 109 |
+
58.395,
|
| 110 |
+
57.12,
|
| 111 |
+
57.375,
|
| 112 |
+
58.395,
|
| 113 |
+
57.12,
|
| 114 |
+
57.375,
|
| 115 |
+
],
|
| 116 |
+
test_cfg=dict(size_divisor=32),
|
| 117 |
+
type='DualInputSegDataPreProcessor'),
|
| 118 |
+
decode_head=dict(
|
| 119 |
+
align_corners=False,
|
| 120 |
+
channels=1024,
|
| 121 |
+
dropout_ratio=0.1,
|
| 122 |
+
in_channels=[
|
| 123 |
+
2048,
|
| 124 |
+
2048,
|
| 125 |
+
2048,
|
| 126 |
+
2048,
|
| 127 |
+
],
|
| 128 |
+
in_index=[
|
| 129 |
+
0,
|
| 130 |
+
1,
|
| 131 |
+
2,
|
| 132 |
+
3,
|
| 133 |
+
],
|
| 134 |
+
loss_decode=dict(
|
| 135 |
+
loss_weight=1.0, type='mmseg.CrossEntropyLoss', use_sigmoid=False),
|
| 136 |
+
norm_cfg=dict(requires_grad=True, type='SyncBN'),
|
| 137 |
+
num_classes=2,
|
| 138 |
+
pool_scales=(
|
| 139 |
+
1,
|
| 140 |
+
2,
|
| 141 |
+
3,
|
| 142 |
+
6,
|
| 143 |
+
),
|
| 144 |
+
type='mmseg.UPerHead'),
|
| 145 |
+
neck=dict(policy='concat', type='FeatureFusionNeck'),
|
| 146 |
+
test_cfg=dict(crop_size=(
|
| 147 |
+
256,
|
| 148 |
+
256,
|
| 149 |
+
), mode='slide', stride=(
|
| 150 |
+
128,
|
| 151 |
+
128,
|
| 152 |
+
)),
|
| 153 |
+
train_cfg=dict(),
|
| 154 |
+
type='SiamEncoderDecoder')
|
| 155 |
+
norm_cfg = dict(requires_grad=True, type='SyncBN')
|
| 156 |
+
optim_wrapper = dict(
|
| 157 |
+
clip_grad=None,
|
| 158 |
+
constructor='InternViTAdapterLayerDecayOptimizerConstructor',
|
| 159 |
+
optimizer=dict(
|
| 160 |
+
betas=(
|
| 161 |
+
0.9,
|
| 162 |
+
0.999,
|
| 163 |
+
), lr=0.0001, type='AdamW', weight_decay=0.05),
|
| 164 |
+
paramwise_cfg=dict(layer_decay_rate=0.9, num_layers=24),
|
| 165 |
+
type='OptimWrapper')
|
| 166 |
+
optimizer = dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005)
|
| 167 |
+
param_scheduler = [
|
| 168 |
+
dict(
|
| 169 |
+
begin=0, by_epoch=False, end=1500, start_factor=1e-06,
|
| 170 |
+
type='LinearLR'),
|
| 171 |
+
dict(
|
| 172 |
+
T_max=78500,
|
| 173 |
+
begin=1500,
|
| 174 |
+
by_epoch=False,
|
| 175 |
+
end=80000,
|
| 176 |
+
eta_min=0.0,
|
| 177 |
+
type='CosineAnnealingLR'),
|
| 178 |
+
]
|
| 179 |
+
resume = False
|
| 180 |
+
test_cfg = dict(type='TestLoop')
|
| 181 |
+
test_dataloader = dict(
|
| 182 |
+
batch_size=1,
|
| 183 |
+
dataset=dict(
|
| 184 |
+
data_prefix=dict(
|
| 185 |
+
img_path_from='A', img_path_to='B', seg_map_path='label'),
|
| 186 |
+
data_root='/defaultShare/pubdata/remote_sensing/LEVIR_CD/test',
|
| 187 |
+
pipeline=[
|
| 188 |
+
dict(type='MultiImgLoadImageFromFile'),
|
| 189 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 190 |
+
dict(type='MultiImgPackSegInputs'),
|
| 191 |
+
],
|
| 192 |
+
type='LEVIR_CD_Dataset'),
|
| 193 |
+
num_workers=8,
|
| 194 |
+
persistent_workers=True,
|
| 195 |
+
sampler=dict(shuffle=False, type='DefaultSampler'))
|
| 196 |
+
test_evaluator = dict(
|
| 197 |
+
iou_metrics=[
|
| 198 |
+
'mFscore',
|
| 199 |
+
'mIoU',
|
| 200 |
+
], type='mmseg.IoUMetric')
|
| 201 |
+
test_pipeline = [
|
| 202 |
+
dict(type='MultiImgLoadImageFromFile'),
|
| 203 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 204 |
+
dict(type='MultiImgPackSegInputs'),
|
| 205 |
+
]
|
| 206 |
+
train_cfg = dict(max_iters=80000, type='IterBasedTrainLoop', val_interval=8000)
|
| 207 |
+
train_dataloader = dict(
|
| 208 |
+
batch_size=4,
|
| 209 |
+
dataset=dict(
|
| 210 |
+
ann_file='list/train.txt',
|
| 211 |
+
data_prefix=dict(
|
| 212 |
+
img_path_from='A', img_path_to='B', seg_map_path='label'),
|
| 213 |
+
data_root='/defaultShare/pubdata/remote_sensing/LEVIR-CD-256',
|
| 214 |
+
pipeline=[
|
| 215 |
+
dict(type='MultiImgLoadImageFromFile'),
|
| 216 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 217 |
+
dict(degree=180, prob=0.5, type='MultiImgRandomRotate'),
|
| 218 |
+
dict(
|
| 219 |
+
cat_max_ratio=0.75,
|
| 220 |
+
crop_size=(
|
| 221 |
+
256,
|
| 222 |
+
256,
|
| 223 |
+
),
|
| 224 |
+
type='MultiImgRandomCrop'),
|
| 225 |
+
dict(direction='horizontal', prob=0.5, type='MultiImgRandomFlip'),
|
| 226 |
+
dict(direction='vertical', prob=0.5, type='MultiImgRandomFlip'),
|
| 227 |
+
dict(prob=0.5, type='MultiImgExchangeTime'),
|
| 228 |
+
dict(
|
| 229 |
+
brightness_delta=10,
|
| 230 |
+
contrast_range=(
|
| 231 |
+
0.8,
|
| 232 |
+
1.2,
|
| 233 |
+
),
|
| 234 |
+
hue_delta=10,
|
| 235 |
+
saturation_range=(
|
| 236 |
+
0.8,
|
| 237 |
+
1.2,
|
| 238 |
+
),
|
| 239 |
+
type='MultiImgPhotoMetricDistortion'),
|
| 240 |
+
dict(type='MultiImgPackSegInputs'),
|
| 241 |
+
],
|
| 242 |
+
type='LEVIR_CD_Dataset'),
|
| 243 |
+
num_workers=8,
|
| 244 |
+
persistent_workers=True,
|
| 245 |
+
sampler=dict(shuffle=True, type='DefaultSampler'))
|
| 246 |
+
train_pipeline = [
|
| 247 |
+
dict(type='MultiImgLoadImageFromFile'),
|
| 248 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 249 |
+
dict(degree=180, prob=0.5, type='MultiImgRandomRotate'),
|
| 250 |
+
dict(
|
| 251 |
+
cat_max_ratio=0.75, crop_size=(
|
| 252 |
+
256,
|
| 253 |
+
256,
|
| 254 |
+
), type='MultiImgRandomCrop'),
|
| 255 |
+
dict(direction='horizontal', prob=0.5, type='MultiImgRandomFlip'),
|
| 256 |
+
dict(direction='vertical', prob=0.5, type='MultiImgRandomFlip'),
|
| 257 |
+
dict(prob=0.5, type='MultiImgExchangeTime'),
|
| 258 |
+
dict(
|
| 259 |
+
brightness_delta=10,
|
| 260 |
+
contrast_range=(
|
| 261 |
+
0.8,
|
| 262 |
+
1.2,
|
| 263 |
+
),
|
| 264 |
+
hue_delta=10,
|
| 265 |
+
saturation_range=(
|
| 266 |
+
0.8,
|
| 267 |
+
1.2,
|
| 268 |
+
),
|
| 269 |
+
type='MultiImgPhotoMetricDistortion'),
|
| 270 |
+
dict(type='MultiImgPackSegInputs'),
|
| 271 |
+
]
|
| 272 |
+
tta_model = dict(type='mmseg.SegTTAModel')
|
| 273 |
+
tta_pipeline = [
|
| 274 |
+
dict(backend_args=None, type='MultiImgLoadImageFromFile'),
|
| 275 |
+
dict(
|
| 276 |
+
transforms=[
|
| 277 |
+
[
|
| 278 |
+
dict(
|
| 279 |
+
keep_ratio=True, scale_factor=0.75, type='MultiImgResize'),
|
| 280 |
+
dict(keep_ratio=True, scale_factor=1.0, type='MultiImgResize'),
|
| 281 |
+
dict(
|
| 282 |
+
keep_ratio=True, scale_factor=1.25, type='MultiImgResize'),
|
| 283 |
+
],
|
| 284 |
+
[
|
| 285 |
+
dict(
|
| 286 |
+
direction='horizontal',
|
| 287 |
+
prob=0.0,
|
| 288 |
+
type='MultiImgRandomFlip'),
|
| 289 |
+
dict(
|
| 290 |
+
direction='horizontal',
|
| 291 |
+
prob=1.0,
|
| 292 |
+
type='MultiImgRandomFlip'),
|
| 293 |
+
],
|
| 294 |
+
[
|
| 295 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 296 |
+
],
|
| 297 |
+
[
|
| 298 |
+
dict(type='MultiImgPackSegInputs'),
|
| 299 |
+
],
|
| 300 |
+
],
|
| 301 |
+
type='TestTimeAug'),
|
| 302 |
+
]
|
| 303 |
+
val_cfg = dict(type='ValLoop')
|
| 304 |
+
val_dataloader = dict(
|
| 305 |
+
batch_size=1,
|
| 306 |
+
dataset=dict(
|
| 307 |
+
ann_file='list/test.txt',
|
| 308 |
+
data_prefix=dict(
|
| 309 |
+
img_path_from='A', img_path_to='B', seg_map_path='label'),
|
| 310 |
+
data_root='/defaultShare/pubdata/remote_sensing/LEVIR-CD-256',
|
| 311 |
+
pipeline=[
|
| 312 |
+
dict(type='MultiImgLoadImageFromFile'),
|
| 313 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 314 |
+
dict(type='MultiImgPackSegInputs'),
|
| 315 |
+
],
|
| 316 |
+
type='LEVIR_CD_Dataset'),
|
| 317 |
+
num_workers=8,
|
| 318 |
+
persistent_workers=True,
|
| 319 |
+
sampler=dict(shuffle=False, type='DefaultSampler'))
|
| 320 |
+
val_evaluator = dict(
|
| 321 |
+
iou_metrics=[
|
| 322 |
+
'mFscore',
|
| 323 |
+
'mIoU',
|
| 324 |
+
], type='mmseg.IoUMetric')
|
| 325 |
+
val_pipeline = [
|
| 326 |
+
dict(type='MultiImgLoadImageFromFile'),
|
| 327 |
+
dict(keep_ratio=True, scale=(
|
| 328 |
+
256,
|
| 329 |
+
256,
|
| 330 |
+
), type='MultiImgResize'),
|
| 331 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 332 |
+
dict(type='MultiImgPackSegInputs'),
|
| 333 |
+
]
|
| 334 |
+
vis_backends = [
|
| 335 |
+
dict(type='CDLocalVisBackend'),
|
| 336 |
+
]
|
| 337 |
+
visualizer = dict(
|
| 338 |
+
alpha=1.0,
|
| 339 |
+
name='visualizer',
|
| 340 |
+
type='CDLocalVisualizer',
|
| 341 |
+
vis_backends=[
|
| 342 |
+
dict(type='CDLocalVisBackend'),
|
| 343 |
+
])
|
| 344 |
+
work_dir = './work_dirs/upernet_internvit_adp_levir'
|
ckpts/vitp_loveda_upernet_5428/20250807_180314.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ckpts/vitp_loveda_upernet_5428/20250807_180314.log.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ckpts/vitp_loveda_upernet_5428/iter_80000.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e6abbbb371c6c42da1d1ba6809404454a8a3bdbf14e27390f66ed040cfd5648
|
| 3 |
+
size 1426461989
|
ckpts/vitp_loveda_upernet_5428/vitp_loveda_upernet.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_type = 'LoveDADataset'
|
| 2 |
+
data_root = '/defaultShare/pubdata/remote_sensing/loveda_dataset'
|
| 3 |
+
img_norm_cfg = dict(
|
| 4 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
| 5 |
+
crop_size = (512, 512)
|
| 6 |
+
train_pipeline = [
|
| 7 |
+
dict(type='LoadImageFromFile'),
|
| 8 |
+
dict(type='LoadAnnotations', reduce_zero_label=True),
|
| 9 |
+
dict(
|
| 10 |
+
type='Resize',
|
| 11 |
+
img_scale=(512, 512),
|
| 12 |
+
ratio_range=(0.5, 2.0),
|
| 13 |
+
keep_ratio=True),
|
| 14 |
+
dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
|
| 15 |
+
dict(type='RandomFlip', prob=0.5),
|
| 16 |
+
dict(type='PhotoMetricDistortion'),
|
| 17 |
+
dict(
|
| 18 |
+
type='Normalize',
|
| 19 |
+
mean=[123.675, 116.28, 103.53],
|
| 20 |
+
std=[58.395, 57.12, 57.375],
|
| 21 |
+
to_rgb=True),
|
| 22 |
+
dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
|
| 23 |
+
dict(type='DefaultFormatBundle'),
|
| 24 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 25 |
+
]
|
| 26 |
+
test_pipeline = [
|
| 27 |
+
dict(type='LoadImageFromFile'),
|
| 28 |
+
dict(
|
| 29 |
+
type='MultiScaleFlipAug',
|
| 30 |
+
img_scale=(512, 512),
|
| 31 |
+
flip=False,
|
| 32 |
+
transforms=[
|
| 33 |
+
dict(type='Resize', keep_ratio=True),
|
| 34 |
+
dict(type='RandomFlip'),
|
| 35 |
+
dict(
|
| 36 |
+
type='Normalize',
|
| 37 |
+
mean=[123.675, 116.28, 103.53],
|
| 38 |
+
std=[58.395, 57.12, 57.375],
|
| 39 |
+
to_rgb=True),
|
| 40 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 41 |
+
dict(type='Collect', keys=['img'])
|
| 42 |
+
])
|
| 43 |
+
]
|
| 44 |
+
data = dict(
|
| 45 |
+
samples_per_gpu=1,
|
| 46 |
+
workers_per_gpu=4,
|
| 47 |
+
train=dict(
|
| 48 |
+
type='LoveDADataset',
|
| 49 |
+
data_root='/defaultShare/pubdata/remote_sensing/loveda_dataset',
|
| 50 |
+
img_dir='trainval/images',
|
| 51 |
+
ann_dir='trainval/labels',
|
| 52 |
+
pipeline=[
|
| 53 |
+
dict(type='LoadImageFromFile'),
|
| 54 |
+
dict(type='LoadAnnotations', reduce_zero_label=True),
|
| 55 |
+
dict(
|
| 56 |
+
type='Resize',
|
| 57 |
+
img_scale=(512, 512),
|
| 58 |
+
ratio_range=(0.5, 2.0),
|
| 59 |
+
keep_ratio=True),
|
| 60 |
+
dict(type='RandomCrop', crop_size=(512, 512), cat_max_ratio=0.75),
|
| 61 |
+
dict(type='RandomFlip', prob=0.5),
|
| 62 |
+
dict(type='PhotoMetricDistortion'),
|
| 63 |
+
dict(
|
| 64 |
+
type='Normalize',
|
| 65 |
+
mean=[123.675, 116.28, 103.53],
|
| 66 |
+
std=[58.395, 57.12, 57.375],
|
| 67 |
+
to_rgb=True),
|
| 68 |
+
dict(type='Pad', size=(512, 512), pad_val=0, seg_pad_val=255),
|
| 69 |
+
dict(type='DefaultFormatBundle'),
|
| 70 |
+
dict(type='Collect', keys=['img', 'gt_semantic_seg'])
|
| 71 |
+
]),
|
| 72 |
+
val=dict(
|
| 73 |
+
type='LoveDADataset',
|
| 74 |
+
data_root='/defaultShare/pubdata/remote_sensing/loveda_dataset',
|
| 75 |
+
img_dir='val/images',
|
| 76 |
+
ann_dir='val/labels',
|
| 77 |
+
pipeline=[
|
| 78 |
+
dict(type='LoadImageFromFile'),
|
| 79 |
+
dict(
|
| 80 |
+
type='MultiScaleFlipAug',
|
| 81 |
+
img_scale=(512, 512),
|
| 82 |
+
flip=False,
|
| 83 |
+
transforms=[
|
| 84 |
+
dict(type='Resize', keep_ratio=True),
|
| 85 |
+
dict(type='RandomFlip'),
|
| 86 |
+
dict(
|
| 87 |
+
type='Normalize',
|
| 88 |
+
mean=[123.675, 116.28, 103.53],
|
| 89 |
+
std=[58.395, 57.12, 57.375],
|
| 90 |
+
to_rgb=True),
|
| 91 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 92 |
+
dict(type='Collect', keys=['img'])
|
| 93 |
+
])
|
| 94 |
+
]),
|
| 95 |
+
test=dict(
|
| 96 |
+
type='LoveDADataset',
|
| 97 |
+
data_root='/defaultShare/pubdata/remote_sensing/loveda_dataset',
|
| 98 |
+
img_dir='test/images',
|
| 99 |
+
ann_dir='test/labels',
|
| 100 |
+
pipeline=[
|
| 101 |
+
dict(type='LoadImageFromFile'),
|
| 102 |
+
dict(
|
| 103 |
+
type='MultiScaleFlipAug',
|
| 104 |
+
img_scale=(512, 512),
|
| 105 |
+
flip=False,
|
| 106 |
+
transforms=[
|
| 107 |
+
dict(type='Resize', keep_ratio=True),
|
| 108 |
+
dict(type='RandomFlip'),
|
| 109 |
+
dict(
|
| 110 |
+
type='Normalize',
|
| 111 |
+
mean=[123.675, 116.28, 103.53],
|
| 112 |
+
std=[58.395, 57.12, 57.375],
|
| 113 |
+
to_rgb=True),
|
| 114 |
+
dict(type='ImageToTensor', keys=['img']),
|
| 115 |
+
dict(type='Collect', keys=['img'])
|
| 116 |
+
])
|
| 117 |
+
]))
|
| 118 |
+
log_config = dict(
|
| 119 |
+
interval=50,
|
| 120 |
+
hooks=[
|
| 121 |
+
dict(type='TextLoggerHook', by_epoch=False),
|
| 122 |
+
dict(type='TensorboardLoggerHook')
|
| 123 |
+
])
|
| 124 |
+
dist_params = dict(backend='nccl')
|
| 125 |
+
log_level = 'INFO'
|
| 126 |
+
load_from = None
|
| 127 |
+
resume_from = None
|
| 128 |
+
workflow = [('train', 1)]
|
| 129 |
+
cudnn_benchmark = True
|
| 130 |
+
optimizer = dict(
|
| 131 |
+
type='AdamW',
|
| 132 |
+
lr=1e-05,
|
| 133 |
+
betas=(0.9, 0.999),
|
| 134 |
+
weight_decay=0.1,
|
| 135 |
+
constructor='InternViTAdapterLayerDecayOptimizerConstructor',
|
| 136 |
+
paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.9))
|
| 137 |
+
optimizer_config = dict()
|
| 138 |
+
lr_config = dict(
|
| 139 |
+
policy='CosineAnnealing',
|
| 140 |
+
warmup='linear',
|
| 141 |
+
warmup_iters=1500,
|
| 142 |
+
warmup_ratio=1e-06,
|
| 143 |
+
min_lr=0.0)
|
| 144 |
+
runner = dict(type='IterBasedRunner', max_iters=80000)
|
| 145 |
+
checkpoint_config = dict(by_epoch=False, interval=4000, max_keep_ckpts=10)
|
| 146 |
+
evaluation = dict(interval=4000, metric='mIoU', pre_eval=True, metrics='mIoU')
|
| 147 |
+
default_hooks = dict(
|
| 148 |
+
timer=dict(type='IterTimerHook'),
|
| 149 |
+
logger=dict(type='LoggerHook', interval=50, log_metric_by_epoch=False),
|
| 150 |
+
sampler_seed=dict(type='DistSamplerSeedHook'),
|
| 151 |
+
visualization=dict(type='SegVisualizationHook', draw=True, interval=1000))
|
| 152 |
+
pretrained = 'pretrained/ViTP_ViT_L_300M_rs.safetensors'
|
| 153 |
+
norm_cfg = dict(type='SyncBN', requires_grad=True)
|
| 154 |
+
data_preprocessor = dict(
|
| 155 |
+
type='SegDataPreProcessor',
|
| 156 |
+
size=(512, 512),
|
| 157 |
+
mean=[123.675, 116.28, 103.53],
|
| 158 |
+
std=[58.395, 57.12, 57.375],
|
| 159 |
+
bgr_to_rgb=True,
|
| 160 |
+
pad_val=0,
|
| 161 |
+
seg_pad_val=255)
|
| 162 |
+
model = dict(
|
| 163 |
+
type='EncoderDecoder',
|
| 164 |
+
backbone=dict(
|
| 165 |
+
type='InternViTAdapter',
|
| 166 |
+
pretrain_size=448,
|
| 167 |
+
img_size=512,
|
| 168 |
+
patch_size=16,
|
| 169 |
+
embed_dim=1024,
|
| 170 |
+
depth=24,
|
| 171 |
+
num_heads=16,
|
| 172 |
+
mlp_ratio=4.0,
|
| 173 |
+
drop_path_rate=0.1,
|
| 174 |
+
init_values=0.1,
|
| 175 |
+
with_cp=True,
|
| 176 |
+
use_flash_attn=True,
|
| 177 |
+
qk_normalization=False,
|
| 178 |
+
layerscale_force_fp32=False,
|
| 179 |
+
with_fpn=False,
|
| 180 |
+
freeze_vit=False,
|
| 181 |
+
use_final_norm=True,
|
| 182 |
+
interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
|
| 183 |
+
cffn_ratio=0.25,
|
| 184 |
+
deform_ratio=0.25,
|
| 185 |
+
qkv_bias=True,
|
| 186 |
+
norm_type='layer_norm',
|
| 187 |
+
pretrained='pretrained/ViTP_ViT_L_300M_rs.safetensors',
|
| 188 |
+
pretrained_type='full'),
|
| 189 |
+
decode_head=dict(
|
| 190 |
+
type='UPerHead',
|
| 191 |
+
in_channels=[1024, 1024, 1024, 1024],
|
| 192 |
+
num_classes=7,
|
| 193 |
+
ignore_index=255,
|
| 194 |
+
in_index=[0, 1, 2, 3],
|
| 195 |
+
pool_scales=(1, 2, 3, 6),
|
| 196 |
+
channels=512,
|
| 197 |
+
dropout_ratio=0.1,
|
| 198 |
+
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
| 199 |
+
align_corners=False,
|
| 200 |
+
loss_decode=dict(
|
| 201 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)),
|
| 202 |
+
train_cfg=dict(),
|
| 203 |
+
test_cfg=dict(mode='slide', stride=(384, 384), crop_size=(512, 512)))
|
| 204 |
+
fp16 = dict(loss_scale=dict(init_scale=512))
|
| 205 |
+
randomness = dict(seed=3407)
|
| 206 |
+
work_dir = './work_dirs/vitp_loveda_upernet'
|
| 207 |
+
gpu_ids = range(0, 8)
|
| 208 |
+
auto_resume = False
|
ckpts/vitp_rsar_orcnn_7231/20250716_042910.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ckpts/vitp_rsar_orcnn_7231/20250716_042910.log.json
ADDED
|
@@ -0,0 +1,241 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"env_info": "sys.platform: linux\nPython: 3.10.16 (main, Dec 11 2024, 16:24:50) [GCC 11.2.0]\nCUDA available: True\nGPU 0,1,2,3,4,5,6,7: NVIDIA GeForce RTX 3090\nCUDA_HOME: /mnt/petrelfs/share_data/liqingyun/cuda/cuda-12.4/\nGCC: gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0\nPyTorch: 1.12.0\nPyTorch compiling details: PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2024.0-Product Build 20231011 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.3\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37\n - CuDNN 8.3.2 (built against CUDA 11.5)\n - Magma 2.5.2\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new 
-Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.12.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=OFF, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n\nTorchVision: 0.13.0\nOpenCV: 4.11.0\nMMCV: 1.6.1\nMMCV Compiler: GCC 9.3\nMMCV CUDA Compiler: 11.4\nMMRotate: 0.3.4+6fc0c4e", "config": "dataset_type = 'RSARDataset'\ndata_root = '/liyuxuan/DATA/RSAR/'\nangle_version = 'le90'\nimg_norm_cfg = dict(\n mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', with_bbox=True),\n dict(type='RResize', img_scale=(800, 800), keep_ratio=False),\n dict(\n type='RRandomFlip',\n flip_ratio=[0.25, 0.25, 0.25],\n direction=['horizontal', 'vertical', 'diagonal'],\n version='le90'),\n dict(\n type='PolyRandomRotate',\n rotate_ratio=0.5,\n angles_range=180,\n auto_bound=False,\n rect_classes=[3],\n version='le90'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\ntest_pipeline = [\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(800, 800),\n flip=False,\n transforms=[\n dict(type='RResize', img_scale=(800, 800), keep_ratio=False),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img'])\n ])\n]\ndata = dict(\n samples_per_gpu=1,\n workers_per_gpu=4,\n train=dict(\n type='RSARDataset',\n 
ann_file='/liyuxuan/DATA/RSAR/train/annfiles/',\n img_prefix='/liyuxuan/DATA/RSAR/train/images/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(type='LoadAnnotations', with_bbox=True),\n dict(type='RResize', img_scale=(800, 800), keep_ratio=False),\n dict(\n type='RRandomFlip',\n flip_ratio=[0.25, 0.25, 0.25],\n direction=['horizontal', 'vertical', 'diagonal'],\n version='le90'),\n dict(\n type='PolyRandomRotate',\n rotate_ratio=0.5,\n angles_range=180,\n auto_bound=False,\n rect_classes=[3],\n version='le90'),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n ],\n version='le90'),\n val=dict(\n type='RSARDataset',\n ann_file='/liyuxuan/DATA/RSAR/test/annfiles/',\n img_prefix='/liyuxuan/DATA/RSAR/test/images/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(800, 800),\n flip=False,\n transforms=[\n dict(\n type='RResize', img_scale=(800, 800),\n keep_ratio=False),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img'])\n ])\n ],\n version='le90'),\n test=dict(\n type='RSARDataset',\n ann_file='/liyuxuan/DATA/RSAR/test/images/',\n img_prefix='/liyuxuan/DATA/RSAR/test/images/',\n pipeline=[\n dict(type='LoadImageFromFile'),\n dict(\n type='MultiScaleFlipAug',\n img_scale=(800, 800),\n flip=False,\n transforms=[\n dict(\n type='RResize', img_scale=(800, 800),\n keep_ratio=False),\n dict(\n type='Normalize',\n mean=[123.675, 116.28, 103.53],\n std=[58.395, 57.12, 57.375],\n to_rgb=True),\n dict(type='Pad', size_divisor=32),\n dict(type='DefaultFormatBundle'),\n dict(type='Collect', keys=['img'])\n ])\n ],\n version='le90'))\nevaluation = dict(interval=1, 
metric='mAP')\noptimizer = dict(\n type='AdamW',\n lr=2.5e-05,\n betas=(0.9, 0.999),\n weight_decay=0.05,\n constructor='LayerDecayOptimizerConstructor',\n paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.75))\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n policy='step',\n warmup='linear',\n warmup_iters=500,\n warmup_ratio=0.3333333333333333,\n step=[8, 11])\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\ncheckpoint_config = dict(interval=1, max_keep_ckpts=1)\nlog_config = dict(interval=500, hooks=[dict(type='TextLoggerHook')])\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\nopencv_num_threads = 0\nmp_start_method = 'fork'\npretrained = 'pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors'\nnorm_cfg = dict(type='LN', requires_grad=True)\nmodel = dict(\n type='OrientedRCNN',\n backbone=dict(\n type='InternViTAdapter',\n pretrain_size=448,\n img_size=800,\n patch_size=16,\n embed_dim=1024,\n depth=24,\n num_heads=16,\n mlp_ratio=4.0,\n drop_path_rate=0.1,\n init_values=0.1,\n with_cp=True,\n use_flash_attn=True,\n qk_normalization=False,\n layerscale_force_fp32=False,\n with_fpn=False,\n freeze_vit=False,\n use_final_norm=True,\n interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],\n cffn_ratio=0.25,\n deform_ratio=0.25,\n qkv_bias=True,\n norm_type='layer_norm',\n pretrained=\n 'pretrained/ft_full_1b_8ksteps_instruct_tuning_as_pretrain_TMAug75.safetensors',\n pretrained_type='full',\n only_feat_out=True),\n neck=dict(\n type='SimpleFPN',\n in_channels=[1024, 1024, 1024, 1024],\n out_channels=256,\n norm_cfg=dict(type='LN', requires_grad=True),\n use_residual=False,\n num_outs=5),\n rpn_head=dict(\n type='OrientedRPNHead',\n in_channels=256,\n feat_channels=256,\n version='le90',\n anchor_generator=dict(\n type='AnchorGenerator',\n scales=[8],\n ratios=[0.5, 1.0, 2.0],\n strides=[4, 8, 16, 32, 64]),\n 
bbox_coder=dict(\n type='MidpointOffsetCoder',\n angle_range='le90',\n target_means=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0],\n target_stds=[1.0, 1.0, 1.0, 1.0, 0.5, 0.5]),\n loss_cls=dict(\n type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n loss_bbox=dict(\n type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)),\n roi_head=dict(\n type='OrientedStandardRoIHead',\n bbox_roi_extractor=dict(\n type='RotatedSingleRoIExtractor',\n roi_layer=dict(\n type='RoIAlignRotated',\n out_size=7,\n sample_num=2,\n clockwise=True),\n out_channels=256,\n featmap_strides=[4, 8, 16, 32]),\n bbox_head=dict(\n type='RotatedShared2FCBBoxHead',\n in_channels=256,\n fc_out_channels=1024,\n roi_feat_size=7,\n num_classes=6,\n bbox_coder=dict(\n type='DeltaXYWHAOBBoxCoder',\n angle_range='le90',\n norm_factor=None,\n edge_swap=True,\n proj_xy=True,\n target_means=(0.0, 0.0, 0.0, 0.0, 0.0),\n target_stds=(0.1, 0.1, 0.2, 0.2, 0.1)),\n reg_class_agnostic=True,\n loss_cls=dict(\n type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),\n train_cfg=dict(\n rpn=dict(\n assigner=dict(\n type='MaxIoUAssigner',\n pos_iou_thr=0.7,\n neg_iou_thr=0.3,\n min_pos_iou=0.3,\n match_low_quality=True,\n gpu_assign_thr=1000,\n ignore_iof_thr=-1),\n sampler=dict(\n type='RandomSampler',\n num=256,\n pos_fraction=0.5,\n neg_pos_ub=-1,\n add_gt_as_proposals=False),\n allowed_border=0,\n pos_weight=-1,\n debug=False),\n rpn_proposal=dict(\n nms_pre=2000,\n max_per_img=2000,\n nms=dict(type='nms', iou_threshold=0.8),\n min_bbox_size=0),\n rcnn=dict(\n assigner=dict(\n type='MaxIoUAssigner',\n pos_iou_thr=0.5,\n neg_iou_thr=0.5,\n min_pos_iou=0.5,\n match_low_quality=False,\n gpu_assign_thr=1000,\n iou_calculator=dict(type='RBboxOverlaps2D'),\n ignore_iof_thr=-1),\n sampler=dict(\n type='RRandomSampler',\n num=512,\n pos_fraction=0.25,\n neg_pos_ub=-1,\n add_gt_as_proposals=True),\n pos_weight=-1,\n debug=False)),\n 
test_cfg=dict(\n rpn=dict(\n nms_pre=2000,\n max_per_img=2000,\n nms=dict(type='nms', iou_threshold=0.8),\n min_bbox_size=0),\n rcnn=dict(\n nms_pre=2000,\n min_bbox_size=0,\n score_thr=0.05,\n nms=dict(iou_thr=0.1),\n max_per_img=2000)))\nfp16 = dict(loss_scale=dict(init_scale=512))\nwork_dir = './work_dirs/rsar_8k_TMAug75_orcnn_8k'\nauto_resume = False\ngpu_ids = range(0, 8)\ndevice = 'cuda'\n", "seed": 0, "exp_name": "rsar_8k_TMAug75_orcnn_8k.py"}
|
| 2 |
+
{"mode": "train", "epoch": 1, "iter": 500, "lr": 2e-05, "memory": 9594, "data_time": 0.01252, "loss_rpn_cls": 0.14024, "loss_rpn_bbox": 0.04185, "loss_cls": 0.06949, "acc": 98.53896, "loss_bbox": 0.05972, "loss": 0.3113, "grad_norm": 2.83047, "time": 0.72335}
|
| 3 |
+
{"mode": "train", "epoch": 1, "iter": 1000, "lr": 3e-05, "memory": 9609, "data_time": 0.00319, "loss_rpn_cls": 0.03151, "loss_rpn_bbox": 0.03272, "loss_cls": 0.06674, "acc": 98.07568, "loss_bbox": 0.06939, "loss": 0.20037, "grad_norm": 3.47344, "time": 0.70378}
|
| 4 |
+
{"mode": "train", "epoch": 1, "iter": 1500, "lr": 3e-05, "memory": 9609, "data_time": 0.00326, "loss_rpn_cls": 0.03022, "loss_rpn_bbox": 0.03259, "loss_cls": 0.0768, "acc": 97.52959, "loss_bbox": 0.09163, "loss": 0.23124, "grad_norm": 4.1174, "time": 0.70302}
|
| 5 |
+
{"mode": "train", "epoch": 1, "iter": 2000, "lr": 3e-05, "memory": 9804, "data_time": 0.00329, "loss_rpn_cls": 0.02422, "loss_rpn_bbox": 0.02749, "loss_cls": 0.08185, "acc": 97.22637, "loss_bbox": 0.10107, "loss": 0.23464, "grad_norm": 4.28043, "time": 0.70249}
|
| 6 |
+
{"mode": "train", "epoch": 1, "iter": 2500, "lr": 3e-05, "memory": 9804, "data_time": 0.00323, "loss_rpn_cls": 0.02269, "loss_rpn_bbox": 0.02681, "loss_cls": 0.08763, "acc": 96.94111, "loss_bbox": 0.1116, "loss": 0.24873, "grad_norm": 4.45184, "time": 0.70266}
|
| 7 |
+
{"mode": "train", "epoch": 1, "iter": 3000, "lr": 3e-05, "memory": 9804, "data_time": 0.00318, "loss_rpn_cls": 0.02193, "loss_rpn_bbox": 0.02498, "loss_cls": 0.08141, "acc": 97.06045, "loss_bbox": 0.1079, "loss": 0.23622, "grad_norm": 4.20132, "time": 0.70263}
|
| 8 |
+
{"mode": "train", "epoch": 1, "iter": 3500, "lr": 3e-05, "memory": 9804, "data_time": 0.00313, "loss_rpn_cls": 0.01956, "loss_rpn_bbox": 0.02406, "loss_cls": 0.07905, "acc": 97.08457, "loss_bbox": 0.10492, "loss": 0.22758, "grad_norm": 3.9723, "time": 0.70249}
|
| 9 |
+
{"mode": "train", "epoch": 1, "iter": 4000, "lr": 3e-05, "memory": 9804, "data_time": 0.00306, "loss_rpn_cls": 0.01821, "loss_rpn_bbox": 0.02268, "loss_cls": 0.08242, "acc": 96.94653, "loss_bbox": 0.10654, "loss": 0.22985, "grad_norm": 4.01182, "time": 0.7028}
|
| 10 |
+
{"mode": "train", "epoch": 1, "iter": 4500, "lr": 3e-05, "memory": 9863, "data_time": 0.00303, "loss_rpn_cls": 0.02081, "loss_rpn_bbox": 0.02342, "loss_cls": 0.0812, "acc": 97.02617, "loss_bbox": 0.10412, "loss": 0.22954, "grad_norm": 3.8387, "time": 0.70321}
|
| 11 |
+
{"mode": "train", "epoch": 1, "iter": 5000, "lr": 3e-05, "memory": 9863, "data_time": 0.00311, "loss_rpn_cls": 0.01721, "loss_rpn_bbox": 0.02267, "loss_cls": 0.08074, "acc": 96.88999, "loss_bbox": 0.1063, "loss": 0.22692, "grad_norm": 3.90445, "time": 0.70261}
|
| 12 |
+
{"mode": "train", "epoch": 1, "iter": 5500, "lr": 3e-05, "memory": 10002, "data_time": 0.00323, "loss_rpn_cls": 0.01705, "loss_rpn_bbox": 0.02129, "loss_cls": 0.07841, "acc": 97.06235, "loss_bbox": 0.10166, "loss": 0.2184, "grad_norm": 3.74052, "time": 0.70236}
|
| 13 |
+
{"mode": "train", "epoch": 1, "iter": 6000, "lr": 3e-05, "memory": 10002, "data_time": 0.00317, "loss_rpn_cls": 0.01783, "loss_rpn_bbox": 0.02099, "loss_cls": 0.08222, "acc": 96.87183, "loss_bbox": 0.10329, "loss": 0.22433, "grad_norm": 3.64682, "time": 0.70259}
|
| 14 |
+
{"mode": "train", "epoch": 1, "iter": 6500, "lr": 3e-05, "memory": 10002, "data_time": 0.0032, "loss_rpn_cls": 0.01543, "loss_rpn_bbox": 0.01982, "loss_cls": 0.07824, "acc": 96.98604, "loss_bbox": 0.10209, "loss": 0.21558, "grad_norm": 3.73653, "time": 0.70237}
|
| 15 |
+
{"mode": "train", "epoch": 1, "iter": 7000, "lr": 3e-05, "memory": 10002, "data_time": 0.00315, "loss_rpn_cls": 0.01463, "loss_rpn_bbox": 0.01899, "loss_cls": 0.08044, "acc": 96.89829, "loss_bbox": 0.10384, "loss": 0.2179, "grad_norm": 3.7265, "time": 0.70222}
|
| 16 |
+
{"mode": "train", "epoch": 1, "iter": 7500, "lr": 3e-05, "memory": 10002, "data_time": 0.00314, "loss_rpn_cls": 0.01677, "loss_rpn_bbox": 0.01934, "loss_cls": 0.07842, "acc": 96.9874, "loss_bbox": 0.10063, "loss": 0.21516, "grad_norm": 3.47063, "time": 0.70232}
|
| 17 |
+
{"mode": "train", "epoch": 1, "iter": 8000, "lr": 3e-05, "memory": 10002, "data_time": 0.00307, "loss_rpn_cls": 0.01495, "loss_rpn_bbox": 0.01959, "loss_cls": 0.07729, "acc": 97.01699, "loss_bbox": 0.10029, "loss": 0.21212, "grad_norm": 3.4604, "time": 0.70215}
|
| 18 |
+
{"mode": "train", "epoch": 1, "iter": 8500, "lr": 3e-05, "memory": 10002, "data_time": 0.00308, "loss_rpn_cls": 0.01268, "loss_rpn_bbox": 0.01807, "loss_cls": 0.0763, "acc": 97.00757, "loss_bbox": 0.09835, "loss": 0.20541, "grad_norm": 3.35526, "time": 0.70215}
|
| 19 |
+
{"mode": "train", "epoch": 1, "iter": 9000, "lr": 3e-05, "memory": 10002, "data_time": 0.00307, "loss_rpn_cls": 0.01364, "loss_rpn_bbox": 0.01792, "loss_cls": 0.07642, "acc": 96.99141, "loss_bbox": 0.09923, "loss": 0.20722, "grad_norm": Infinity, "time": 0.70215}
|
| 20 |
+
{"mode": "train", "epoch": 1, "iter": 9500, "lr": 3e-05, "memory": 10002, "data_time": 0.00313, "loss_rpn_cls": 0.01348, "loss_rpn_bbox": 0.01866, "loss_cls": 0.07748, "acc": 96.94526, "loss_bbox": 0.10217, "loss": 0.21179, "grad_norm": 3.06029, "time": 0.70261}
|
| 21 |
+
{"mode": "val", "epoch": 1, "iter": 1068, "lr": 3e-05, "mAP": 0.54913}
|
| 22 |
+
{"mode": "train", "epoch": 2, "iter": 500, "lr": 3e-05, "memory": 10002, "data_time": 0.01001, "loss_rpn_cls": 0.01264, "loss_rpn_bbox": 0.01745, "loss_cls": 0.07466, "acc": 97.05649, "loss_bbox": 0.09693, "loss": 0.20169, "grad_norm": 3.06205, "time": 0.71073}
|
| 23 |
+
{"mode": "train", "epoch": 2, "iter": 1000, "lr": 3e-05, "memory": 10002, "data_time": 0.00363, "loss_rpn_cls": 0.01453, "loss_rpn_bbox": 0.01903, "loss_cls": 0.07678, "acc": 96.98926, "loss_bbox": 0.10161, "loss": 0.21195, "grad_norm": 2.81391, "time": 0.70426}
|
| 24 |
+
{"mode": "train", "epoch": 2, "iter": 1500, "lr": 3e-05, "memory": 10002, "data_time": 0.00342, "loss_rpn_cls": 0.01272, "loss_rpn_bbox": 0.01806, "loss_cls": 0.07582, "acc": 96.99722, "loss_bbox": 0.10491, "loss": 0.21151, "grad_norm": 2.69963, "time": 0.7033}
|
| 25 |
+
{"mode": "train", "epoch": 2, "iter": 2000, "lr": 3e-05, "memory": 10002, "data_time": 0.00317, "loss_rpn_cls": 0.01317, "loss_rpn_bbox": 0.01781, "loss_cls": 0.07812, "acc": 96.9375, "loss_bbox": 0.10101, "loss": 0.2101, "grad_norm": Infinity, "time": 0.70344}
|
| 26 |
+
{"mode": "train", "epoch": 2, "iter": 2500, "lr": 3e-05, "memory": 10002, "data_time": 0.0032, "loss_rpn_cls": 0.01183, "loss_rpn_bbox": 0.0188, "loss_cls": 0.07497, "acc": 97.01626, "loss_bbox": 0.10139, "loss": 0.20698, "grad_norm": Infinity, "time": 0.70333}
|
| 27 |
+
{"mode": "train", "epoch": 2, "iter": 3000, "lr": 3e-05, "memory": 10002, "data_time": 0.00309, "loss_rpn_cls": 0.01219, "loss_rpn_bbox": 0.01684, "loss_cls": 0.07539, "acc": 97.01655, "loss_bbox": 0.10069, "loss": 0.20511, "grad_norm": 2.86299, "time": 0.70291}
|
| 28 |
+
{"mode": "train", "epoch": 2, "iter": 3500, "lr": 3e-05, "memory": 10002, "data_time": 0.00318, "loss_rpn_cls": 0.01403, "loss_rpn_bbox": 0.01747, "loss_cls": 0.07667, "acc": 97.03076, "loss_bbox": 0.09864, "loss": 0.20681, "grad_norm": 2.91259, "time": 0.70318}
|
| 29 |
+
{"mode": "train", "epoch": 2, "iter": 4000, "lr": 3e-05, "memory": 10002, "data_time": 0.00318, "loss_rpn_cls": 0.01174, "loss_rpn_bbox": 0.01663, "loss_cls": 0.07314, "acc": 97.09561, "loss_bbox": 0.09782, "loss": 0.19933, "grad_norm": 2.3958, "time": 0.70322}
|
| 30 |
+
{"mode": "train", "epoch": 2, "iter": 4500, "lr": 3e-05, "memory": 10002, "data_time": 0.00321, "loss_rpn_cls": 0.01325, "loss_rpn_bbox": 0.01766, "loss_cls": 0.07407, "acc": 97.0645, "loss_bbox": 0.09748, "loss": 0.20247, "grad_norm": 2.62973, "time": 0.70296}
|
| 31 |
+
{"mode": "train", "epoch": 2, "iter": 5000, "lr": 3e-05, "memory": 10002, "data_time": 0.00325, "loss_rpn_cls": 0.01175, "loss_rpn_bbox": 0.01704, "loss_cls": 0.0751, "acc": 97.01191, "loss_bbox": 0.10018, "loss": 0.20407, "grad_norm": 2.54269, "time": 0.70272}
|
| 32 |
+
{"mode": "train", "epoch": 2, "iter": 5500, "lr": 3e-05, "memory": 10002, "data_time": 0.00318, "loss_rpn_cls": 0.01187, "loss_rpn_bbox": 0.01637, "loss_cls": 0.07613, "acc": 96.95884, "loss_bbox": 0.10095, "loss": 0.20532, "grad_norm": 2.58255, "time": 0.70285}
|
| 33 |
+
{"mode": "train", "epoch": 2, "iter": 6000, "lr": 3e-05, "memory": 10002, "data_time": 0.00318, "loss_rpn_cls": 0.01242, "loss_rpn_bbox": 0.0171, "loss_cls": 0.07773, "acc": 96.92734, "loss_bbox": 0.10004, "loss": 0.20729, "grad_norm": 2.56368, "time": 0.70258}
|
| 34 |
+
{"mode": "train", "epoch": 2, "iter": 6500, "lr": 3e-05, "memory": 10002, "data_time": 0.00322, "loss_rpn_cls": 0.01204, "loss_rpn_bbox": 0.01746, "loss_cls": 0.0758, "acc": 96.97783, "loss_bbox": 0.10155, "loss": 0.20685, "grad_norm": 2.38867, "time": 0.70278}
|
| 35 |
+
{"mode": "train", "epoch": 2, "iter": 7000, "lr": 3e-05, "memory": 10002, "data_time": 0.00322, "loss_rpn_cls": 0.01155, "loss_rpn_bbox": 0.01716, "loss_cls": 0.07403, "acc": 97.05576, "loss_bbox": 0.09605, "loss": 0.19879, "grad_norm": 2.26777, "time": 0.70241}
|
| 36 |
+
{"mode": "train", "epoch": 2, "iter": 7500, "lr": 3e-05, "memory": 10002, "data_time": 0.00335, "loss_rpn_cls": 0.01152, "loss_rpn_bbox": 0.01741, "loss_cls": 0.07043, "acc": 97.19365, "loss_bbox": 0.09526, "loss": 0.19462, "grad_norm": 2.26232, "time": 0.70225}
|
| 37 |
+
{"mode": "train", "epoch": 2, "iter": 8000, "lr": 3e-05, "memory": 10002, "data_time": 0.00329, "loss_rpn_cls": 0.01188, "loss_rpn_bbox": 0.01723, "loss_cls": 0.07551, "acc": 96.99136, "loss_bbox": 0.09941, "loss": 0.20404, "grad_norm": Infinity, "time": 0.70277}
|
| 38 |
+
{"mode": "train", "epoch": 2, "iter": 8500, "lr": 3e-05, "memory": 10002, "data_time": 0.00326, "loss_rpn_cls": 0.0112, "loss_rpn_bbox": 0.01575, "loss_cls": 0.07394, "acc": 97.05522, "loss_bbox": 0.09978, "loss": 0.20066, "grad_norm": 2.22689, "time": 0.7032}
|
| 39 |
+
{"mode": "train", "epoch": 2, "iter": 9000, "lr": 3e-05, "memory": 10002, "data_time": 0.00318, "loss_rpn_cls": 0.01149, "loss_rpn_bbox": 0.01683, "loss_cls": 0.07321, "acc": 97.07358, "loss_bbox": 0.09761, "loss": 0.19915, "grad_norm": 2.31324, "time": 0.70255}
|
| 40 |
+
{"mode": "train", "epoch": 2, "iter": 9500, "lr": 3e-05, "memory": 10002, "data_time": 0.00306, "loss_rpn_cls": 0.01133, "loss_rpn_bbox": 0.0162, "loss_cls": 0.07127, "acc": 97.15786, "loss_bbox": 0.09541, "loss": 0.19421, "grad_norm": 2.2412, "time": 0.70273}
|
| 41 |
+
{"mode": "val", "epoch": 2, "iter": 1068, "lr": 3e-05, "mAP": 0.53252}
|
| 42 |
+
{"mode": "train", "epoch": 3, "iter": 500, "lr": 3e-05, "memory": 10002, "data_time": 0.00981, "loss_rpn_cls": 0.00997, "loss_rpn_bbox": 0.01622, "loss_cls": 0.07148, "acc": 97.13555, "loss_bbox": 0.09721, "loss": 0.19488, "grad_norm": 2.10508, "time": 0.71277}
|
| 43 |
+
{"mode": "train", "epoch": 3, "iter": 1000, "lr": 3e-05, "memory": 10003, "data_time": 0.00315, "loss_rpn_cls": 0.01148, "loss_rpn_bbox": 0.01587, "loss_cls": 0.07184, "acc": 97.15791, "loss_bbox": 0.09583, "loss": 0.19502, "grad_norm": 2.16656, "time": 0.70523}
|
| 44 |
+
{"mode": "train", "epoch": 3, "iter": 1500, "lr": 3e-05, "memory": 10003, "data_time": 0.00307, "loss_rpn_cls": 0.01019, "loss_rpn_bbox": 0.01632, "loss_cls": 0.07246, "acc": 97.08247, "loss_bbox": 0.0952, "loss": 0.19417, "grad_norm": 2.15082, "time": 0.70447}
|
| 45 |
+
{"mode": "train", "epoch": 3, "iter": 2000, "lr": 3e-05, "memory": 10003, "data_time": 0.00304, "loss_rpn_cls": 0.00923, "loss_rpn_bbox": 0.01648, "loss_cls": 0.07168, "acc": 97.10303, "loss_bbox": 0.09521, "loss": 0.19261, "grad_norm": 2.20279, "time": 0.70484}
|
| 46 |
+
{"mode": "train", "epoch": 3, "iter": 2500, "lr": 3e-05, "memory": 10003, "data_time": 0.00311, "loss_rpn_cls": 0.01206, "loss_rpn_bbox": 0.01617, "loss_cls": 0.07087, "acc": 97.18853, "loss_bbox": 0.09401, "loss": 0.19311, "grad_norm": 2.11463, "time": 0.70431}
|
| 47 |
+
{"mode": "train", "epoch": 3, "iter": 3000, "lr": 3e-05, "memory": 10003, "data_time": 0.0031, "loss_rpn_cls": 0.01025, "loss_rpn_bbox": 0.01513, "loss_cls": 0.0698, "acc": 97.18262, "loss_bbox": 0.09556, "loss": 0.19074, "grad_norm": Infinity, "time": 0.70573}
|
| 48 |
+
{"mode": "train", "epoch": 3, "iter": 3500, "lr": 3e-05, "memory": 10003, "data_time": 0.00319, "loss_rpn_cls": 0.00986, "loss_rpn_bbox": 0.01545, "loss_cls": 0.07453, "acc": 96.99062, "loss_bbox": 0.10101, "loss": 0.20086, "grad_norm": 2.13368, "time": 0.70543}
|
| 49 |
+
{"mode": "train", "epoch": 3, "iter": 4000, "lr": 3e-05, "memory": 10003, "data_time": 0.00318, "loss_rpn_cls": 0.00933, "loss_rpn_bbox": 0.01608, "loss_cls": 0.07074, "acc": 97.16372, "loss_bbox": 0.0962, "loss": 0.19235, "grad_norm": 2.08251, "time": 0.70585}
|
| 50 |
+
{"mode": "train", "epoch": 3, "iter": 4500, "lr": 3e-05, "memory": 10003, "data_time": 0.0031, "loss_rpn_cls": 0.0093, "loss_rpn_bbox": 0.0148, "loss_cls": 0.06874, "acc": 97.23203, "loss_bbox": 0.09521, "loss": 0.18805, "grad_norm": 2.01941, "time": 0.70526}
|
| 51 |
+
{"mode": "train", "epoch": 3, "iter": 5000, "lr": 3e-05, "memory": 10003, "data_time": 0.00308, "loss_rpn_cls": 0.00913, "loss_rpn_bbox": 0.01516, "loss_cls": 0.06954, "acc": 97.19756, "loss_bbox": 0.09339, "loss": 0.18722, "grad_norm": Infinity, "time": 0.70571}
|
| 52 |
+
{"mode": "train", "epoch": 3, "iter": 5500, "lr": 3e-05, "memory": 10003, "data_time": 0.00314, "loss_rpn_cls": 0.00987, "loss_rpn_bbox": 0.01553, "loss_cls": 0.07124, "acc": 97.12363, "loss_bbox": 0.09675, "loss": 0.1934, "grad_norm": 1.8974, "time": 0.70542}
|
| 53 |
+
{"mode": "train", "epoch": 3, "iter": 6000, "lr": 3e-05, "memory": 10003, "data_time": 0.00312, "loss_rpn_cls": 0.00988, "loss_rpn_bbox": 0.01508, "loss_cls": 0.07057, "acc": 97.13765, "loss_bbox": 0.09445, "loss": 0.18997, "grad_norm": 1.98686, "time": 0.7051}
|
| 54 |
+
{"mode": "train", "epoch": 3, "iter": 6500, "lr": 3e-05, "memory": 10003, "data_time": 0.00311, "loss_rpn_cls": 0.00969, "loss_rpn_bbox": 0.0159, "loss_cls": 0.07231, "acc": 97.1084, "loss_bbox": 0.09718, "loss": 0.19508, "grad_norm": 1.90614, "time": 0.70588}
|
| 55 |
+
{"mode": "train", "epoch": 3, "iter": 7000, "lr": 3e-05, "memory": 10003, "data_time": 0.00299, "loss_rpn_cls": 0.00856, "loss_rpn_bbox": 0.01414, "loss_cls": 0.06858, "acc": 97.20181, "loss_bbox": 0.095, "loss": 0.18629, "grad_norm": 1.8433, "time": 0.70592}
|
| 56 |
+
{"mode": "train", "epoch": 3, "iter": 7500, "lr": 3e-05, "memory": 10003, "data_time": 0.00296, "loss_rpn_cls": 0.01094, "loss_rpn_bbox": 0.01469, "loss_cls": 0.07238, "acc": 97.1252, "loss_bbox": 0.0941, "loss": 0.19212, "grad_norm": Infinity, "time": 0.7066}
|
| 57 |
+
{"mode": "train", "epoch": 3, "iter": 8000, "lr": 3e-05, "memory": 10003, "data_time": 0.00302, "loss_rpn_cls": 0.00917, "loss_rpn_bbox": 0.01499, "loss_cls": 0.07211, "acc": 97.07046, "loss_bbox": 0.09669, "loss": 0.19296, "grad_norm": 1.8321, "time": 0.70517}
|
| 58 |
+
{"mode": "train", "epoch": 3, "iter": 8500, "lr": 3e-05, "memory": 10003, "data_time": 0.00296, "loss_rpn_cls": 0.0107, "loss_rpn_bbox": 0.016, "loss_cls": 0.07017, "acc": 97.21187, "loss_bbox": 0.09263, "loss": 0.1895, "grad_norm": Infinity, "time": 0.70567}
|
| 59 |
+
{"mode": "train", "epoch": 3, "iter": 9000, "lr": 3e-05, "memory": 10003, "data_time": 0.00304, "loss_rpn_cls": 0.00963, "loss_rpn_bbox": 0.01576, "loss_cls": 0.06813, "acc": 97.2709, "loss_bbox": 0.0945, "loss": 0.18802, "grad_norm": 1.82404, "time": 0.70469}
|
| 60 |
+
{"mode": "train", "epoch": 3, "iter": 9500, "lr": 3e-05, "memory": 10003, "data_time": 0.00314, "loss_rpn_cls": 0.01016, "loss_rpn_bbox": 0.01574, "loss_cls": 0.06758, "acc": 97.31445, "loss_bbox": 0.09189, "loss": 0.18537, "grad_norm": 1.73728, "time": 0.70551}
|
| 61 |
+
{"mode": "val", "epoch": 3, "iter": 1068, "lr": 3e-05, "mAP": 0.6086}
|
| 62 |
+
{"mode": "train", "epoch": 4, "iter": 500, "lr": 3e-05, "memory": 10003, "data_time": 0.01032, "loss_rpn_cls": 0.00869, "loss_rpn_bbox": 0.01404, "loss_cls": 0.07056, "acc": 97.15483, "loss_bbox": 0.09609, "loss": 0.18937, "grad_norm": 1.8767, "time": 0.71473}
|
| 63 |
+
{"mode": "train", "epoch": 4, "iter": 1000, "lr": 3e-05, "memory": 10003, "data_time": 0.00329, "loss_rpn_cls": 0.00907, "loss_rpn_bbox": 0.01438, "loss_cls": 0.067, "acc": 97.28423, "loss_bbox": 0.09107, "loss": 0.18152, "grad_norm": 1.75521, "time": 0.70711}
|
| 64 |
+
{"mode": "train", "epoch": 4, "iter": 1500, "lr": 3e-05, "memory": 10003, "data_time": 0.00318, "loss_rpn_cls": 0.00821, "loss_rpn_bbox": 0.01507, "loss_cls": 0.06811, "acc": 97.21646, "loss_bbox": 0.0941, "loss": 0.18549, "grad_norm": 1.79809, "time": 0.7077}
|
| 65 |
+
{"mode": "train", "epoch": 4, "iter": 2000, "lr": 3e-05, "memory": 10003, "data_time": 0.00307, "loss_rpn_cls": 0.00864, "loss_rpn_bbox": 0.0144, "loss_cls": 0.06952, "acc": 97.19336, "loss_bbox": 0.09287, "loss": 0.18542, "grad_norm": 1.81303, "time": 0.70657}
|
| 66 |
+
{"mode": "train", "epoch": 4, "iter": 2500, "lr": 3e-05, "memory": 10003, "data_time": 0.00309, "loss_rpn_cls": 0.0091, "loss_rpn_bbox": 0.01468, "loss_cls": 0.07162, "acc": 97.10884, "loss_bbox": 0.09516, "loss": 0.19055, "grad_norm": 1.80157, "time": 0.70669}
|
| 67 |
+
{"mode": "train", "epoch": 4, "iter": 3000, "lr": 3e-05, "memory": 10003, "data_time": 0.00315, "loss_rpn_cls": 0.00997, "loss_rpn_bbox": 0.01518, "loss_cls": 0.07082, "acc": 97.13716, "loss_bbox": 0.09598, "loss": 0.19195, "grad_norm": Infinity, "time": 0.70753}
|
| 68 |
+
{"mode": "train", "epoch": 4, "iter": 3500, "lr": 3e-05, "memory": 10003, "data_time": 0.00312, "loss_rpn_cls": 0.00887, "loss_rpn_bbox": 0.01468, "loss_cls": 0.06601, "acc": 97.33809, "loss_bbox": 0.09029, "loss": 0.17985, "grad_norm": 1.69034, "time": 0.70737}
|
| 69 |
+
{"mode": "train", "epoch": 4, "iter": 4000, "lr": 3e-05, "memory": 10003, "data_time": 0.00309, "loss_rpn_cls": 0.00873, "loss_rpn_bbox": 0.01396, "loss_cls": 0.06786, "acc": 97.22559, "loss_bbox": 0.09471, "loss": 0.18526, "grad_norm": 1.71019, "time": 0.70798}
|
| 70 |
+
{"mode": "train", "epoch": 4, "iter": 4500, "lr": 3e-05, "memory": 10003, "data_time": 0.0031, "loss_rpn_cls": 0.0089, "loss_rpn_bbox": 0.01543, "loss_cls": 0.0673, "acc": 97.28882, "loss_bbox": 0.09298, "loss": 0.1846, "grad_norm": 1.73542, "time": 0.70716}
|
| 71 |
+
{"mode": "train", "epoch": 4, "iter": 5000, "lr": 3e-05, "memory": 10003, "data_time": 0.00304, "loss_rpn_cls": 0.00868, "loss_rpn_bbox": 0.01478, "loss_cls": 0.06884, "acc": 97.20615, "loss_bbox": 0.09509, "loss": 0.18739, "grad_norm": Infinity, "time": 0.7072}
|
| 72 |
+
{"mode": "train", "epoch": 4, "iter": 5500, "lr": 3e-05, "memory": 10003, "data_time": 0.0031, "loss_rpn_cls": 0.00945, "loss_rpn_bbox": 0.01537, "loss_cls": 0.0675, "acc": 97.27456, "loss_bbox": 0.09177, "loss": 0.18408, "grad_norm": 1.70174, "time": 0.7061}
|
| 73 |
+
{"mode": "train", "epoch": 4, "iter": 6000, "lr": 3e-05, "memory": 10003, "data_time": 0.00302, "loss_rpn_cls": 0.0083, "loss_rpn_bbox": 0.01522, "loss_cls": 0.06722, "acc": 97.24316, "loss_bbox": 0.09413, "loss": 0.18486, "grad_norm": 1.58961, "time": 0.70672}
|
| 74 |
+
{"mode": "train", "epoch": 4, "iter": 6500, "lr": 3e-05, "memory": 10003, "data_time": 0.00299, "loss_rpn_cls": 0.00867, "loss_rpn_bbox": 0.01511, "loss_cls": 0.06884, "acc": 97.21025, "loss_bbox": 0.09541, "loss": 0.18803, "grad_norm": 1.74774, "time": 0.70614}
|
| 75 |
+
{"mode": "train", "epoch": 4, "iter": 7000, "lr": 3e-05, "memory": 10003, "data_time": 0.00298, "loss_rpn_cls": 0.00855, "loss_rpn_bbox": 0.01443, "loss_cls": 0.06406, "acc": 97.42847, "loss_bbox": 0.08737, "loss": 0.17441, "grad_norm": 1.55191, "time": 0.70692}
|
| 76 |
+
{"mode": "train", "epoch": 4, "iter": 7500, "lr": 3e-05, "memory": 10003, "data_time": 0.00311, "loss_rpn_cls": 0.00791, "loss_rpn_bbox": 0.01405, "loss_cls": 0.06507, "acc": 97.36416, "loss_bbox": 0.09107, "loss": 0.1781, "grad_norm": Infinity, "time": 0.70683}
|
| 77 |
+
{"mode": "train", "epoch": 4, "iter": 8000, "lr": 3e-05, "memory": 10003, "data_time": 0.00293, "loss_rpn_cls": 0.00856, "loss_rpn_bbox": 0.01447, "loss_cls": 0.06834, "acc": 97.2417, "loss_bbox": 0.09381, "loss": 0.18517, "grad_norm": 1.60042, "time": 0.70698}
|
| 78 |
+
{"mode": "train", "epoch": 4, "iter": 8500, "lr": 3e-05, "memory": 10003, "data_time": 0.003, "loss_rpn_cls": 0.00841, "loss_rpn_bbox": 0.01396, "loss_cls": 0.06623, "acc": 97.31113, "loss_bbox": 0.0903, "loss": 0.17891, "grad_norm": Infinity, "time": 0.70621}
|
| 79 |
+
{"mode": "train", "epoch": 4, "iter": 9000, "lr": 3e-05, "memory": 10003, "data_time": 0.00315, "loss_rpn_cls": 0.00782, "loss_rpn_bbox": 0.01505, "loss_cls": 0.06803, "acc": 97.20898, "loss_bbox": 0.09263, "loss": 0.18354, "grad_norm": 1.5997, "time": 0.70617}
|
| 80 |
+
{"mode": "train", "epoch": 4, "iter": 9500, "lr": 3e-05, "memory": 10003, "data_time": 0.00305, "loss_rpn_cls": 0.00786, "loss_rpn_bbox": 0.01421, "loss_cls": 0.06668, "acc": 97.28804, "loss_bbox": 0.09168, "loss": 0.18044, "grad_norm": 1.6306, "time": 0.70627}
|
| 81 |
+
{"mode": "val", "epoch": 4, "iter": 1068, "lr": 3e-05, "mAP": 0.60241}
|
| 82 |
+
{"mode": "train", "epoch": 5, "iter": 500, "lr": 3e-05, "memory": 10004, "data_time": 0.00982, "loss_rpn_cls": 0.00797, "loss_rpn_bbox": 0.0148, "loss_cls": 0.06447, "acc": 97.35972, "loss_bbox": 0.0921, "loss": 0.17934, "grad_norm": 1.54504, "time": 0.71315}
|
| 83 |
+
{"mode": "train", "epoch": 5, "iter": 1000, "lr": 3e-05, "memory": 10004, "data_time": 0.00318, "loss_rpn_cls": 0.0074, "loss_rpn_bbox": 0.01338, "loss_cls": 0.06703, "acc": 97.26191, "loss_bbox": 0.09351, "loss": 0.18133, "grad_norm": 1.64228, "time": 0.70567}
|
| 84 |
+
{"mode": "train", "epoch": 5, "iter": 1500, "lr": 3e-05, "memory": 10004, "data_time": 0.00313, "loss_rpn_cls": 0.00752, "loss_rpn_bbox": 0.01398, "loss_cls": 0.06632, "acc": 97.3188, "loss_bbox": 0.09033, "loss": 0.17815, "grad_norm": Infinity, "time": 0.70605}
|
| 85 |
+
{"mode": "train", "epoch": 5, "iter": 2000, "lr": 3e-05, "memory": 10004, "data_time": 0.00316, "loss_rpn_cls": 0.00822, "loss_rpn_bbox": 0.01453, "loss_cls": 0.06723, "acc": 97.26997, "loss_bbox": 0.09379, "loss": 0.18376, "grad_norm": 1.57531, "time": 0.70583}
|
| 86 |
+
{"mode": "train", "epoch": 5, "iter": 2500, "lr": 3e-05, "memory": 10004, "data_time": 0.00315, "loss_rpn_cls": 0.00659, "loss_rpn_bbox": 0.01368, "loss_cls": 0.06615, "acc": 97.30889, "loss_bbox": 0.09161, "loss": 0.17803, "grad_norm": 1.47328, "time": 0.70617}
|
| 87 |
+
{"mode": "train", "epoch": 5, "iter": 3000, "lr": 3e-05, "memory": 10004, "data_time": 0.00317, "loss_rpn_cls": 0.00753, "loss_rpn_bbox": 0.01355, "loss_cls": 0.06472, "acc": 97.35776, "loss_bbox": 0.09029, "loss": 0.1761, "grad_norm": 1.4743, "time": 0.70599}
|
| 88 |
+
{"mode": "train", "epoch": 5, "iter": 3500, "lr": 3e-05, "memory": 10004, "data_time": 0.00341, "loss_rpn_cls": 0.00867, "loss_rpn_bbox": 0.0137, "loss_cls": 0.06716, "acc": 97.30776, "loss_bbox": 0.09246, "loss": 0.18198, "grad_norm": 1.57557, "time": 0.7047}
|
| 89 |
+
{"mode": "train", "epoch": 5, "iter": 4000, "lr": 3e-05, "memory": 10004, "data_time": 0.00329, "loss_rpn_cls": 0.00725, "loss_rpn_bbox": 0.01336, "loss_cls": 0.06399, "acc": 97.36479, "loss_bbox": 0.09183, "loss": 0.17643, "grad_norm": 1.53289, "time": 0.7041}
|
| 90 |
+
{"mode": "train", "epoch": 5, "iter": 4500, "lr": 3e-05, "memory": 10004, "data_time": 0.0032, "loss_rpn_cls": 0.0077, "loss_rpn_bbox": 0.01343, "loss_cls": 0.06722, "acc": 97.27275, "loss_bbox": 0.0919, "loss": 0.18025, "grad_norm": 1.64129, "time": 0.70387}
|
| 91 |
+
{"mode": "train", "epoch": 5, "iter": 5000, "lr": 3e-05, "memory": 10004, "data_time": 0.00322, "loss_rpn_cls": 0.00719, "loss_rpn_bbox": 0.01408, "loss_cls": 0.06313, "acc": 97.4249, "loss_bbox": 0.09242, "loss": 0.17681, "grad_norm": 1.48663, "time": 0.70352}
|
| 92 |
+
{"mode": "train", "epoch": 5, "iter": 5500, "lr": 3e-05, "memory": 10004, "data_time": 0.0032, "loss_rpn_cls": 0.00771, "loss_rpn_bbox": 0.01382, "loss_cls": 0.06601, "acc": 97.30776, "loss_bbox": 0.09194, "loss": 0.17947, "grad_norm": 1.44187, "time": 0.70352}
|
| 93 |
+
{"mode": "train", "epoch": 5, "iter": 6000, "lr": 3e-05, "memory": 10004, "data_time": 0.00319, "loss_rpn_cls": 0.00798, "loss_rpn_bbox": 0.01414, "loss_cls": 0.06534, "acc": 97.36636, "loss_bbox": 0.0932, "loss": 0.18065, "grad_norm": NaN, "time": 0.70363}
|
| 94 |
+
{"mode": "train", "epoch": 5, "iter": 6500, "lr": 3e-05, "memory": 10004, "data_time": 0.00316, "loss_rpn_cls": 0.00667, "loss_rpn_bbox": 0.01421, "loss_cls": 0.06538, "acc": 97.32427, "loss_bbox": 0.09177, "loss": 0.17802, "grad_norm": 1.51969, "time": 0.70379}
|
| 95 |
+
{"mode": "train", "epoch": 5, "iter": 7000, "lr": 3e-05, "memory": 10004, "data_time": 0.00314, "loss_rpn_cls": 0.00796, "loss_rpn_bbox": 0.01494, "loss_cls": 0.06666, "acc": 97.28408, "loss_bbox": 0.09254, "loss": 0.18209, "grad_norm": 1.49924, "time": 0.70353}
|
| 96 |
+
{"mode": "train", "epoch": 5, "iter": 7500, "lr": 3e-05, "memory": 10004, "data_time": 0.00322, "loss_rpn_cls": 0.00761, "loss_rpn_bbox": 0.01475, "loss_cls": 0.06705, "acc": 97.26299, "loss_bbox": 0.09411, "loss": 0.18353, "grad_norm": 1.49761, "time": 0.70387}
|
| 97 |
+
{"mode": "train", "epoch": 5, "iter": 8000, "lr": 3e-05, "memory": 10004, "data_time": 0.00319, "loss_rpn_cls": 0.00806, "loss_rpn_bbox": 0.01437, "loss_cls": 0.06515, "acc": 97.33442, "loss_bbox": 0.09193, "loss": 0.17951, "grad_norm": 1.39995, "time": 0.70343}
|
| 98 |
+
{"mode": "train", "epoch": 5, "iter": 8500, "lr": 3e-05, "memory": 10004, "data_time": 0.00311, "loss_rpn_cls": 0.00788, "loss_rpn_bbox": 0.0141, "loss_cls": 0.06625, "acc": 97.28555, "loss_bbox": 0.09326, "loss": 0.18149, "grad_norm": 1.48954, "time": 0.70349}
|
| 99 |
+
{"mode": "train", "epoch": 5, "iter": 9000, "lr": 3e-05, "memory": 10004, "data_time": 0.00309, "loss_rpn_cls": 0.00756, "loss_rpn_bbox": 0.01379, "loss_cls": 0.06828, "acc": 97.23066, "loss_bbox": 0.09411, "loss": 0.18374, "grad_norm": 1.44517, "time": 0.704}
|
| 100 |
+
{"mode": "train", "epoch": 5, "iter": 9500, "lr": 3e-05, "memory": 10004, "data_time": 0.00312, "loss_rpn_cls": 0.00687, "loss_rpn_bbox": 0.01412, "loss_cls": 0.0643, "acc": 97.37168, "loss_bbox": 0.09014, "loss": 0.17544, "grad_norm": NaN, "time": 0.70304}
|
| 101 |
+
{"mode": "val", "epoch": 5, "iter": 1068, "lr": 3e-05, "mAP": 0.635}
|
| 102 |
+
{"mode": "train", "epoch": 6, "iter": 500, "lr": 3e-05, "memory": 10004, "data_time": 0.00969, "loss_rpn_cls": 0.00758, "loss_rpn_bbox": 0.01393, "loss_cls": 0.06761, "acc": 97.24829, "loss_bbox": 0.09384, "loss": 0.18295, "grad_norm": 1.52964, "time": 0.7108}
|
| 103 |
+
{"mode": "train", "epoch": 6, "iter": 1000, "lr": 3e-05, "memory": 10004, "data_time": 0.00317, "loss_rpn_cls": 0.00741, "loss_rpn_bbox": 0.01353, "loss_cls": 0.06772, "acc": 97.2397, "loss_bbox": 0.09614, "loss": 0.18481, "grad_norm": 1.50269, "time": 0.70345}
|
| 104 |
+
{"mode": "train", "epoch": 6, "iter": 1500, "lr": 3e-05, "memory": 10004, "data_time": 0.00317, "loss_rpn_cls": 0.00773, "loss_rpn_bbox": 0.01487, "loss_cls": 0.06721, "acc": 97.20547, "loss_bbox": 0.09557, "loss": 0.18539, "grad_norm": 1.4774, "time": 0.70405}
|
| 105 |
+
{"mode": "train", "epoch": 6, "iter": 2000, "lr": 3e-05, "memory": 10004, "data_time": 0.00314, "loss_rpn_cls": 0.00694, "loss_rpn_bbox": 0.01344, "loss_cls": 0.06499, "acc": 97.33574, "loss_bbox": 0.09222, "loss": 0.17759, "grad_norm": 1.42718, "time": 0.70324}
|
| 106 |
+
{"mode": "train", "epoch": 6, "iter": 2500, "lr": 3e-05, "memory": 10004, "data_time": 0.00315, "loss_rpn_cls": 0.00729, "loss_rpn_bbox": 0.01414, "loss_cls": 0.06448, "acc": 97.371, "loss_bbox": 0.09273, "loss": 0.17864, "grad_norm": 1.40054, "time": 0.7034}
|
| 107 |
+
{"mode": "train", "epoch": 6, "iter": 3000, "lr": 3e-05, "memory": 10004, "data_time": 0.00316, "loss_rpn_cls": 0.00631, "loss_rpn_bbox": 0.01256, "loss_cls": 0.06312, "acc": 97.39292, "loss_bbox": 0.09029, "loss": 0.17228, "grad_norm": Infinity, "time": 0.70377}
|
| 108 |
+
{"mode": "train", "epoch": 6, "iter": 3500, "lr": 3e-05, "memory": 10004, "data_time": 0.00306, "loss_rpn_cls": 0.00797, "loss_rpn_bbox": 0.01365, "loss_cls": 0.06694, "acc": 97.28818, "loss_bbox": 0.09198, "loss": 0.18055, "grad_norm": 1.46973, "time": 0.70329}
|
| 109 |
+
{"mode": "train", "epoch": 6, "iter": 4000, "lr": 3e-05, "memory": 10004, "data_time": 0.00315, "loss_rpn_cls": 0.0067, "loss_rpn_bbox": 0.01313, "loss_cls": 0.06606, "acc": 97.29551, "loss_bbox": 0.09322, "loss": 0.1791, "grad_norm": 1.4166, "time": 0.70342}
|
| 110 |
+
{"mode": "train", "epoch": 6, "iter": 4500, "lr": 3e-05, "memory": 10004, "data_time": 0.00317, "loss_rpn_cls": 0.00608, "loss_rpn_bbox": 0.01367, "loss_cls": 0.06253, "acc": 97.42217, "loss_bbox": 0.09013, "loss": 0.17241, "grad_norm": 1.3777, "time": 0.70328}
|
| 111 |
+
{"mode": "train", "epoch": 6, "iter": 5000, "lr": 3e-05, "memory": 10004, "data_time": 0.00316, "loss_rpn_cls": 0.00722, "loss_rpn_bbox": 0.01366, "loss_cls": 0.06422, "acc": 97.36304, "loss_bbox": 0.08906, "loss": 0.17416, "grad_norm": NaN, "time": 0.70365}
|
| 112 |
+
{"mode": "train", "epoch": 6, "iter": 5500, "lr": 3e-05, "memory": 10004, "data_time": 0.00313, "loss_rpn_cls": 0.00645, "loss_rpn_bbox": 0.01318, "loss_cls": 0.06122, "acc": 97.50303, "loss_bbox": 0.0903, "loss": 0.17115, "grad_norm": 1.33215, "time": 0.70295}
|
| 113 |
+
{"mode": "train", "epoch": 6, "iter": 6000, "lr": 3e-05, "memory": 10004, "data_time": 0.00314, "loss_rpn_cls": 0.00727, "loss_rpn_bbox": 0.01344, "loss_cls": 0.06388, "acc": 97.36821, "loss_bbox": 0.0907, "loss": 0.17529, "grad_norm": 1.3809, "time": 0.70382}
|
| 114 |
+
{"mode": "train", "epoch": 6, "iter": 6500, "lr": 3e-05, "memory": 10004, "data_time": 0.00319, "loss_rpn_cls": 0.00633, "loss_rpn_bbox": 0.01213, "loss_cls": 0.06113, "acc": 97.52168, "loss_bbox": 0.08842, "loss": 0.16801, "grad_norm": 1.34015, "time": 0.70386}
|
| 115 |
+
{"mode": "train", "epoch": 6, "iter": 7000, "lr": 3e-05, "memory": 10004, "data_time": 0.0031, "loss_rpn_cls": 0.00712, "loss_rpn_bbox": 0.01389, "loss_cls": 0.06616, "acc": 97.30811, "loss_bbox": 0.09317, "loss": 0.18034, "grad_norm": 1.39726, "time": 0.70336}
|
| 116 |
+
{"mode": "train", "epoch": 6, "iter": 7500, "lr": 3e-05, "memory": 10004, "data_time": 0.00314, "loss_rpn_cls": 0.00669, "loss_rpn_bbox": 0.01386, "loss_cls": 0.06566, "acc": 97.33442, "loss_bbox": 0.09314, "loss": 0.17935, "grad_norm": 1.43897, "time": 0.70315}
|
| 117 |
+
{"mode": "train", "epoch": 6, "iter": 8000, "lr": 3e-05, "memory": 10004, "data_time": 0.00315, "loss_rpn_cls": 0.00684, "loss_rpn_bbox": 0.01376, "loss_cls": 0.06401, "acc": 97.40571, "loss_bbox": 0.09196, "loss": 0.17657, "grad_norm": 1.32909, "time": 0.70396}
|
| 118 |
+
{"mode": "train", "epoch": 6, "iter": 8500, "lr": 3e-05, "memory": 10004, "data_time": 0.00318, "loss_rpn_cls": 0.0066, "loss_rpn_bbox": 0.01342, "loss_cls": 0.06328, "acc": 97.38291, "loss_bbox": 0.09124, "loss": 0.17454, "grad_norm": 1.38079, "time": 0.70355}
|
| 119 |
+
{"mode": "train", "epoch": 6, "iter": 9000, "lr": 3e-05, "memory": 10004, "data_time": 0.00317, "loss_rpn_cls": 0.00644, "loss_rpn_bbox": 0.01311, "loss_cls": 0.06469, "acc": 97.32671, "loss_bbox": 0.09195, "loss": 0.1762, "grad_norm": 1.36109, "time": 0.7034}
|
| 120 |
+
{"mode": "train", "epoch": 6, "iter": 9500, "lr": 3e-05, "memory": 10004, "data_time": 0.00319, "loss_rpn_cls": 0.00624, "loss_rpn_bbox": 0.0122, "loss_cls": 0.06377, "acc": 97.36157, "loss_bbox": 0.09188, "loss": 0.17409, "grad_norm": 1.31857, "time": 0.70464}
|
| 121 |
+
{"mode": "val", "epoch": 6, "iter": 1068, "lr": 3e-05, "mAP": 0.63884}
|
| 122 |
+
{"mode": "train", "epoch": 7, "iter": 500, "lr": 3e-05, "memory": 10004, "data_time": 0.01027, "loss_rpn_cls": 0.00685, "loss_rpn_bbox": 0.01373, "loss_cls": 0.06395, "acc": 97.36973, "loss_bbox": 0.09241, "loss": 0.17694, "grad_norm": Infinity, "time": 0.71474}
|
| 123 |
+
{"mode": "train", "epoch": 7, "iter": 1000, "lr": 3e-05, "memory": 10004, "data_time": 0.00314, "loss_rpn_cls": 0.00616, "loss_rpn_bbox": 0.0136, "loss_cls": 0.06221, "acc": 97.42837, "loss_bbox": 0.09066, "loss": 0.17262, "grad_norm": 1.3114, "time": 0.70725}
|
| 124 |
+
{"mode": "train", "epoch": 7, "iter": 1500, "lr": 3e-05, "memory": 10004, "data_time": 0.00316, "loss_rpn_cls": 0.00643, "loss_rpn_bbox": 0.01377, "loss_cls": 0.06172, "acc": 97.43096, "loss_bbox": 0.08918, "loss": 0.1711, "grad_norm": 1.29066, "time": 0.70709}
|
| 125 |
+
{"mode": "train", "epoch": 7, "iter": 2000, "lr": 3e-05, "memory": 10004, "data_time": 0.00314, "loss_rpn_cls": 0.00601, "loss_rpn_bbox": 0.01277, "loss_cls": 0.06304, "acc": 97.40586, "loss_bbox": 0.08979, "loss": 0.1716, "grad_norm": 1.35276, "time": 0.70546}
|
| 126 |
+
{"mode": "train", "epoch": 7, "iter": 2500, "lr": 3e-05, "memory": 10004, "data_time": 0.00306, "loss_rpn_cls": 0.00642, "loss_rpn_bbox": 0.01393, "loss_cls": 0.06648, "acc": 97.26895, "loss_bbox": 0.09322, "loss": 0.18005, "grad_norm": 1.38566, "time": 0.70778}
|
| 127 |
+
{"mode": "train", "epoch": 7, "iter": 3000, "lr": 3e-05, "memory": 10004, "data_time": 0.00313, "loss_rpn_cls": 0.0062, "loss_rpn_bbox": 0.01256, "loss_cls": 0.06256, "acc": 97.41543, "loss_bbox": 0.08949, "loss": 0.17082, "grad_norm": NaN, "time": 0.70611}
|
| 128 |
+
{"mode": "train", "epoch": 7, "iter": 3500, "lr": 3e-05, "memory": 10004, "data_time": 0.00322, "loss_rpn_cls": 0.00676, "loss_rpn_bbox": 0.01293, "loss_cls": 0.06427, "acc": 97.3709, "loss_bbox": 0.09181, "loss": 0.17578, "grad_norm": 1.3366, "time": 0.70723}
|
| 129 |
+
{"mode": "train", "epoch": 7, "iter": 4000, "lr": 3e-05, "memory": 10004, "data_time": 0.00337, "loss_rpn_cls": 0.0057, "loss_rpn_bbox": 0.01296, "loss_cls": 0.06401, "acc": 97.37295, "loss_bbox": 0.0906, "loss": 0.17327, "grad_norm": 1.33023, "time": 0.70982}
|
| 130 |
+
{"mode": "train", "epoch": 7, "iter": 4500, "lr": 3e-05, "memory": 10004, "data_time": 0.00325, "loss_rpn_cls": 0.00674, "loss_rpn_bbox": 0.01322, "loss_cls": 0.06736, "acc": 97.21436, "loss_bbox": 0.09479, "loss": 0.18211, "grad_norm": 1.33832, "time": 0.70937}
|
| 131 |
+
{"mode": "train", "epoch": 7, "iter": 5000, "lr": 3e-05, "memory": 10004, "data_time": 0.00333, "loss_rpn_cls": 0.00679, "loss_rpn_bbox": 0.0143, "loss_cls": 0.06497, "acc": 97.30078, "loss_bbox": 0.09545, "loss": 0.18151, "grad_norm": 1.32336, "time": 0.70671}
|
| 132 |
+
{"mode": "train", "epoch": 7, "iter": 5500, "lr": 3e-05, "memory": 10004, "data_time": 0.00335, "loss_rpn_cls": 0.0067, "loss_rpn_bbox": 0.01376, "loss_cls": 0.06426, "acc": 97.34722, "loss_bbox": 0.09269, "loss": 0.17742, "grad_norm": Infinity, "time": 0.70309}
|
| 133 |
+
{"mode": "train", "epoch": 7, "iter": 6000, "lr": 3e-05, "memory": 10004, "data_time": 0.00344, "loss_rpn_cls": 0.0065, "loss_rpn_bbox": 0.01291, "loss_cls": 0.06326, "acc": 97.42749, "loss_bbox": 0.09275, "loss": 0.17542, "grad_norm": 1.2851, "time": 0.70416}
|
| 134 |
+
{"mode": "train", "epoch": 7, "iter": 6500, "lr": 3e-05, "memory": 10004, "data_time": 0.00351, "loss_rpn_cls": 0.00605, "loss_rpn_bbox": 0.01399, "loss_cls": 0.06299, "acc": 97.40537, "loss_bbox": 0.09415, "loss": 0.17718, "grad_norm": 1.34968, "time": 0.70305}
|
| 135 |
+
{"mode": "train", "epoch": 7, "iter": 7000, "lr": 3e-05, "memory": 10004, "data_time": 0.0035, "loss_rpn_cls": 0.00615, "loss_rpn_bbox": 0.01284, "loss_cls": 0.06222, "acc": 97.44932, "loss_bbox": 0.08895, "loss": 0.17016, "grad_norm": 1.2599, "time": 0.70376}
|
| 136 |
+
{"mode": "train", "epoch": 7, "iter": 7500, "lr": 3e-05, "memory": 10004, "data_time": 0.00345, "loss_rpn_cls": 0.00613, "loss_rpn_bbox": 0.01281, "loss_cls": 0.064, "acc": 97.34673, "loss_bbox": 0.0924, "loss": 0.17534, "grad_norm": 1.31366, "time": 0.70371}
|
| 137 |
+
{"mode": "train", "epoch": 7, "iter": 8000, "lr": 3e-05, "memory": 10004, "data_time": 0.00347, "loss_rpn_cls": 0.00594, "loss_rpn_bbox": 0.01243, "loss_cls": 0.06329, "acc": 97.40229, "loss_bbox": 0.08854, "loss": 0.17021, "grad_norm": NaN, "time": 0.70413}
|
| 138 |
+
{"mode": "train", "epoch": 7, "iter": 8500, "lr": 3e-05, "memory": 10004, "data_time": 0.00389, "loss_rpn_cls": 0.00602, "loss_rpn_bbox": 0.01294, "loss_cls": 0.06216, "acc": 97.44419, "loss_bbox": 0.09134, "loss": 0.17245, "grad_norm": 1.32243, "time": 0.7034}
|
| 139 |
+
{"mode": "train", "epoch": 7, "iter": 9000, "lr": 3e-05, "memory": 10004, "data_time": 0.00373, "loss_rpn_cls": 0.00637, "loss_rpn_bbox": 0.01273, "loss_cls": 0.06169, "acc": 97.45122, "loss_bbox": 0.08917, "loss": 0.16996, "grad_norm": 1.32909, "time": 0.70342}
|
| 140 |
+
{"mode": "train", "epoch": 7, "iter": 9500, "lr": 3e-05, "memory": 10004, "data_time": 0.00361, "loss_rpn_cls": 0.00604, "loss_rpn_bbox": 0.01278, "loss_cls": 0.06167, "acc": 97.45146, "loss_bbox": 0.08879, "loss": 0.16928, "grad_norm": 1.24395, "time": 0.7037}
|
| 141 |
+
{"mode": "val", "epoch": 7, "iter": 1068, "lr": 3e-05, "mAP": 0.65362}
|
| 142 |
+
{"mode": "train", "epoch": 8, "iter": 500, "lr": 3e-05, "memory": 10004, "data_time": 0.01034, "loss_rpn_cls": 0.00607, "loss_rpn_bbox": 0.01297, "loss_cls": 0.06371, "acc": 97.37153, "loss_bbox": 0.0903, "loss": 0.17305, "grad_norm": NaN, "time": 0.71107}
|
| 143 |
+
{"mode": "train", "epoch": 8, "iter": 1000, "lr": 3e-05, "memory": 10004, "data_time": 0.00319, "loss_rpn_cls": 0.00623, "loss_rpn_bbox": 0.013, "loss_cls": 0.06318, "acc": 97.40625, "loss_bbox": 0.0913, "loss": 0.17371, "grad_norm": 1.27415, "time": 0.70521}
|
| 144 |
+
{"mode": "train", "epoch": 8, "iter": 1500, "lr": 3e-05, "memory": 10004, "data_time": 0.00326, "loss_rpn_cls": 0.00542, "loss_rpn_bbox": 0.01337, "loss_cls": 0.0618, "acc": 97.41802, "loss_bbox": 0.08969, "loss": 0.17029, "grad_norm": 1.25077, "time": 0.70549}
|
| 145 |
+
{"mode": "train", "epoch": 8, "iter": 2000, "lr": 3e-05, "memory": 10004, "data_time": 0.00317, "loss_rpn_cls": 0.00589, "loss_rpn_bbox": 0.0127, "loss_cls": 0.06165, "acc": 97.46597, "loss_bbox": 0.0908, "loss": 0.17103, "grad_norm": 1.22051, "time": 0.70561}
|
| 146 |
+
{"mode": "train", "epoch": 8, "iter": 2500, "lr": 3e-05, "memory": 10004, "data_time": 0.0032, "loss_rpn_cls": 0.00561, "loss_rpn_bbox": 0.01292, "loss_cls": 0.06164, "acc": 97.45371, "loss_bbox": 0.09063, "loss": 0.1708, "grad_norm": 1.30841, "time": 0.70471}
|
| 147 |
+
{"mode": "train", "epoch": 8, "iter": 3000, "lr": 3e-05, "memory": 10004, "data_time": 0.0033, "loss_rpn_cls": 0.0059, "loss_rpn_bbox": 0.01316, "loss_cls": 0.06146, "acc": 97.45718, "loss_bbox": 0.08864, "loss": 0.16917, "grad_norm": 1.24525, "time": 0.70526}
|
| 148 |
+
{"mode": "train", "epoch": 8, "iter": 3500, "lr": 3e-05, "memory": 10004, "data_time": 0.00319, "loss_rpn_cls": 0.00577, "loss_rpn_bbox": 0.01213, "loss_cls": 0.06265, "acc": 97.44863, "loss_bbox": 0.09067, "loss": 0.17121, "grad_norm": 1.23977, "time": 0.70482}
|
| 149 |
+
{"mode": "train", "epoch": 8, "iter": 4000, "lr": 3e-05, "memory": 10004, "data_time": 0.00325, "loss_rpn_cls": 0.00628, "loss_rpn_bbox": 0.01313, "loss_cls": 0.06282, "acc": 97.3937, "loss_bbox": 0.09135, "loss": 0.17358, "grad_norm": 1.30069, "time": 0.70511}
|
| 150 |
+
{"mode": "train", "epoch": 8, "iter": 4500, "lr": 3e-05, "memory": 10004, "data_time": 0.00313, "loss_rpn_cls": 0.00556, "loss_rpn_bbox": 0.01266, "loss_cls": 0.06305, "acc": 97.40029, "loss_bbox": 0.09225, "loss": 0.17353, "grad_norm": 1.24952, "time": 0.705}
|
| 151 |
+
{"mode": "train", "epoch": 8, "iter": 5000, "lr": 3e-05, "memory": 10004, "data_time": 0.00318, "loss_rpn_cls": 0.0054, "loss_rpn_bbox": 0.01244, "loss_cls": 0.06059, "acc": 97.48159, "loss_bbox": 0.08863, "loss": 0.16706, "grad_norm": NaN, "time": 0.70445}
|
| 152 |
+
{"mode": "train", "epoch": 8, "iter": 5500, "lr": 3e-05, "memory": 10004, "data_time": 0.00312, "loss_rpn_cls": 0.00524, "loss_rpn_bbox": 0.01271, "loss_cls": 0.06255, "acc": 97.41436, "loss_bbox": 0.08984, "loss": 0.17035, "grad_norm": 1.21673, "time": 0.70431}
|
| 153 |
+
{"mode": "train", "epoch": 8, "iter": 6000, "lr": 3e-05, "memory": 10004, "data_time": 0.00317, "loss_rpn_cls": 0.0061, "loss_rpn_bbox": 0.0142, "loss_cls": 0.06329, "acc": 97.40269, "loss_bbox": 0.09341, "loss": 0.177, "grad_norm": 1.27723, "time": 0.70393}
|
| 154 |
+
{"mode": "train", "epoch": 8, "iter": 6500, "lr": 3e-05, "memory": 10004, "data_time": 0.00312, "loss_rpn_cls": 0.0055, "loss_rpn_bbox": 0.01249, "loss_cls": 0.06222, "acc": 97.40522, "loss_bbox": 0.09164, "loss": 0.17185, "grad_norm": 1.22069, "time": 0.70418}
|
| 155 |
+
{"mode": "train", "epoch": 8, "iter": 7000, "lr": 3e-05, "memory": 10004, "data_time": 0.00319, "loss_rpn_cls": 0.00561, "loss_rpn_bbox": 0.01308, "loss_cls": 0.06063, "acc": 97.50688, "loss_bbox": 0.09079, "loss": 0.17011, "grad_norm": NaN, "time": 0.70416}
|
| 156 |
+
{"mode": "train", "epoch": 8, "iter": 7500, "lr": 3e-05, "memory": 10004, "data_time": 0.00312, "loss_rpn_cls": 0.00559, "loss_rpn_bbox": 0.01216, "loss_cls": 0.06069, "acc": 97.48203, "loss_bbox": 0.0894, "loss": 0.16784, "grad_norm": 1.22819, "time": 0.70464}
|
| 157 |
+
{"mode": "train", "epoch": 8, "iter": 8000, "lr": 3e-05, "memory": 10004, "data_time": 0.00309, "loss_rpn_cls": 0.00609, "loss_rpn_bbox": 0.01369, "loss_cls": 0.06292, "acc": 97.39512, "loss_bbox": 0.09196, "loss": 0.17466, "grad_norm": 1.28696, "time": 0.7039}
|
| 158 |
+
{"mode": "train", "epoch": 8, "iter": 8500, "lr": 3e-05, "memory": 10004, "data_time": 0.00312, "loss_rpn_cls": 0.00633, "loss_rpn_bbox": 0.01368, "loss_cls": 0.06267, "acc": 97.43198, "loss_bbox": 0.0905, "loss": 0.17319, "grad_norm": 1.22907, "time": 0.70491}
|
| 159 |
+
{"mode": "train", "epoch": 8, "iter": 9000, "lr": 3e-05, "memory": 10004, "data_time": 0.00314, "loss_rpn_cls": 0.00643, "loss_rpn_bbox": 0.01311, "loss_cls": 0.06295, "acc": 97.40654, "loss_bbox": 0.09002, "loss": 0.17251, "grad_norm": 1.24505, "time": 0.70456}
|
| 160 |
+
{"mode": "train", "epoch": 8, "iter": 9500, "lr": 3e-05, "memory": 10004, "data_time": 0.00316, "loss_rpn_cls": 0.00613, "loss_rpn_bbox": 0.01365, "loss_cls": 0.0627, "acc": 97.40762, "loss_bbox": 0.08999, "loss": 0.17247, "grad_norm": 1.26403, "time": 0.70388}
|
| 161 |
+
{"mode": "val", "epoch": 8, "iter": 1068, "lr": 3e-05, "mAP": 0.68028}
|
| 162 |
+
{"mode": "train", "epoch": 9, "iter": 500, "lr": 0.0, "memory": 10004, "data_time": 0.0096, "loss_rpn_cls": 0.00531, "loss_rpn_bbox": 0.01194, "loss_cls": 0.05948, "acc": 97.52534, "loss_bbox": 0.0875, "loss": 0.16423, "grad_norm": 1.13119, "time": 0.71111}
|
| 163 |
+
{"mode": "train", "epoch": 9, "iter": 1000, "lr": 0.0, "memory": 10004, "data_time": 0.00315, "loss_rpn_cls": 0.00485, "loss_rpn_bbox": 0.01133, "loss_cls": 0.0576, "acc": 97.61523, "loss_bbox": 0.08432, "loss": 0.1581, "grad_norm": NaN, "time": 0.70328}
|
| 164 |
+
{"mode": "train", "epoch": 9, "iter": 1500, "lr": 0.0, "memory": 10004, "data_time": 0.00318, "loss_rpn_cls": 0.00479, "loss_rpn_bbox": 0.01147, "loss_cls": 0.05719, "acc": 97.63667, "loss_bbox": 0.08564, "loss": 0.15909, "grad_norm": 1.10588, "time": 0.70418}
|
| 165 |
+
{"mode": "train", "epoch": 9, "iter": 2000, "lr": 0.0, "memory": 10004, "data_time": 0.00328, "loss_rpn_cls": 0.00445, "loss_rpn_bbox": 0.01102, "loss_cls": 0.05777, "acc": 97.60742, "loss_bbox": 0.08281, "loss": 0.15605, "grad_norm": 1.10485, "time": 0.70377}
|
| 166 |
+
{"mode": "train", "epoch": 9, "iter": 2500, "lr": 0.0, "memory": 10004, "data_time": 0.00323, "loss_rpn_cls": 0.004, "loss_rpn_bbox": 0.0104, "loss_cls": 0.05642, "acc": 97.65225, "loss_bbox": 0.08345, "loss": 0.15427, "grad_norm": 1.08479, "time": 0.70413}
|
| 167 |
+
{"mode": "train", "epoch": 9, "iter": 3000, "lr": 0.0, "memory": 10009, "data_time": 0.00321, "loss_rpn_cls": 0.0043, "loss_rpn_bbox": 0.01177, "loss_cls": 0.05633, "acc": 97.63774, "loss_bbox": 0.08429, "loss": 0.15669, "grad_norm": 1.0901, "time": 0.70429}
|
| 168 |
+
{"mode": "train", "epoch": 9, "iter": 3500, "lr": 0.0, "memory": 10009, "data_time": 0.00336, "loss_rpn_cls": 0.00444, "loss_rpn_bbox": 0.01165, "loss_cls": 0.05807, "acc": 97.59429, "loss_bbox": 0.08766, "loss": 0.16183, "grad_norm": 1.09523, "time": 0.70379}
|
| 169 |
+
{"mode": "train", "epoch": 9, "iter": 4000, "lr": 0.0, "memory": 10009, "data_time": 0.00323, "loss_rpn_cls": 0.00416, "loss_rpn_bbox": 0.01136, "loss_cls": 0.05576, "acc": 97.6645, "loss_bbox": 0.08266, "loss": 0.15395, "grad_norm": 1.08236, "time": 0.70433}
|
| 170 |
+
{"mode": "train", "epoch": 9, "iter": 4500, "lr": 0.0, "memory": 10009, "data_time": 0.00327, "loss_rpn_cls": 0.00403, "loss_rpn_bbox": 0.01108, "loss_cls": 0.05465, "acc": 97.73335, "loss_bbox": 0.0813, "loss": 0.15106, "grad_norm": 1.09286, "time": 0.70361}
|
| 171 |
+
{"mode": "train", "epoch": 9, "iter": 5000, "lr": 0.0, "memory": 10009, "data_time": 0.00312, "loss_rpn_cls": 0.00432, "loss_rpn_bbox": 0.01168, "loss_cls": 0.05585, "acc": 97.68145, "loss_bbox": 0.08638, "loss": 0.15823, "grad_norm": 1.08831, "time": 0.70429}
|
| 172 |
+
{"mode": "train", "epoch": 9, "iter": 5500, "lr": 0.0, "memory": 10009, "data_time": 0.00318, "loss_rpn_cls": 0.00476, "loss_rpn_bbox": 0.01131, "loss_cls": 0.05941, "acc": 97.49692, "loss_bbox": 0.08783, "loss": 0.16332, "grad_norm": NaN, "time": 0.7037}
|
| 173 |
+
{"mode": "train", "epoch": 9, "iter": 6000, "lr": 0.0, "memory": 10009, "data_time": 0.00317, "loss_rpn_cls": 0.00435, "loss_rpn_bbox": 0.01112, "loss_cls": 0.05707, "acc": 97.60728, "loss_bbox": 0.08462, "loss": 0.15716, "grad_norm": 1.09507, "time": 0.70447}
|
| 174 |
+
{"mode": "train", "epoch": 9, "iter": 6500, "lr": 0.0, "memory": 10009, "data_time": 0.0032, "loss_rpn_cls": 0.00469, "loss_rpn_bbox": 0.01169, "loss_cls": 0.05773, "acc": 97.60688, "loss_bbox": 0.08611, "loss": 0.16022, "grad_norm": 1.09424, "time": 0.70396}
|
| 175 |
+
{"mode": "train", "epoch": 9, "iter": 7000, "lr": 0.0, "memory": 10009, "data_time": 0.00312, "loss_rpn_cls": 0.00392, "loss_rpn_bbox": 0.01088, "loss_cls": 0.05582, "acc": 97.67026, "loss_bbox": 0.08469, "loss": 0.1553, "grad_norm": 1.08446, "time": 0.70505}
|
| 176 |
+
{"mode": "train", "epoch": 9, "iter": 7500, "lr": 0.0, "memory": 10009, "data_time": 0.00306, "loss_rpn_cls": 0.00398, "loss_rpn_bbox": 0.01162, "loss_cls": 0.05699, "acc": 97.62109, "loss_bbox": 0.08521, "loss": 0.15779, "grad_norm": NaN, "time": 0.7043}
|
| 177 |
+
{"mode": "train", "epoch": 9, "iter": 8000, "lr": 0.0, "memory": 10009, "data_time": 0.00314, "loss_rpn_cls": 0.00398, "loss_rpn_bbox": 0.01094, "loss_cls": 0.0542, "acc": 97.7332, "loss_bbox": 0.08182, "loss": 0.15094, "grad_norm": 1.06138, "time": 0.70418}
|
| 178 |
+
{"mode": "train", "epoch": 9, "iter": 8500, "lr": 0.0, "memory": 10009, "data_time": 0.00307, "loss_rpn_cls": 0.00364, "loss_rpn_bbox": 0.0109, "loss_cls": 0.05573, "acc": 97.67026, "loss_bbox": 0.08424, "loss": 0.15451, "grad_norm": 1.08848, "time": 0.70459}
|
| 179 |
+
{"mode": "train", "epoch": 9, "iter": 9000, "lr": 0.0, "memory": 10009, "data_time": 0.00303, "loss_rpn_cls": 0.00388, "loss_rpn_bbox": 0.01061, "loss_cls": 0.05564, "acc": 97.67539, "loss_bbox": 0.08523, "loss": 0.15535, "grad_norm": 1.07302, "time": 0.70385}
|
| 180 |
+
{"mode": "train", "epoch": 9, "iter": 9500, "lr": 0.0, "memory": 10009, "data_time": 0.00317, "loss_rpn_cls": 0.00389, "loss_rpn_bbox": 0.01115, "loss_cls": 0.05628, "acc": 97.64321, "loss_bbox": 0.08461, "loss": 0.15593, "grad_norm": NaN, "time": 0.70431}
|
| 181 |
+
{"mode": "val", "epoch": 9, "iter": 1068, "lr": 0.0, "mAP": 0.70837}
|
| 182 |
+
{"mode": "train", "epoch": 10, "iter": 500, "lr": 0.0, "memory": 10009, "data_time": 0.01063, "loss_rpn_cls": 0.00395, "loss_rpn_bbox": 0.01082, "loss_cls": 0.05517, "acc": 97.72305, "loss_bbox": 0.08164, "loss": 0.15158, "grad_norm": NaN, "time": 0.71268}
|
| 183 |
+
{"mode": "train", "epoch": 10, "iter": 1000, "lr": 0.0, "memory": 10009, "data_time": 0.00316, "loss_rpn_cls": 0.00342, "loss_rpn_bbox": 0.0106, "loss_cls": 0.05366, "acc": 97.74199, "loss_bbox": 0.08213, "loss": 0.1498, "grad_norm": 1.07341, "time": 0.70521}
|
| 184 |
+
{"mode": "train", "epoch": 10, "iter": 1500, "lr": 0.0, "memory": 10009, "data_time": 0.00305, "loss_rpn_cls": 0.00408, "loss_rpn_bbox": 0.01086, "loss_cls": 0.05396, "acc": 97.72798, "loss_bbox": 0.08246, "loss": 0.15137, "grad_norm": 1.08158, "time": 0.70519}
|
| 185 |
+
{"mode": "train", "epoch": 10, "iter": 2000, "lr": 0.0, "memory": 10009, "data_time": 0.00314, "loss_rpn_cls": 0.00389, "loss_rpn_bbox": 0.01112, "loss_cls": 0.05468, "acc": 97.70991, "loss_bbox": 0.08288, "loss": 0.15256, "grad_norm": 1.09361, "time": 0.70513}
|
| 186 |
+
{"mode": "train", "epoch": 10, "iter": 2500, "lr": 0.0, "memory": 10009, "data_time": 0.00309, "loss_rpn_cls": 0.00388, "loss_rpn_bbox": 0.01121, "loss_cls": 0.05566, "acc": 97.68081, "loss_bbox": 0.08443, "loss": 0.15519, "grad_norm": 1.08328, "time": 0.70546}
|
| 187 |
+
{"mode": "train", "epoch": 10, "iter": 3000, "lr": 0.0, "memory": 10009, "data_time": 0.0031, "loss_rpn_cls": 0.004, "loss_rpn_bbox": 0.01074, "loss_cls": 0.05571, "acc": 97.67822, "loss_bbox": 0.08369, "loss": 0.15414, "grad_norm": 1.10646, "time": 0.70534}
|
| 188 |
+
{"mode": "train", "epoch": 10, "iter": 3500, "lr": 0.0, "memory": 10009, "data_time": 0.00336, "loss_rpn_cls": 0.00415, "loss_rpn_bbox": 0.01122, "loss_cls": 0.05643, "acc": 97.65176, "loss_bbox": 0.0838, "loss": 0.1556, "grad_norm": 1.09534, "time": 0.70596}
|
| 189 |
+
{"mode": "train", "epoch": 10, "iter": 4000, "lr": 0.0, "memory": 10009, "data_time": 0.0029, "loss_rpn_cls": 0.00371, "loss_rpn_bbox": 0.01119, "loss_cls": 0.05531, "acc": 97.68486, "loss_bbox": 0.08163, "loss": 0.15184, "grad_norm": 1.10048, "time": 0.70512}
|
| 190 |
+
{"mode": "train", "epoch": 10, "iter": 4500, "lr": 0.0, "memory": 10009, "data_time": 0.00289, "loss_rpn_cls": 0.00382, "loss_rpn_bbox": 0.01138, "loss_cls": 0.05524, "acc": 97.67988, "loss_bbox": 0.08332, "loss": 0.15375, "grad_norm": NaN, "time": 0.70496}
|
| 191 |
+
{"mode": "train", "epoch": 10, "iter": 5000, "lr": 0.0, "memory": 10009, "data_time": 0.00288, "loss_rpn_cls": 0.00357, "loss_rpn_bbox": 0.01085, "loss_cls": 0.05408, "acc": 97.7229, "loss_bbox": 0.08338, "loss": 0.15188, "grad_norm": 1.10658, "time": 0.70486}
|
| 192 |
+
{"mode": "train", "epoch": 10, "iter": 5500, "lr": 0.0, "memory": 10009, "data_time": 0.00301, "loss_rpn_cls": 0.00427, "loss_rpn_bbox": 0.0111, "loss_cls": 0.05591, "acc": 97.66392, "loss_bbox": 0.08524, "loss": 0.15652, "grad_norm": 1.10742, "time": 0.70545}
|
| 193 |
+
{"mode": "train", "epoch": 10, "iter": 6000, "lr": 0.0, "memory": 10009, "data_time": 0.00293, "loss_rpn_cls": 0.00393, "loss_rpn_bbox": 0.01117, "loss_cls": 0.05514, "acc": 97.68887, "loss_bbox": 0.08468, "loss": 0.15491, "grad_norm": 1.11964, "time": 0.7053}
|
| 194 |
+
{"mode": "train", "epoch": 10, "iter": 6500, "lr": 0.0, "memory": 10009, "data_time": 0.00299, "loss_rpn_cls": 0.00376, "loss_rpn_bbox": 0.01106, "loss_cls": 0.05526, "acc": 97.66675, "loss_bbox": 0.08385, "loss": 0.15394, "grad_norm": NaN, "time": 0.70445}
|
| 195 |
+
{"mode": "train", "epoch": 10, "iter": 7000, "lr": 0.0, "memory": 10009, "data_time": 0.00306, "loss_rpn_cls": 0.00336, "loss_rpn_bbox": 0.01008, "loss_cls": 0.05288, "acc": 97.79194, "loss_bbox": 0.08038, "loss": 0.1467, "grad_norm": 1.06805, "time": 0.70536}
|
| 196 |
+
{"mode": "train", "epoch": 10, "iter": 7500, "lr": 0.0, "memory": 10009, "data_time": 0.00301, "loss_rpn_cls": 0.00388, "loss_rpn_bbox": 0.01073, "loss_cls": 0.05486, "acc": 97.67368, "loss_bbox": 0.08336, "loss": 0.15283, "grad_norm": 1.10242, "time": 0.70423}
|
| 197 |
+
{"mode": "train", "epoch": 10, "iter": 8000, "lr": 0.0, "memory": 10009, "data_time": 0.00298, "loss_rpn_cls": 0.00389, "loss_rpn_bbox": 0.01098, "loss_cls": 0.05431, "acc": 97.74551, "loss_bbox": 0.08222, "loss": 0.15141, "grad_norm": 1.10702, "time": 0.70494}
|
| 198 |
+
{"mode": "train", "epoch": 10, "iter": 8500, "lr": 0.0, "memory": 10009, "data_time": 0.00306, "loss_rpn_cls": 0.00372, "loss_rpn_bbox": 0.01069, "loss_cls": 0.0551, "acc": 97.67173, "loss_bbox": 0.08371, "loss": 0.15322, "grad_norm": Infinity, "time": 0.70463}
|
| 199 |
+
{"mode": "train", "epoch": 10, "iter": 9000, "lr": 0.0, "memory": 10009, "data_time": 0.00307, "loss_rpn_cls": 0.00388, "loss_rpn_bbox": 0.00996, "loss_cls": 0.05326, "acc": 97.77632, "loss_bbox": 0.08341, "loss": 0.15051, "grad_norm": NaN, "time": 0.70506}
|
| 200 |
+
{"mode": "train", "epoch": 10, "iter": 9500, "lr": 0.0, "memory": 10009, "data_time": 0.00305, "loss_rpn_cls": 0.00339, "loss_rpn_bbox": 0.01055, "loss_cls": 0.05413, "acc": 97.7187, "loss_bbox": 0.08359, "loss": 0.15166, "grad_norm": 1.09676, "time": 0.70465}
|
| 201 |
+
{"mode": "val", "epoch": 10, "iter": 1068, "lr": 0.0, "mAP": 0.71806}
|
| 202 |
+
{"mode": "train", "epoch": 11, "iter": 500, "lr": 0.0, "memory": 10009, "data_time": 0.00993, "loss_rpn_cls": 0.00358, "loss_rpn_bbox": 0.01072, "loss_cls": 0.05342, "acc": 97.76577, "loss_bbox": 0.08256, "loss": 0.15029, "grad_norm": 1.08447, "time": 0.71147}
|
| 203 |
+
{"mode": "train", "epoch": 11, "iter": 1000, "lr": 0.0, "memory": 10009, "data_time": 0.00317, "loss_rpn_cls": 0.00364, "loss_rpn_bbox": 0.01064, "loss_cls": 0.05456, "acc": 97.71826, "loss_bbox": 0.08111, "loss": 0.14995, "grad_norm": 1.09291, "time": 0.70473}
|
| 204 |
+
{"mode": "train", "epoch": 11, "iter": 1500, "lr": 0.0, "memory": 10009, "data_time": 0.00302, "loss_rpn_cls": 0.00372, "loss_rpn_bbox": 0.01046, "loss_cls": 0.05177, "acc": 97.85234, "loss_bbox": 0.07964, "loss": 0.14559, "grad_norm": 1.06566, "time": 0.70538}
|
| 205 |
+
{"mode": "train", "epoch": 11, "iter": 2000, "lr": 0.0, "memory": 10009, "data_time": 0.0031, "loss_rpn_cls": 0.00351, "loss_rpn_bbox": 0.01054, "loss_cls": 0.05183, "acc": 97.8043, "loss_bbox": 0.07928, "loss": 0.14516, "grad_norm": 1.07884, "time": 0.70506}
|
| 206 |
+
{"mode": "train", "epoch": 11, "iter": 2500, "lr": 0.0, "memory": 10009, "data_time": 0.00306, "loss_rpn_cls": 0.00432, "loss_rpn_bbox": 0.01178, "loss_cls": 0.05501, "acc": 97.68223, "loss_bbox": 0.08295, "loss": 0.15406, "grad_norm": Infinity, "time": 0.7046}
|
| 207 |
+
{"mode": "train", "epoch": 11, "iter": 3000, "lr": 0.0, "memory": 10009, "data_time": 0.00302, "loss_rpn_cls": 0.00349, "loss_rpn_bbox": 0.0108, "loss_cls": 0.05439, "acc": 97.71929, "loss_bbox": 0.08185, "loss": 0.15053, "grad_norm": 1.11171, "time": 0.70499}
|
| 208 |
+
{"mode": "train", "epoch": 11, "iter": 3500, "lr": 0.0, "memory": 10009, "data_time": 0.00308, "loss_rpn_cls": 0.0038, "loss_rpn_bbox": 0.01078, "loss_cls": 0.05348, "acc": 97.76387, "loss_bbox": 0.08163, "loss": 0.14969, "grad_norm": 1.10862, "time": 0.70411}
|
| 209 |
+
{"mode": "train", "epoch": 11, "iter": 4000, "lr": 0.0, "memory": 10009, "data_time": 0.00311, "loss_rpn_cls": 0.00364, "loss_rpn_bbox": 0.01079, "loss_cls": 0.05264, "acc": 97.80186, "loss_bbox": 0.08219, "loss": 0.14924, "grad_norm": 1.12278, "time": 0.70511}
|
| 210 |
+
{"mode": "train", "epoch": 11, "iter": 4500, "lr": 0.0, "memory": 10009, "data_time": 0.0031, "loss_rpn_cls": 0.00357, "loss_rpn_bbox": 0.01058, "loss_cls": 0.05312, "acc": 97.77192, "loss_bbox": 0.082, "loss": 0.14927, "grad_norm": 1.10774, "time": 0.70475}
|
| 211 |
+
{"mode": "train", "epoch": 11, "iter": 5000, "lr": 0.0, "memory": 10009, "data_time": 0.00299, "loss_rpn_cls": 0.00374, "loss_rpn_bbox": 0.01074, "loss_cls": 0.05332, "acc": 97.76885, "loss_bbox": 0.0818, "loss": 0.14959, "grad_norm": 1.14153, "time": 0.705}
|
| 212 |
+
{"mode": "train", "epoch": 11, "iter": 5500, "lr": 0.0, "memory": 10009, "data_time": 0.00308, "loss_rpn_cls": 0.004, "loss_rpn_bbox": 0.01122, "loss_cls": 0.05388, "acc": 97.73979, "loss_bbox": 0.08105, "loss": 0.15015, "grad_norm": 1.09937, "time": 0.70434}
|
| 213 |
+
{"mode": "train", "epoch": 11, "iter": 6000, "lr": 0.0, "memory": 10009, "data_time": 0.00309, "loss_rpn_cls": 0.00319, "loss_rpn_bbox": 0.01023, "loss_cls": 0.05108, "acc": 97.86206, "loss_bbox": 0.07845, "loss": 0.14295, "grad_norm": 1.07846, "time": 0.70489}
|
| 214 |
+
{"mode": "train", "epoch": 11, "iter": 6500, "lr": 0.0, "memory": 10009, "data_time": 0.00306, "loss_rpn_cls": 0.00338, "loss_rpn_bbox": 0.01059, "loss_cls": 0.05251, "acc": 97.77788, "loss_bbox": 0.08275, "loss": 0.14923, "grad_norm": NaN, "time": 0.70467}
|
| 215 |
+
{"mode": "train", "epoch": 11, "iter": 7000, "lr": 0.0, "memory": 10009, "data_time": 0.00306, "loss_rpn_cls": 0.00346, "loss_rpn_bbox": 0.01052, "loss_cls": 0.05154, "acc": 97.83223, "loss_bbox": 0.07954, "loss": 0.14507, "grad_norm": 1.09093, "time": 0.70452}
|
| 216 |
+
{"mode": "train", "epoch": 11, "iter": 7500, "lr": 0.0, "memory": 10009, "data_time": 0.00302, "loss_rpn_cls": 0.00337, "loss_rpn_bbox": 0.01088, "loss_cls": 0.05198, "acc": 97.81436, "loss_bbox": 0.0806, "loss": 0.14683, "grad_norm": NaN, "time": 0.70503}
|
| 217 |
+
{"mode": "train", "epoch": 11, "iter": 8000, "lr": 0.0, "memory": 10009, "data_time": 0.00295, "loss_rpn_cls": 0.00312, "loss_rpn_bbox": 0.01076, "loss_cls": 0.05211, "acc": 97.78564, "loss_bbox": 0.08148, "loss": 0.14747, "grad_norm": 1.10249, "time": 0.70475}
|
| 218 |
+
{"mode": "train", "epoch": 11, "iter": 8500, "lr": 0.0, "memory": 10009, "data_time": 0.00302, "loss_rpn_cls": 0.0035, "loss_rpn_bbox": 0.01046, "loss_cls": 0.05512, "acc": 97.67129, "loss_bbox": 0.08317, "loss": 0.15225, "grad_norm": 1.15187, "time": 0.7053}
|
| 219 |
+
{"mode": "train", "epoch": 11, "iter": 9000, "lr": 0.0, "memory": 10009, "data_time": 0.00306, "loss_rpn_cls": 0.00366, "loss_rpn_bbox": 0.01094, "loss_cls": 0.05509, "acc": 97.69233, "loss_bbox": 0.08481, "loss": 0.15451, "grad_norm": 1.12321, "time": 0.70453}
|
| 220 |
+
{"mode": "train", "epoch": 11, "iter": 9500, "lr": 0.0, "memory": 10009, "data_time": 0.00305, "loss_rpn_cls": 0.00345, "loss_rpn_bbox": 0.01031, "loss_cls": 0.05243, "acc": 97.80938, "loss_bbox": 0.08172, "loss": 0.1479, "grad_norm": 1.11, "time": 0.70508}
|
| 221 |
+
{"mode": "val", "epoch": 11, "iter": 1068, "lr": 0.0, "mAP": 0.72434}
|
| 222 |
+
{"mode": "train", "epoch": 12, "iter": 500, "lr": 0.0, "memory": 10009, "data_time": 0.01007, "loss_rpn_cls": 0.00302, "loss_rpn_bbox": 0.01048, "loss_cls": 0.05212, "acc": 97.79941, "loss_bbox": 0.08335, "loss": 0.14897, "grad_norm": 1.12888, "time": 0.71227}
|
| 223 |
+
{"mode": "train", "epoch": 12, "iter": 1000, "lr": 0.0, "memory": 10009, "data_time": 0.0031, "loss_rpn_cls": 0.00344, "loss_rpn_bbox": 0.01089, "loss_cls": 0.0534, "acc": 97.74932, "loss_bbox": 0.08156, "loss": 0.1493, "grad_norm": 1.12052, "time": 0.70505}
|
| 224 |
+
{"mode": "train", "epoch": 12, "iter": 1500, "lr": 0.0, "memory": 10009, "data_time": 0.00305, "loss_rpn_cls": 0.00351, "loss_rpn_bbox": 0.01095, "loss_cls": 0.05265, "acc": 97.81016, "loss_bbox": 0.08192, "loss": 0.14903, "grad_norm": 1.12187, "time": 0.70543}
|
| 225 |
+
{"mode": "train", "epoch": 12, "iter": 2000, "lr": 0.0, "memory": 10009, "data_time": 0.00305, "loss_rpn_cls": 0.00358, "loss_rpn_bbox": 0.01068, "loss_cls": 0.05105, "acc": 97.85352, "loss_bbox": 0.08012, "loss": 0.14543, "grad_norm": 1.0949, "time": 0.70484}
|
| 226 |
+
{"mode": "train", "epoch": 12, "iter": 2500, "lr": 0.0, "memory": 10009, "data_time": 0.00309, "loss_rpn_cls": 0.00355, "loss_rpn_bbox": 0.01052, "loss_cls": 0.05276, "acc": 97.77173, "loss_bbox": 0.08316, "loss": 0.14999, "grad_norm": 1.11161, "time": 0.70482}
|
| 227 |
+
{"mode": "train", "epoch": 12, "iter": 3000, "lr": 0.0, "memory": 10009, "data_time": 0.00305, "loss_rpn_cls": 0.00347, "loss_rpn_bbox": 0.01055, "loss_cls": 0.05195, "acc": 97.81763, "loss_bbox": 0.08121, "loss": 0.14717, "grad_norm": 1.11311, "time": 0.70448}
|
| 228 |
+
{"mode": "train", "epoch": 12, "iter": 3500, "lr": 0.0, "memory": 10009, "data_time": 0.00304, "loss_rpn_cls": 0.00338, "loss_rpn_bbox": 0.01077, "loss_cls": 0.05245, "acc": 97.80835, "loss_bbox": 0.08213, "loss": 0.14873, "grad_norm": 1.10862, "time": 0.70473}
|
| 229 |
+
{"mode": "train", "epoch": 12, "iter": 4000, "lr": 0.0, "memory": 10009, "data_time": 0.00301, "loss_rpn_cls": 0.00279, "loss_rpn_bbox": 0.00975, "loss_cls": 0.04994, "acc": 97.91138, "loss_bbox": 0.07697, "loss": 0.13945, "grad_norm": 1.07978, "time": 0.70436}
|
| 230 |
+
{"mode": "train", "epoch": 12, "iter": 4500, "lr": 0.0, "memory": 10009, "data_time": 0.00302, "loss_rpn_cls": 0.0033, "loss_rpn_bbox": 0.00993, "loss_cls": 0.05284, "acc": 97.77275, "loss_bbox": 0.08139, "loss": 0.14746, "grad_norm": NaN, "time": 0.70436}
|
| 231 |
+
{"mode": "train", "epoch": 12, "iter": 5000, "lr": 0.0, "memory": 10009, "data_time": 0.00305, "loss_rpn_cls": 0.00336, "loss_rpn_bbox": 0.01, "loss_cls": 0.05166, "acc": 97.83843, "loss_bbox": 0.07827, "loss": 0.14329, "grad_norm": 1.08559, "time": 0.70442}
|
| 232 |
+
{"mode": "train", "epoch": 12, "iter": 5500, "lr": 0.0, "memory": 10009, "data_time": 0.00307, "loss_rpn_cls": 0.00333, "loss_rpn_bbox": 0.01073, "loss_cls": 0.05257, "acc": 97.78047, "loss_bbox": 0.08102, "loss": 0.14765, "grad_norm": 1.11994, "time": 0.70424}
|
| 233 |
+
{"mode": "train", "epoch": 12, "iter": 6000, "lr": 0.0, "memory": 10009, "data_time": 0.0031, "loss_rpn_cls": 0.00359, "loss_rpn_bbox": 0.01083, "loss_cls": 0.05184, "acc": 97.83369, "loss_bbox": 0.08052, "loss": 0.14678, "grad_norm": 1.11324, "time": 0.70492}
|
| 234 |
+
{"mode": "train", "epoch": 12, "iter": 6500, "lr": 0.0, "memory": 10009, "data_time": 0.00305, "loss_rpn_cls": 0.00362, "loss_rpn_bbox": 0.01003, "loss_cls": 0.05318, "acc": 97.76509, "loss_bbox": 0.08026, "loss": 0.14709, "grad_norm": NaN, "time": 0.70432}
|
| 235 |
+
{"mode": "train", "epoch": 12, "iter": 7000, "lr": 0.0, "memory": 10009, "data_time": 0.00307, "loss_rpn_cls": 0.00322, "loss_rpn_bbox": 0.01013, "loss_cls": 0.04982, "acc": 97.90566, "loss_bbox": 0.07814, "loss": 0.14131, "grad_norm": 1.07412, "time": 0.70462}
|
| 236 |
+
{"mode": "train", "epoch": 12, "iter": 7500, "lr": 0.0, "memory": 10009, "data_time": 0.0031, "loss_rpn_cls": 0.00361, "loss_rpn_bbox": 0.0098, "loss_cls": 0.05257, "acc": 97.78535, "loss_bbox": 0.07989, "loss": 0.14587, "grad_norm": 1.0987, "time": 0.70449}
|
| 237 |
+
{"mode": "train", "epoch": 12, "iter": 8000, "lr": 0.0, "memory": 10009, "data_time": 0.00306, "loss_rpn_cls": 0.00342, "loss_rpn_bbox": 0.01055, "loss_cls": 0.05234, "acc": 97.80322, "loss_bbox": 0.07813, "loss": 0.14443, "grad_norm": 1.10172, "time": 0.70486}
|
| 238 |
+
{"mode": "train", "epoch": 12, "iter": 8500, "lr": 0.0, "memory": 10009, "data_time": 0.00307, "loss_rpn_cls": 0.00337, "loss_rpn_bbox": 0.00982, "loss_cls": 0.05132, "acc": 97.85327, "loss_bbox": 0.07973, "loss": 0.14424, "grad_norm": 1.09227, "time": 0.70446}
|
| 239 |
+
{"mode": "train", "epoch": 12, "iter": 9000, "lr": 0.0, "memory": 10009, "data_time": 0.00303, "loss_rpn_cls": 0.00345, "loss_rpn_bbox": 0.01016, "loss_cls": 0.05223, "acc": 97.80249, "loss_bbox": 0.08096, "loss": 0.14681, "grad_norm": NaN, "time": 0.7044}
|
| 240 |
+
{"mode": "train", "epoch": 12, "iter": 9500, "lr": 0.0, "memory": 10009, "data_time": 0.00305, "loss_rpn_cls": 0.00329, "loss_rpn_bbox": 0.01069, "loss_cls": 0.0527, "acc": 97.78486, "loss_bbox": 0.08108, "loss": 0.14776, "grad_norm": 1.09702, "time": 0.70477}
|
| 241 |
+
{"mode": "val", "epoch": 12, "iter": 1068, "lr": 0.0, "mAP": 0.72313}
|
ckpts/vitp_rsar_orcnn_7231/epoch_12.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54800c340acf4ef30dc51ef73df16fb9e39808d907a774aaedc596d657a50168
|
| 3 |
+
size 1366683565
|
ckpts/vitp_rsar_orcnn_7231/vitp_rsar_orcnn.py
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
dataset_type = 'RSARDataset'
|
| 2 |
+
data_root = '/liyuxuan/DATA/RSAR/'
|
| 3 |
+
angle_version = 'le90'
|
| 4 |
+
img_norm_cfg = dict(
|
| 5 |
+
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
| 6 |
+
train_pipeline = [
|
| 7 |
+
dict(type='LoadImageFromFile'),
|
| 8 |
+
dict(type='LoadAnnotations', with_bbox=True),
|
| 9 |
+
dict(type='RResize', img_scale=(800, 800), keep_ratio=False),
|
| 10 |
+
dict(
|
| 11 |
+
type='RRandomFlip',
|
| 12 |
+
flip_ratio=[0.25, 0.25, 0.25],
|
| 13 |
+
direction=['horizontal', 'vertical', 'diagonal'],
|
| 14 |
+
version='le90'),
|
| 15 |
+
dict(
|
| 16 |
+
type='PolyRandomRotate',
|
| 17 |
+
rotate_ratio=0.5,
|
| 18 |
+
angles_range=180,
|
| 19 |
+
auto_bound=False,
|
| 20 |
+
rect_classes=[3],
|
| 21 |
+
version='le90'),
|
| 22 |
+
dict(
|
| 23 |
+
type='Normalize',
|
| 24 |
+
mean=[123.675, 116.28, 103.53],
|
| 25 |
+
std=[58.395, 57.12, 57.375],
|
| 26 |
+
to_rgb=True),
|
| 27 |
+
dict(type='Pad', size_divisor=32),
|
| 28 |
+
dict(type='DefaultFormatBundle'),
|
| 29 |
+
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
|
| 30 |
+
]
|
| 31 |
+
test_pipeline = [
|
| 32 |
+
dict(type='LoadImageFromFile'),
|
| 33 |
+
dict(
|
| 34 |
+
type='MultiScaleFlipAug',
|
| 35 |
+
img_scale=(800, 800),
|
| 36 |
+
flip=False,
|
| 37 |
+
transforms=[
|
| 38 |
+
dict(type='RResize', img_scale=(800, 800), keep_ratio=False),
|
| 39 |
+
dict(
|
| 40 |
+
type='Normalize',
|
| 41 |
+
mean=[123.675, 116.28, 103.53],
|
| 42 |
+
std=[58.395, 57.12, 57.375],
|
| 43 |
+
to_rgb=True),
|
| 44 |
+
dict(type='Pad', size_divisor=32),
|
| 45 |
+
dict(type='DefaultFormatBundle'),
|
| 46 |
+
dict(type='Collect', keys=['img'])
|
| 47 |
+
])
|
| 48 |
+
]
|
| 49 |
+
data = dict(
|
| 50 |
+
samples_per_gpu=1,
|
| 51 |
+
workers_per_gpu=4,
|
| 52 |
+
train=dict(
|
| 53 |
+
type='RSARDataset',
|
| 54 |
+
ann_file='/liyuxuan/DATA/RSAR/train/annfiles/',
|
| 55 |
+
img_prefix='/liyuxuan/DATA/RSAR/train/images/',
|
| 56 |
+
pipeline=[
|
| 57 |
+
dict(type='LoadImageFromFile'),
|
| 58 |
+
dict(type='LoadAnnotations', with_bbox=True),
|
| 59 |
+
dict(type='RResize', img_scale=(800, 800), keep_ratio=False),
|
| 60 |
+
dict(
|
| 61 |
+
type='RRandomFlip',
|
| 62 |
+
flip_ratio=[0.25, 0.25, 0.25],
|
| 63 |
+
direction=['horizontal', 'vertical', 'diagonal'],
|
| 64 |
+
version='le90'),
|
| 65 |
+
dict(
|
| 66 |
+
type='PolyRandomRotate',
|
| 67 |
+
rotate_ratio=0.5,
|
| 68 |
+
angles_range=180,
|
| 69 |
+
auto_bound=False,
|
| 70 |
+
rect_classes=[3],
|
| 71 |
+
version='le90'),
|
| 72 |
+
dict(
|
| 73 |
+
type='Normalize',
|
| 74 |
+
mean=[123.675, 116.28, 103.53],
|
| 75 |
+
std=[58.395, 57.12, 57.375],
|
| 76 |
+
to_rgb=True),
|
| 77 |
+
dict(type='Pad', size_divisor=32),
|
| 78 |
+
dict(type='DefaultFormatBundle'),
|
| 79 |
+
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
|
| 80 |
+
],
|
| 81 |
+
version='le90'),
|
| 82 |
+
val=dict(
|
| 83 |
+
type='RSARDataset',
|
| 84 |
+
ann_file='/liyuxuan/DATA/RSAR/test/annfiles/',
|
| 85 |
+
img_prefix='/liyuxuan/DATA/RSAR/test/images/',
|
| 86 |
+
pipeline=[
|
| 87 |
+
dict(type='LoadImageFromFile'),
|
| 88 |
+
dict(
|
| 89 |
+
type='MultiScaleFlipAug',
|
| 90 |
+
img_scale=(800, 800),
|
| 91 |
+
flip=False,
|
| 92 |
+
transforms=[
|
| 93 |
+
dict(
|
| 94 |
+
type='RResize', img_scale=(800, 800),
|
| 95 |
+
keep_ratio=False),
|
| 96 |
+
dict(
|
| 97 |
+
type='Normalize',
|
| 98 |
+
mean=[123.675, 116.28, 103.53],
|
| 99 |
+
std=[58.395, 57.12, 57.375],
|
| 100 |
+
to_rgb=True),
|
| 101 |
+
dict(type='Pad', size_divisor=32),
|
| 102 |
+
dict(type='DefaultFormatBundle'),
|
| 103 |
+
dict(type='Collect', keys=['img'])
|
| 104 |
+
])
|
| 105 |
+
],
|
| 106 |
+
version='le90'),
|
| 107 |
+
test=dict(
|
| 108 |
+
type='RSARDataset',
|
| 109 |
+
ann_file='/liyuxuan/DATA/RSAR/test/images/',
|
| 110 |
+
img_prefix='/liyuxuan/DATA/RSAR/test/images/',
|
| 111 |
+
pipeline=[
|
| 112 |
+
dict(type='LoadImageFromFile'),
|
| 113 |
+
dict(
|
| 114 |
+
type='MultiScaleFlipAug',
|
| 115 |
+
img_scale=(800, 800),
|
| 116 |
+
flip=False,
|
| 117 |
+
transforms=[
|
| 118 |
+
dict(
|
| 119 |
+
type='RResize', img_scale=(800, 800),
|
| 120 |
+
keep_ratio=False),
|
| 121 |
+
dict(
|
| 122 |
+
type='Normalize',
|
| 123 |
+
mean=[123.675, 116.28, 103.53],
|
| 124 |
+
std=[58.395, 57.12, 57.375],
|
| 125 |
+
to_rgb=True),
|
| 126 |
+
dict(type='Pad', size_divisor=32),
|
| 127 |
+
dict(type='DefaultFormatBundle'),
|
| 128 |
+
dict(type='Collect', keys=['img'])
|
| 129 |
+
])
|
| 130 |
+
],
|
| 131 |
+
version='le90'))
|
| 132 |
+
evaluation = dict(interval=1, metric='mAP')
|
| 133 |
+
optimizer = dict(
|
| 134 |
+
type='AdamW',
|
| 135 |
+
lr=2.5e-05,
|
| 136 |
+
betas=(0.9, 0.999),
|
| 137 |
+
weight_decay=0.05,
|
| 138 |
+
constructor='InternViTAdapterLayerDecayOptimizerConstructor',
|
| 139 |
+
paramwise_cfg=dict(num_layers=24, layer_decay_rate=0.95))
|
| 140 |
+
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
|
| 141 |
+
lr_config = dict(
|
| 142 |
+
policy='step',
|
| 143 |
+
warmup='linear',
|
| 144 |
+
warmup_iters=500,
|
| 145 |
+
warmup_ratio=0.3333333333333333,
|
| 146 |
+
step=[8, 11])
|
| 147 |
+
runner = dict(type='EpochBasedRunner', max_epochs=12)
|
| 148 |
+
checkpoint_config = dict(interval=1, max_keep_ckpts=1)
|
| 149 |
+
log_config = dict(interval=500, hooks=[dict(type='TextLoggerHook')])
|
| 150 |
+
dist_params = dict(backend='nccl')
|
| 151 |
+
log_level = 'INFO'
|
| 152 |
+
load_from = None
|
| 153 |
+
resume_from = None
|
| 154 |
+
workflow = [('train', 1)]
|
| 155 |
+
opencv_num_threads = 0
|
| 156 |
+
mp_start_method = 'fork'
|
| 157 |
+
pretrained = 'pretrained/ViTP_ViT_L_300M_rs.safetensors'
|
| 158 |
+
norm_cfg = dict(type='LN', requires_grad=True)
|
| 159 |
+
model = dict(
|
| 160 |
+
type='OrientedRCNN',
|
| 161 |
+
backbone=dict(
|
| 162 |
+
type='InternViTAdapter',
|
| 163 |
+
pretrain_size=448,
|
| 164 |
+
img_size=800,
|
| 165 |
+
patch_size=16,
|
| 166 |
+
embed_dim=1024,
|
| 167 |
+
depth=24,
|
| 168 |
+
num_heads=16,
|
| 169 |
+
mlp_ratio=4.0,
|
| 170 |
+
drop_path_rate=0.1,
|
| 171 |
+
init_values=0.1,
|
| 172 |
+
with_cp=True,
|
| 173 |
+
use_flash_attn=True,
|
| 174 |
+
qk_normalization=False,
|
| 175 |
+
layerscale_force_fp32=False,
|
| 176 |
+
with_fpn=False,
|
| 177 |
+
freeze_vit=False,
|
| 178 |
+
use_final_norm=True,
|
| 179 |
+
interaction_indexes=[[0, 7], [8, 11], [12, 15], [16, 23]],
|
| 180 |
+
cffn_ratio=0.25,
|
| 181 |
+
deform_ratio=0.25,
|
| 182 |
+
qkv_bias=True,
|
| 183 |
+
norm_type='layer_norm',
|
| 184 |
+
pretrained='pretrained/ViTP_ViT_L_300M_rs.safetensors',
|
| 185 |
+
pretrained_type='full',
|
| 186 |
+
only_feat_out=True),
|
| 187 |
+
neck=dict(
|
| 188 |
+
type='SimpleFPN',
|
| 189 |
+
in_channels=[1024, 1024, 1024, 1024],
|
| 190 |
+
out_channels=256,
|
| 191 |
+
norm_cfg=dict(type='LN', requires_grad=True),
|
| 192 |
+
use_residual=False,
|
| 193 |
+
num_outs=5),
|
| 194 |
+
rpn_head=dict(
|
| 195 |
+
type='OrientedRPNHead',
|
| 196 |
+
in_channels=256,
|
| 197 |
+
feat_channels=256,
|
| 198 |
+
version='le90',
|
| 199 |
+
anchor_generator=dict(
|
| 200 |
+
type='AnchorGenerator',
|
| 201 |
+
scales=[8],
|
| 202 |
+
ratios=[0.5, 1.0, 2.0],
|
| 203 |
+
strides=[4, 8, 16, 32, 64]),
|
| 204 |
+
bbox_coder=dict(
|
| 205 |
+
type='MidpointOffsetCoder',
|
| 206 |
+
angle_range='le90',
|
| 207 |
+
target_means=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
|
| 208 |
+
target_stds=[1.0, 1.0, 1.0, 1.0, 0.5, 0.5]),
|
| 209 |
+
loss_cls=dict(
|
| 210 |
+
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
|
| 211 |
+
loss_bbox=dict(
|
| 212 |
+
type='SmoothL1Loss', beta=0.1111111111111111, loss_weight=1.0)),
|
| 213 |
+
roi_head=dict(
|
| 214 |
+
type='OrientedStandardRoIHead',
|
| 215 |
+
bbox_roi_extractor=dict(
|
| 216 |
+
type='RotatedSingleRoIExtractor',
|
| 217 |
+
roi_layer=dict(
|
| 218 |
+
type='RoIAlignRotated',
|
| 219 |
+
out_size=7,
|
| 220 |
+
sample_num=2,
|
| 221 |
+
clockwise=True),
|
| 222 |
+
out_channels=256,
|
| 223 |
+
featmap_strides=[4, 8, 16, 32]),
|
| 224 |
+
bbox_head=dict(
|
| 225 |
+
type='RotatedShared2FCBBoxHead',
|
| 226 |
+
in_channels=256,
|
| 227 |
+
fc_out_channels=1024,
|
| 228 |
+
roi_feat_size=7,
|
| 229 |
+
num_classes=6,
|
| 230 |
+
bbox_coder=dict(
|
| 231 |
+
type='DeltaXYWHAOBBoxCoder',
|
| 232 |
+
angle_range='le90',
|
| 233 |
+
norm_factor=None,
|
| 234 |
+
edge_swap=True,
|
| 235 |
+
proj_xy=True,
|
| 236 |
+
target_means=(0.0, 0.0, 0.0, 0.0, 0.0),
|
| 237 |
+
target_stds=(0.1, 0.1, 0.2, 0.2, 0.1)),
|
| 238 |
+
reg_class_agnostic=True,
|
| 239 |
+
loss_cls=dict(
|
| 240 |
+
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
|
| 241 |
+
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),
|
| 242 |
+
train_cfg=dict(
|
| 243 |
+
rpn=dict(
|
| 244 |
+
assigner=dict(
|
| 245 |
+
type='MaxIoUAssigner',
|
| 246 |
+
pos_iou_thr=0.7,
|
| 247 |
+
neg_iou_thr=0.3,
|
| 248 |
+
min_pos_iou=0.3,
|
| 249 |
+
match_low_quality=True,
|
| 250 |
+
gpu_assign_thr=1000,
|
| 251 |
+
ignore_iof_thr=-1),
|
| 252 |
+
sampler=dict(
|
| 253 |
+
type='RandomSampler',
|
| 254 |
+
num=256,
|
| 255 |
+
pos_fraction=0.5,
|
| 256 |
+
neg_pos_ub=-1,
|
| 257 |
+
add_gt_as_proposals=False),
|
| 258 |
+
allowed_border=0,
|
| 259 |
+
pos_weight=-1,
|
| 260 |
+
debug=False),
|
| 261 |
+
rpn_proposal=dict(
|
| 262 |
+
nms_pre=2000,
|
| 263 |
+
max_per_img=2000,
|
| 264 |
+
nms=dict(type='nms', iou_threshold=0.8),
|
| 265 |
+
min_bbox_size=0),
|
| 266 |
+
rcnn=dict(
|
| 267 |
+
assigner=dict(
|
| 268 |
+
type='MaxIoUAssigner',
|
| 269 |
+
pos_iou_thr=0.5,
|
| 270 |
+
neg_iou_thr=0.5,
|
| 271 |
+
min_pos_iou=0.5,
|
| 272 |
+
match_low_quality=False,
|
| 273 |
+
gpu_assign_thr=1000,
|
| 274 |
+
iou_calculator=dict(type='RBboxOverlaps2D'),
|
| 275 |
+
ignore_iof_thr=-1),
|
| 276 |
+
sampler=dict(
|
| 277 |
+
type='RRandomSampler',
|
| 278 |
+
num=512,
|
| 279 |
+
pos_fraction=0.25,
|
| 280 |
+
neg_pos_ub=-1,
|
| 281 |
+
add_gt_as_proposals=True),
|
| 282 |
+
pos_weight=-1,
|
| 283 |
+
debug=False)),
|
| 284 |
+
test_cfg=dict(
|
| 285 |
+
rpn=dict(
|
| 286 |
+
nms_pre=2000,
|
| 287 |
+
max_per_img=2000,
|
| 288 |
+
nms=dict(type='nms', iou_threshold=0.8),
|
| 289 |
+
min_bbox_size=0),
|
| 290 |
+
rcnn=dict(
|
| 291 |
+
nms_pre=2000,
|
| 292 |
+
min_bbox_size=0,
|
| 293 |
+
score_thr=0.05,
|
| 294 |
+
nms=dict(iou_thr=0.1),
|
| 295 |
+
max_per_img=2000)))
|
| 296 |
+
fp16 = dict(loss_scale=dict(init_scale=512))
|
| 297 |
+
work_dir = './work_dirs/vitp_rsar_orcnn'
|
| 298 |
+
auto_resume = False
|
| 299 |
+
gpu_ids = range(0, 8)
|
| 300 |
+
device = 'cuda'
|
ckpts/vitp_s2looking_upernet_6989/20250915_140502/20250915_140502.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ckpts/vitp_s2looking_upernet_6989/best_checkpoint.pth.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ae8a0e0f2b75ebdaf146614481221186d01ba5efd4131edc8e4318c341bacd6
|
| 3 |
+
size 1522950309
|
ckpts/vitp_s2looking_upernet_6989/vitp_s2looking_upernet.py
ADDED
|
@@ -0,0 +1,360 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
crop_size = (
|
| 2 |
+
512,
|
| 3 |
+
512,
|
| 4 |
+
)
|
| 5 |
+
data_preprocessor = dict(
|
| 6 |
+
bgr_to_rgb=True,
|
| 7 |
+
mean=[
|
| 8 |
+
123.675,
|
| 9 |
+
116.28,
|
| 10 |
+
103.53,
|
| 11 |
+
123.675,
|
| 12 |
+
116.28,
|
| 13 |
+
103.53,
|
| 14 |
+
],
|
| 15 |
+
pad_val=0,
|
| 16 |
+
seg_pad_val=255,
|
| 17 |
+
size_divisor=32,
|
| 18 |
+
std=[
|
| 19 |
+
58.395,
|
| 20 |
+
57.12,
|
| 21 |
+
57.375,
|
| 22 |
+
58.395,
|
| 23 |
+
57.12,
|
| 24 |
+
57.375,
|
| 25 |
+
],
|
| 26 |
+
test_cfg=dict(size_divisor=32),
|
| 27 |
+
type='DualInputSegDataPreProcessor')
|
| 28 |
+
data_root = '/defaultShare/pubdata/remote_sensing/S2Looking'
|
| 29 |
+
dataset_type = 'S2Looking_Dataset'
|
| 30 |
+
default_hooks = dict(
|
| 31 |
+
checkpoint=dict(by_epoch=False, interval=12000, type='CheckpointHook'),
|
| 32 |
+
logger=dict(interval=50, log_metric_by_epoch=False, type='LoggerHook'),
|
| 33 |
+
param_scheduler=dict(type='ParamSchedulerHook'),
|
| 34 |
+
sampler_seed=dict(type='DistSamplerSeedHook'),
|
| 35 |
+
timer=dict(type='IterTimerHook'),
|
| 36 |
+
visualization=dict(interval=1, type='CDVisualizationHook'))
|
| 37 |
+
default_scope = 'opencd'
|
| 38 |
+
env_cfg = dict(
|
| 39 |
+
cudnn_benchmark=True,
|
| 40 |
+
dist_cfg=dict(backend='nccl'),
|
| 41 |
+
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0))
|
| 42 |
+
fp16 = dict(loss_scale=dict(init_scale=512))
|
| 43 |
+
img_ratios = [
|
| 44 |
+
0.75,
|
| 45 |
+
1.0,
|
| 46 |
+
1.25,
|
| 47 |
+
]
|
| 48 |
+
launcher = 'pytorch'
|
| 49 |
+
load_from = None
|
| 50 |
+
log_level = 'INFO'
|
| 51 |
+
log_processor = dict(by_epoch=False)
|
| 52 |
+
model = dict(
|
| 53 |
+
backbone=dict(
|
| 54 |
+
cffn_ratio=0.25,
|
| 55 |
+
deform_ratio=0.25,
|
| 56 |
+
depth=24,
|
| 57 |
+
drop_path_rate=0.1,
|
| 58 |
+
embed_dim=1024,
|
| 59 |
+
freeze_vit=False,
|
| 60 |
+
img_size=512,
|
| 61 |
+
init_values=0.1,
|
| 62 |
+
interaction_indexes=[
|
| 63 |
+
[
|
| 64 |
+
0,
|
| 65 |
+
7,
|
| 66 |
+
],
|
| 67 |
+
[
|
| 68 |
+
8,
|
| 69 |
+
11,
|
| 70 |
+
],
|
| 71 |
+
[
|
| 72 |
+
12,
|
| 73 |
+
15,
|
| 74 |
+
],
|
| 75 |
+
[
|
| 76 |
+
16,
|
| 77 |
+
23,
|
| 78 |
+
],
|
| 79 |
+
],
|
| 80 |
+
layerscale_force_fp32=False,
|
| 81 |
+
mlp_ratio=4.0,
|
| 82 |
+
norm_type='layer_norm',
|
| 83 |
+
num_heads=16,
|
| 84 |
+
patch_size=16,
|
| 85 |
+
pretrain_size=448,
|
| 86 |
+
pretrained='pretrained/ViTP_ViT_L_300M_rs.safetensors',
|
| 87 |
+
pretrained_type='full',
|
| 88 |
+
qk_normalization=False,
|
| 89 |
+
qkv_bias=True,
|
| 90 |
+
type='InternViTAdapter',
|
| 91 |
+
use_final_norm=True,
|
| 92 |
+
use_flash_attn=False,
|
| 93 |
+
with_cp=True,
|
| 94 |
+
with_fpn=False),
|
| 95 |
+
data_preprocessor=dict(
|
| 96 |
+
bgr_to_rgb=True,
|
| 97 |
+
mean=[
|
| 98 |
+
123.675,
|
| 99 |
+
116.28,
|
| 100 |
+
103.53,
|
| 101 |
+
123.675,
|
| 102 |
+
116.28,
|
| 103 |
+
103.53,
|
| 104 |
+
],
|
| 105 |
+
pad_val=0,
|
| 106 |
+
seg_pad_val=255,
|
| 107 |
+
size_divisor=32,
|
| 108 |
+
std=[
|
| 109 |
+
58.395,
|
| 110 |
+
57.12,
|
| 111 |
+
57.375,
|
| 112 |
+
58.395,
|
| 113 |
+
57.12,
|
| 114 |
+
57.375,
|
| 115 |
+
],
|
| 116 |
+
test_cfg=dict(size_divisor=32),
|
| 117 |
+
type='DualInputSegDataPreProcessor'),
|
| 118 |
+
decode_head=dict(
|
| 119 |
+
align_corners=False,
|
| 120 |
+
channels=512,
|
| 121 |
+
dropout_ratio=0.1,
|
| 122 |
+
in_channels=[
|
| 123 |
+
2048,
|
| 124 |
+
2048,
|
| 125 |
+
2048,
|
| 126 |
+
2048,
|
| 127 |
+
],
|
| 128 |
+
in_index=[
|
| 129 |
+
0,
|
| 130 |
+
1,
|
| 131 |
+
2,
|
| 132 |
+
3,
|
| 133 |
+
],
|
| 134 |
+
loss_decode=dict(
|
| 135 |
+
loss_weight=1.0, type='mmseg.CrossEntropyLoss', use_sigmoid=False),
|
| 136 |
+
norm_cfg=dict(requires_grad=True, type='SyncBN'),
|
| 137 |
+
num_classes=2,
|
| 138 |
+
pool_scales=(
|
| 139 |
+
1,
|
| 140 |
+
2,
|
| 141 |
+
3,
|
| 142 |
+
6,
|
| 143 |
+
),
|
| 144 |
+
type='mmseg.UPerHead'),
|
| 145 |
+
neck=dict(policy='concat', type='FeatureFusionNeck'),
|
| 146 |
+
test_cfg=dict(crop_size=(
|
| 147 |
+
512,
|
| 148 |
+
512,
|
| 149 |
+
), mode='slide', stride=(
|
| 150 |
+
256,
|
| 151 |
+
256,
|
| 152 |
+
)),
|
| 153 |
+
train_cfg=dict(),
|
| 154 |
+
type='SiamEncoderDecoder')
|
| 155 |
+
norm_cfg = dict(requires_grad=True, type='SyncBN')
|
| 156 |
+
optim_wrapper = dict(
|
| 157 |
+
clip_grad=None,
|
| 158 |
+
constructor='InternViTAdapterLayerDecayOptimizerConstructor',
|
| 159 |
+
optimizer=dict(
|
| 160 |
+
betas=(
|
| 161 |
+
0.9,
|
| 162 |
+
0.999,
|
| 163 |
+
), lr=2e-05, type='AdamW', weight_decay=0.05),
|
| 164 |
+
paramwise_cfg=dict(layer_decay_rate=0.9, num_layers=24),
|
| 165 |
+
type='OptimWrapper')
|
| 166 |
+
optimizer = dict(lr=0.01, momentum=0.9, type='SGD', weight_decay=0.0005)
|
| 167 |
+
param_scheduler = [
|
| 168 |
+
dict(
|
| 169 |
+
begin=0, by_epoch=False, end=1000, start_factor=1e-06,
|
| 170 |
+
type='LinearLR'),
|
| 171 |
+
dict(
|
| 172 |
+
begin=1000,
|
| 173 |
+
by_epoch=False,
|
| 174 |
+
end=120000,
|
| 175 |
+
eta_min=0.0,
|
| 176 |
+
power=1.0,
|
| 177 |
+
type='PolyLR'),
|
| 178 |
+
]
|
| 179 |
+
pretrained = 'pretrained/ViTP_ViT_L_300M_rs.safetensors'
|
| 180 |
+
resume = False
|
| 181 |
+
test_cfg = dict(type='TestLoop')
|
| 182 |
+
test_dataloader = dict(
|
| 183 |
+
batch_size=1,
|
| 184 |
+
dataset=dict(
|
| 185 |
+
data_prefix=dict(
|
| 186 |
+
img_path_from='test/Image1',
|
| 187 |
+
img_path_to='test/Image2',
|
| 188 |
+
seg_map_path='test/label'),
|
| 189 |
+
data_root='/defaultShare/pubdata/remote_sensing/S2Looking',
|
| 190 |
+
pipeline=[
|
| 191 |
+
dict(type='MultiImgLoadImageFromFile'),
|
| 192 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 193 |
+
dict(type='MultiImgPackSegInputs'),
|
| 194 |
+
],
|
| 195 |
+
type='S2Looking_Dataset'),
|
| 196 |
+
num_workers=8,
|
| 197 |
+
persistent_workers=True,
|
| 198 |
+
sampler=dict(shuffle=False, type='DefaultSampler'))
|
| 199 |
+
test_evaluator = dict(
|
| 200 |
+
iou_metrics=[
|
| 201 |
+
'mFscore',
|
| 202 |
+
'mIoU',
|
| 203 |
+
], type='mmseg.IoUMetric')
|
| 204 |
+
test_pipeline = [
|
| 205 |
+
dict(type='MultiImgLoadImageFromFile'),
|
| 206 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 207 |
+
dict(type='MultiImgPackSegInputs'),
|
| 208 |
+
]
|
| 209 |
+
train_cfg = dict(
|
| 210 |
+
max_iters=120000, type='IterBasedTrainLoop', val_interval=12000)
|
| 211 |
+
train_dataloader = dict(
|
| 212 |
+
batch_size=1,
|
| 213 |
+
dataset=dict(
|
| 214 |
+
data_prefix=dict(
|
| 215 |
+
img_path_from='train/Image1',
|
| 216 |
+
img_path_to='train/Image2',
|
| 217 |
+
seg_map_path='train/label'),
|
| 218 |
+
data_root='/defaultShare/pubdata/remote_sensing/S2Looking',
|
| 219 |
+
pipeline=[
|
| 220 |
+
dict(type='MultiImgLoadImageFromFile'),
|
| 221 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 222 |
+
dict(
|
| 223 |
+
degree=(
|
| 224 |
+
-20,
|
| 225 |
+
20,
|
| 226 |
+
),
|
| 227 |
+
flip_prob=0.5,
|
| 228 |
+
rotate_prob=0.5,
|
| 229 |
+
type='MultiImgRandomRotFlip'),
|
| 230 |
+
dict(
|
| 231 |
+
cat_max_ratio=0.75,
|
| 232 |
+
crop_size=(
|
| 233 |
+
512,
|
| 234 |
+
512,
|
| 235 |
+
),
|
| 236 |
+
type='MultiImgRandomCrop'),
|
| 237 |
+
dict(prob=0.5, type='MultiImgExchangeTime'),
|
| 238 |
+
dict(
|
| 239 |
+
brightness_delta=10,
|
| 240 |
+
contrast_range=(
|
| 241 |
+
0.8,
|
| 242 |
+
1.2,
|
| 243 |
+
),
|
| 244 |
+
hue_delta=10,
|
| 245 |
+
saturation_range=(
|
| 246 |
+
0.8,
|
| 247 |
+
1.2,
|
| 248 |
+
),
|
| 249 |
+
type='MultiImgPhotoMetricDistortion'),
|
| 250 |
+
dict(type='MultiImgPackSegInputs'),
|
| 251 |
+
],
|
| 252 |
+
type='S2Looking_Dataset'),
|
| 253 |
+
num_workers=8,
|
| 254 |
+
persistent_workers=True,
|
| 255 |
+
sampler=dict(shuffle=True, type='DefaultSampler'))
|
| 256 |
+
train_pipeline = [
|
| 257 |
+
dict(type='MultiImgLoadImageFromFile'),
|
| 258 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 259 |
+
dict(
|
| 260 |
+
degree=(
|
| 261 |
+
-20,
|
| 262 |
+
20,
|
| 263 |
+
),
|
| 264 |
+
flip_prob=0.5,
|
| 265 |
+
rotate_prob=0.5,
|
| 266 |
+
type='MultiImgRandomRotFlip'),
|
| 267 |
+
dict(
|
| 268 |
+
cat_max_ratio=0.75, crop_size=(
|
| 269 |
+
512,
|
| 270 |
+
512,
|
| 271 |
+
), type='MultiImgRandomCrop'),
|
| 272 |
+
dict(prob=0.5, type='MultiImgExchangeTime'),
|
| 273 |
+
dict(
|
| 274 |
+
brightness_delta=10,
|
| 275 |
+
contrast_range=(
|
| 276 |
+
0.8,
|
| 277 |
+
1.2,
|
| 278 |
+
),
|
| 279 |
+
hue_delta=10,
|
| 280 |
+
saturation_range=(
|
| 281 |
+
0.8,
|
| 282 |
+
1.2,
|
| 283 |
+
),
|
| 284 |
+
type='MultiImgPhotoMetricDistortion'),
|
| 285 |
+
dict(type='MultiImgPackSegInputs'),
|
| 286 |
+
]
|
| 287 |
+
tta_model = dict(type='mmseg.SegTTAModel')
|
| 288 |
+
tta_pipeline = [
|
| 289 |
+
dict(backend_args=None, type='MultiImgLoadImageFromFile'),
|
| 290 |
+
dict(
|
| 291 |
+
transforms=[
|
| 292 |
+
[
|
| 293 |
+
dict(
|
| 294 |
+
keep_ratio=True, scale_factor=0.75, type='MultiImgResize'),
|
| 295 |
+
dict(keep_ratio=True, scale_factor=1.0, type='MultiImgResize'),
|
| 296 |
+
dict(
|
| 297 |
+
keep_ratio=True, scale_factor=1.25, type='MultiImgResize'),
|
| 298 |
+
],
|
| 299 |
+
[
|
| 300 |
+
dict(
|
| 301 |
+
direction='horizontal',
|
| 302 |
+
prob=0.0,
|
| 303 |
+
type='MultiImgRandomFlip'),
|
| 304 |
+
dict(
|
| 305 |
+
direction='horizontal',
|
| 306 |
+
prob=1.0,
|
| 307 |
+
type='MultiImgRandomFlip'),
|
| 308 |
+
],
|
| 309 |
+
[
|
| 310 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 311 |
+
],
|
| 312 |
+
[
|
| 313 |
+
dict(type='MultiImgPackSegInputs'),
|
| 314 |
+
],
|
| 315 |
+
],
|
| 316 |
+
type='TestTimeAug'),
|
| 317 |
+
]
|
| 318 |
+
val_cfg = dict(type='ValLoop')
|
| 319 |
+
val_dataloader = dict(
|
| 320 |
+
batch_size=1,
|
| 321 |
+
dataset=dict(
|
| 322 |
+
data_prefix=dict(
|
| 323 |
+
img_path_from='val/Image1',
|
| 324 |
+
img_path_to='val/Image2',
|
| 325 |
+
seg_map_path='val/label'),
|
| 326 |
+
data_root='/defaultShare/pubdata/remote_sensing/S2Looking',
|
| 327 |
+
pipeline=[
|
| 328 |
+
dict(type='MultiImgLoadImageFromFile'),
|
| 329 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 330 |
+
dict(type='MultiImgPackSegInputs'),
|
| 331 |
+
],
|
| 332 |
+
type='S2Looking_Dataset'),
|
| 333 |
+
num_workers=8,
|
| 334 |
+
persistent_workers=True,
|
| 335 |
+
sampler=dict(shuffle=False, type='DefaultSampler'))
|
| 336 |
+
val_evaluator = dict(
|
| 337 |
+
iou_metrics=[
|
| 338 |
+
'mFscore',
|
| 339 |
+
'mIoU',
|
| 340 |
+
], type='mmseg.IoUMetric')
|
| 341 |
+
val_pipeline = [
|
| 342 |
+
dict(type='MultiImgLoadImageFromFile'),
|
| 343 |
+
dict(keep_ratio=True, scale=(
|
| 344 |
+
1024,
|
| 345 |
+
1024,
|
| 346 |
+
), type='MultiImgResize'),
|
| 347 |
+
dict(type='MultiImgLoadAnnotations'),
|
| 348 |
+
dict(type='MultiImgPackSegInputs'),
|
| 349 |
+
]
|
| 350 |
+
vis_backends = [
|
| 351 |
+
dict(type='CDLocalVisBackend'),
|
| 352 |
+
]
|
| 353 |
+
visualizer = dict(
|
| 354 |
+
alpha=1.0,
|
| 355 |
+
name='visualizer',
|
| 356 |
+
type='CDLocalVisualizer',
|
| 357 |
+
vis_backends=[
|
| 358 |
+
dict(type='CDLocalVisBackend'),
|
| 359 |
+
])
|
| 360 |
+
work_dir = './work_dirs/vitp_s2looking_upernet'
|