PropVG / grefcoco /test_log.txt
Dmmm997's picture
Upload 21 files
a482a69 verified
2025-07-07 10:57:14,028 - PropVG - INFO - dataset = 'GRefCOCO'
data_root = './data/seqtr_type/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375])
train_pipeline = [
dict(
type='LoadImageAnnotationsFromFileGRES_TO',
max_token=50,
with_mask=True,
with_bbox=True,
dataset='GRefCOCO',
use_token_type='beit3',
refer_file='data/seqtr_type/annotations/grefs/coco_annotations.json',
object_area_filter=100,
object_area_rate_filter=[0.05, 0.8]),
dict(type='Resize', img_scale=(320, 320), keep_ratio=False),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375]),
dict(type='DefaultFormatBundle'),
dict(
type='CollectData',
keys=[
'img', 'ref_expr_inds', 'text_attention_mask', 'gt_mask_rle',
'gt_bbox', 'gt_mask_parts_rle'
],
meta_keys=[
'filename', 'expression', 'ori_shape', 'img_shape', 'pad_shape',
'scale_factor', 'gt_ori_mask', 'target', 'empty',
'refer_target_index', 'tokenized_words'
])
]
val_pipeline = [
dict(
type='LoadImageAnnotationsFromFileGRES_TO',
max_token=50,
with_mask=True,
with_bbox=True,
dataset='GRefCOCO',
use_token_type='beit3',
refer_file='data/seqtr_type/annotations/grefs/coco_annotations.json',
object_area_filter=100,
object_area_rate_filter=[0.05, 0.8]),
dict(type='Resize', img_scale=(320, 320), keep_ratio=False),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375]),
dict(type='DefaultFormatBundle'),
dict(
type='CollectData',
keys=[
'img', 'ref_expr_inds', 'text_attention_mask', 'gt_mask_rle',
'gt_bbox', 'gt_mask_parts_rle'
],
meta_keys=[
'filename', 'expression', 'ori_shape', 'img_shape', 'pad_shape',
'scale_factor', 'gt_ori_mask', 'target', 'empty',
'refer_target_index', 'tokenized_words'
])
]
test_pipeline = [
dict(
type='LoadImageAnnotationsFromFileGRES_TO',
max_token=50,
with_mask=True,
with_bbox=True,
dataset='GRefCOCO',
use_token_type='beit3',
refer_file='data/seqtr_type/annotations/grefs/coco_annotations.json',
object_area_filter=100,
object_area_rate_filter=[0.05, 0.8]),
dict(type='Resize', img_scale=(320, 320), keep_ratio=False),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375]),
dict(type='DefaultFormatBundle'),
dict(
type='CollectData',
keys=[
'img', 'ref_expr_inds', 'text_attention_mask', 'gt_mask_rle',
'gt_bbox', 'gt_mask_parts_rle'
],
meta_keys=[
'filename', 'expression', 'ori_shape', 'img_shape', 'pad_shape',
'scale_factor', 'gt_ori_mask', 'target', 'empty',
'refer_target_index', 'tokenized_words'
])
]
word_emb_cfg = dict(type='GloVe')
data = dict(
samples_per_gpu=16,
workers_per_gpu=4,
train=dict(
type='GRefCOCO',
which_set='train',
img_source=['coco'],
annsfile='./data/seqtr_type/annotations/grefs/instances.json',
imgsfile='./data/seqtr_type/images/mscoco/train2014',
pipeline=[
dict(
type='LoadImageAnnotationsFromFileGRES_TO',
max_token=50,
with_mask=True,
with_bbox=True,
dataset='GRefCOCO',
use_token_type='beit3',
refer_file=
'data/seqtr_type/annotations/grefs/coco_annotations.json',
object_area_filter=100,
object_area_rate_filter=[0.05, 0.8]),
dict(type='Resize', img_scale=(320, 320), keep_ratio=False),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375]),
dict(type='DefaultFormatBundle'),
dict(
type='CollectData',
keys=[
'img', 'ref_expr_inds', 'text_attention_mask',
'gt_mask_rle', 'gt_bbox', 'gt_mask_parts_rle'
],
meta_keys=[
'filename', 'expression', 'ori_shape', 'img_shape',
'pad_shape', 'scale_factor', 'gt_ori_mask', 'target',
'empty', 'refer_target_index', 'tokenized_words'
])
],
word_emb_cfg=dict(type='GloVe')),
val=dict(
type='GRefCOCO',
which_set='val',
img_source=['coco'],
annsfile='./data/seqtr_type/annotations/grefs/instances.json',
imgsfile='./data/seqtr_type/images/mscoco/train2014',
pipeline=[
dict(
type='LoadImageAnnotationsFromFileGRES_TO',
max_token=50,
with_mask=True,
with_bbox=True,
dataset='GRefCOCO',
use_token_type='beit3',
refer_file=
'data/seqtr_type/annotations/grefs/coco_annotations.json',
object_area_filter=100,
object_area_rate_filter=[0.05, 0.8]),
dict(type='Resize', img_scale=(320, 320), keep_ratio=False),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375]),
dict(type='DefaultFormatBundle'),
dict(
type='CollectData',
keys=[
'img', 'ref_expr_inds', 'text_attention_mask',
'gt_mask_rle', 'gt_bbox', 'gt_mask_parts_rle'
],
meta_keys=[
'filename', 'expression', 'ori_shape', 'img_shape',
'pad_shape', 'scale_factor', 'gt_ori_mask', 'target',
'empty', 'refer_target_index', 'tokenized_words'
])
],
word_emb_cfg=dict(type='GloVe')),
testA=dict(
type='GRefCOCO',
which_set='testA',
img_source=['coco'],
annsfile='./data/seqtr_type/annotations/grefs/instances.json',
imgsfile='./data/seqtr_type/images/mscoco/train2014',
pipeline=[
dict(
type='LoadImageAnnotationsFromFileGRES_TO',
max_token=50,
with_mask=True,
with_bbox=True,
dataset='GRefCOCO',
use_token_type='beit3',
refer_file=
'data/seqtr_type/annotations/grefs/coco_annotations.json',
object_area_filter=100,
object_area_rate_filter=[0.05, 0.8]),
dict(type='Resize', img_scale=(320, 320), keep_ratio=False),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375]),
dict(type='DefaultFormatBundle'),
dict(
type='CollectData',
keys=[
'img', 'ref_expr_inds', 'text_attention_mask',
'gt_mask_rle', 'gt_bbox', 'gt_mask_parts_rle'
],
meta_keys=[
'filename', 'expression', 'ori_shape', 'img_shape',
'pad_shape', 'scale_factor', 'gt_ori_mask', 'target',
'empty', 'refer_target_index', 'tokenized_words'
])
],
word_emb_cfg=dict(type='GloVe')),
testB=dict(
type='GRefCOCO',
which_set='testB',
img_source=['coco'],
annsfile='./data/seqtr_type/annotations/grefs/instances.json',
imgsfile='./data/seqtr_type/images/mscoco/train2014',
pipeline=[
dict(
type='LoadImageAnnotationsFromFileGRES_TO',
max_token=50,
with_mask=True,
with_bbox=True,
dataset='GRefCOCO',
use_token_type='beit3',
refer_file=
'data/seqtr_type/annotations/grefs/coco_annotations.json',
object_area_filter=100,
object_area_rate_filter=[0.05, 0.8]),
dict(type='Resize', img_scale=(320, 320), keep_ratio=False),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375]),
dict(type='DefaultFormatBundle'),
dict(
type='CollectData',
keys=[
'img', 'ref_expr_inds', 'text_attention_mask',
'gt_mask_rle', 'gt_bbox', 'gt_mask_parts_rle'
],
meta_keys=[
'filename', 'expression', 'ori_shape', 'img_shape',
'pad_shape', 'scale_factor', 'gt_ori_mask', 'target',
'empty', 'refer_target_index', 'tokenized_words'
])
],
word_emb_cfg=dict(type='GloVe')))
ema = False
ema_factor = 0.999
use_fp16 = False
seed = 6666
deterministic = True
log_level = 'INFO'
log_interval = 50
save_interval = -1
resume_from = None
load_from = 'work_dir/gres/PropVG-grefcoco.pth'
finetune_from = None
evaluate_interval = 1
start_evaluate_epoch = 0
start_save_checkpoint = 7
max_token = 50
img_size = 320
patch_size = 16
model = dict(
type='MIXGrefUniModel_OMG',
vis_enc=dict(
type='BEIT3',
img_size=320,
patch_size=16,
vit_type='base',
drop_path_rate=0.1,
vocab_size=64010,
freeze_layer=-1,
vision_embed_proj_interpolate=False,
pretrain='pretrain_weights/beit3_base_patch16_224.zip'),
lan_enc=None,
fusion=None,
head=dict(
type='GTMHead',
input_channels=768,
hidden_channels=256,
num_queries=20,
detr_loss=dict(
criterion=dict(loss_class=1.0, loss_bbox=5.0, loss_giou=2.0),
matcher=dict(cost_class=1.0, cost_bbox=5.0, cost_giou=2.0)),
loss_weight=dict(
mask=dict(dice=1.0, bce=1.0, nt=0.2, neg=0),
bbox=0.1,
allbbox=0.1,
refer=1.0),
MTD=dict(K=250)),
post_params=dict(
score_weighted=False,
mask_threshold=0.5,
score_threshold=0.7,
with_nms=False,
with_mask=True),
process_visual=False,
visualize_params=dict(row_columns=(4, 5)),
visual_mode='test')
grad_norm_clip = 0.15
lr = 0.0005
optimizer_config = dict(
type='Adam',
lr=0.0005,
lr_vis_enc=5e-05,
lr_lan_enc=0.0005,
betas=(0.9, 0.98),
eps=1e-09,
weight_decay=0,
amsgrad=True)
scheduler_config = dict(
type='MultiStepLRWarmUp',
warmup_epochs=1,
decay_steps=[7, 11],
decay_ratio=0.1,
max_epoch=12)
launcher = 'pytorch'
distributed = True
rank = 0
world_size = 4
2025-07-07 10:57:25,861 - PropVG - INFO - GRefCOCO-val size: 16870
2025-07-07 10:57:37,626 - PropVG - INFO - GRefCOCO-testA size: 18712
2025-07-07 10:57:49,703 - PropVG - INFO - GRefCOCO-testB size: 14933
2025-07-07 10:57:55,300 - PropVG - INFO - loaded checkpoint from work_dir/gres/PropVG-grefcoco.pth
2025-07-07 10:57:55,323 - PropVG - INFO - PropVG - evaluating set val
2025-07-07 10:59:51,470 - PropVG - INFO - ------------ validate ------------ time: 116.14, F1score: 72.16, Nacc: 72.83, Tacc: 96.93, gIoU: 73.29, cIoU: 69.23, MaskACC@0.7-0.9: [74.74, 60.99, 23.42]
2025-07-07 10:59:52,918 - PropVG - INFO - PropVG - evaluating set testA
2025-07-07 11:01:57,887 - PropVG - INFO - ------------ validate ------------ time: 124.96, F1score: 68.77, Nacc: 69.87, Tacc: 96.56, gIoU: 74.43, cIoU: 74.20, MaskACC@0.7-0.9: [77.48, 65.93, 30.06]
2025-07-07 11:01:59,563 - PropVG - INFO - PropVG - evaluating set testB
2025-07-07 11:03:41,160 - PropVG - INFO - ------------ validate ------------ time: 101.59, F1score: 59.02, Nacc: 64.97, Tacc: 91.68, gIoU: 65.87, cIoU: 64.76, MaskACC@0.7-0.9: [62.03, 51.61, 28.43]
2025-07-07 11:03:42,844 - PropVG - INFO - sucessfully save the results to work_dir/gres/refer_output_thr0.7_no-nms_no-sw_0.5_250.xlsx !!!