PropVG / refcoco+ /test_log.txt

Upload 21 files

a482a69 verified 5 months ago

12.4 kB

	2025-07-07 11:09:02,802 - PropVG - INFO - dataset = 'RefCOCOPlusUNC'
	data_root = './data/seqtr_type/'
	img_norm_cfg = dict(
	mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375])
	train_pipeline = [
	dict(
	type='LoadImageAnnotationsFromFile_TO',
	max_token=20,
	with_mask=True,
	with_bbox=True,
	dataset='RefCOCOPlusUNC',
	use_token_type='beit3',
	refer_file='data/seqtr_type/annotations/mixed-seg/coco_all.json',
	object_area_filter=100,
	object_area_rate_filter=[0.05, 0.8]),
	dict(type='Resize', img_scale=(384, 384), keep_ratio=False),
	dict(
	type='Normalize',
	mean=[123.675, 116.28, 103.53],
	std=[58.395, 57.12, 57.375]),
	dict(type='DefaultFormatBundle'),
	dict(
	type='CollectData',
	keys=[
	'img', 'ref_expr_inds', 'text_attention_mask', 'gt_mask_rle',
	'gt_bbox'
	],
	meta_keys=[
	'filename', 'expression', 'ori_shape', 'img_shape', 'pad_shape',
	'scale_factor', 'gt_ori_mask', 'target', 'empty',
	'refer_target_index'
	])
	]
	val_pipeline = [
	dict(
	type='LoadImageAnnotationsFromFile_TO',
	max_token=20,
	with_mask=True,
	with_bbox=True,
	dataset='RefCOCOPlusUNC',
	use_token_type='beit3',
	refer_file='data/seqtr_type/annotations/mixed-seg/coco_all.json',
	object_area_filter=100,
	object_area_rate_filter=[0.05, 0.8]),
	dict(type='Resize', img_scale=(384, 384), keep_ratio=False),
	dict(
	type='Normalize',
	mean=[123.675, 116.28, 103.53],
	std=[58.395, 57.12, 57.375]),
	dict(type='DefaultFormatBundle'),
	dict(
	type='CollectData',
	keys=[
	'img', 'ref_expr_inds', 'text_attention_mask', 'gt_mask_rle',
	'gt_bbox'
	],
	meta_keys=[
	'filename', 'expression', 'ori_shape', 'img_shape', 'pad_shape',
	'scale_factor', 'gt_ori_mask', 'target', 'empty',
	'refer_target_index'
	])
	]
	test_pipeline = [
	dict(
	type='LoadImageAnnotationsFromFile_TO',
	max_token=20,
	with_mask=True,
	with_bbox=True,
	dataset='RefCOCOPlusUNC',
	use_token_type='beit3',
	refer_file='data/seqtr_type/annotations/mixed-seg/coco_all.json',
	object_area_filter=100,
	object_area_rate_filter=[0.05, 0.8]),
	dict(type='Resize', img_scale=(384, 384), keep_ratio=False),
	dict(
	type='Normalize',
	mean=[123.675, 116.28, 103.53],
	std=[58.395, 57.12, 57.375]),
	dict(type='DefaultFormatBundle'),
	dict(
	type='CollectData',
	keys=[
	'img', 'ref_expr_inds', 'text_attention_mask', 'gt_mask_rle',
	'gt_bbox'
	],
	meta_keys=[
	'filename', 'expression', 'ori_shape', 'img_shape', 'pad_shape',
	'scale_factor', 'gt_ori_mask', 'target', 'empty',
	'refer_target_index'
	])
	]
	word_emb_cfg = dict(type='GloVe')
	data = dict(
	samples_per_gpu=8,
	workers_per_gpu=4,
	train=dict(
	type='RefCOCOPlusUNC',
	which_set='train',
	img_source=['coco'],
	annsfile=
	'./data/seqtr_type/annotations/refcocoplus-unc/instances_withid.json',
	imgsfile='./data/seqtr_type/images/mscoco/train2014',
	pipeline=[
	dict(
	type='LoadImageAnnotationsFromFile_TO',
	max_token=20,
	with_mask=True,
	with_bbox=True,
	dataset='RefCOCOPlusUNC',
	use_token_type='beit3',
	refer_file=
	'data/seqtr_type/annotations/mixed-seg/coco_all.json',
	object_area_filter=100,
	object_area_rate_filter=[0.05, 0.8]),
	dict(type='Resize', img_scale=(384, 384), keep_ratio=False),
	dict(
	type='Normalize',
	mean=[123.675, 116.28, 103.53],
	std=[58.395, 57.12, 57.375]),
	dict(type='DefaultFormatBundle'),
	dict(
	type='CollectData',
	keys=[
	'img', 'ref_expr_inds', 'text_attention_mask',
	'gt_mask_rle', 'gt_bbox'
	],
	meta_keys=[
	'filename', 'expression', 'ori_shape', 'img_shape',
	'pad_shape', 'scale_factor', 'gt_ori_mask', 'target',
	'empty', 'refer_target_index'
	])
	],
	word_emb_cfg=dict(type='GloVe')),
	val=dict(
	type='RefCOCOPlusUNC',
	which_set='val',
	img_source=['coco'],
	annsfile=
	'./data/seqtr_type/annotations/refcocoplus-unc/instances_withid.json',
	imgsfile='./data/seqtr_type/images/mscoco/train2014',
	pipeline=[
	dict(
	type='LoadImageAnnotationsFromFile_TO',
	max_token=20,
	with_mask=True,
	with_bbox=True,
	dataset='RefCOCOPlusUNC',
	use_token_type='beit3',
	refer_file=
	'data/seqtr_type/annotations/mixed-seg/coco_all.json',
	object_area_filter=100,
	object_area_rate_filter=[0.05, 0.8]),
	dict(type='Resize', img_scale=(384, 384), keep_ratio=False),
	dict(
	type='Normalize',
	mean=[123.675, 116.28, 103.53],
	std=[58.395, 57.12, 57.375]),
	dict(type='DefaultFormatBundle'),
	dict(
	type='CollectData',
	keys=[
	'img', 'ref_expr_inds', 'text_attention_mask',
	'gt_mask_rle', 'gt_bbox'
	],
	meta_keys=[
	'filename', 'expression', 'ori_shape', 'img_shape',
	'pad_shape', 'scale_factor', 'gt_ori_mask', 'target',
	'empty', 'refer_target_index'
	])
	],
	word_emb_cfg=dict(type='GloVe')),
	testA=dict(
	type='RefCOCOPlusUNC',
	which_set='testA',
	img_source=['coco'],
	annsfile=
	'./data/seqtr_type/annotations/refcocoplus-unc/instances_withid.json',
	imgsfile='./data/seqtr_type/images/mscoco/train2014',
	pipeline=[
	dict(
	type='LoadImageAnnotationsFromFile_TO',
	max_token=20,
	with_mask=True,
	with_bbox=True,
	dataset='RefCOCOPlusUNC',
	use_token_type='beit3',
	refer_file=
	'data/seqtr_type/annotations/mixed-seg/coco_all.json',
	object_area_filter=100,
	object_area_rate_filter=[0.05, 0.8]),
	dict(type='Resize', img_scale=(384, 384), keep_ratio=False),
	dict(
	type='Normalize',
	mean=[123.675, 116.28, 103.53],
	std=[58.395, 57.12, 57.375]),
	dict(type='DefaultFormatBundle'),
	dict(
	type='CollectData',
	keys=[
	'img', 'ref_expr_inds', 'text_attention_mask',
	'gt_mask_rle', 'gt_bbox'
	],
	meta_keys=[
	'filename', 'expression', 'ori_shape', 'img_shape',
	'pad_shape', 'scale_factor', 'gt_ori_mask', 'target',
	'empty', 'refer_target_index'
	])
	],
	word_emb_cfg=dict(type='GloVe')),
	testB=dict(
	type='RefCOCOPlusUNC',
	which_set='testB',
	img_source=['coco'],
	annsfile=
	'./data/seqtr_type/annotations/refcocoplus-unc/instances_withid.json',
	imgsfile='./data/seqtr_type/images/mscoco/train2014',
	pipeline=[
	dict(
	type='LoadImageAnnotationsFromFile_TO',
	max_token=20,
	with_mask=True,
	with_bbox=True,
	dataset='RefCOCOPlusUNC',
	use_token_type='beit3',
	refer_file=
	'data/seqtr_type/annotations/mixed-seg/coco_all.json',
	object_area_filter=100,
	object_area_rate_filter=[0.05, 0.8]),
	dict(type='Resize', img_scale=(384, 384), keep_ratio=False),
	dict(
	type='Normalize',
	mean=[123.675, 116.28, 103.53],
	std=[58.395, 57.12, 57.375]),
	dict(type='DefaultFormatBundle'),
	dict(
	type='CollectData',
	keys=[
	'img', 'ref_expr_inds', 'text_attention_mask',
	'gt_mask_rle', 'gt_bbox'
	],
	meta_keys=[
	'filename', 'expression', 'ori_shape', 'img_shape',
	'pad_shape', 'scale_factor', 'gt_ori_mask', 'target',
	'empty', 'refer_target_index'
	])
	],
	word_emb_cfg=dict(type='GloVe')))
	ema = False
	ema_factor = 0.999
	use_fp16 = False
	seed = 6666
	deterministic = True
	log_level = 'INFO'
	log_interval = 50
	save_interval = -1
	resume_from = None
	load_from = 'work_dir/refcoco+/PropVG-refcoco+.pth'
	finetune_from = None
	evaluate_interval = 1
	start_evaluate_epoch = 0
	start_save_checkpoint = 20
	max_token = 20
	img_size = 384
	patch_size = 16
	model = dict(
	type='MIXRefUniModel_OMG',
	vis_enc=dict(
	type='BEIT3',
	img_size=384,
	patch_size=16,
	vit_type='base',
	drop_path_rate=0.1,
	vocab_size=64010,
	freeze_layer=-1,
	vision_embed_proj_interpolate=False,
	pretrain='pretrain_weights/beit3_base_patch16_224.zip'),
	lan_enc=None,
	fusion=None,
	head=dict(
	type='REFHead',
	input_channels=768,
	hidden_channels=256,
	num_queries=20,
	detr_loss=dict(
	criterion=dict(loss_class=1.0, loss_bbox=5.0, loss_giou=2.0),
	matcher=dict(cost_class=1.0, cost_bbox=5.0, cost_giou=2.0)),
	loss_weight=dict(
	mask=dict(dice=1.0, bce=1.0, nt=0.2, neg=0),
	bbox=0.1,
	allbbox=0.1,
	refer=1.0),
	MTD=dict(K=100)),
	post_params=dict(
	score_weighted=False,
	mask_threshold=0.5,
	score_threshold=0.7,
	with_nms=False,
	with_mask=True),
	process_visual=True,
	visualize_params=dict(row_columns=(4, 5)),
	visual_mode='test')
	grad_norm_clip = 0.15
	lr = 0.0005
	optimizer_config = dict(
	type='Adam',
	lr=0.0005,
	lr_vis_enc=5e-05,
	lr_lan_enc=0.0005,
	betas=(0.9, 0.98),
	eps=1e-09,
	weight_decay=0,
	amsgrad=True)
	scheduler_config = dict(
	type='MultiStepLRWarmUp',
	warmup_epochs=1,
	decay_steps=[21, 27],
	decay_ratio=0.1,
	max_epoch=30)
	launcher = 'pytorch'
	distributed = True
	rank = 0
	world_size = 4

	2025-07-07 11:09:07,978 - PropVG - INFO - RefCOCOPlusUNC-val size: 10758
	2025-07-07 11:09:13,867 - PropVG - INFO - RefCOCOPlusUNC-testA size: 5726
	2025-07-07 11:09:19,990 - PropVG - INFO - RefCOCOPlusUNC-testB size: 4889
	2025-07-07 11:09:24,879 - PropVG - INFO - loaded checkpoint from work_dir/refcoco+/PropVG-refcoco+.pth

	2025-07-07 11:09:24,886 - PropVG - INFO - PropVG - evaluating set val
	2025-07-07 11:11:17,140 - PropVG - INFO - ------------ validate ------------ time: 112.25, DetACC: 83.73, mIoU: 72.94, oIoU: 70.24, MaskACC@0.5-0.9: [83.12, 80.60, 76.04, 65.37, 33.26], DetACC@0.5-0.9: [83.73, 81.30, 77.10, 68.58, 42.65]
	2025-07-07 11:11:18,910 - PropVG - INFO - PropVG - evaluating set testA
	2025-07-07 11:12:32,835 - PropVG - INFO - ------------ validate ------------ time: 73.92, DetACC: 88.01, mIoU: 76.49, oIoU: 74.32, MaskACC@0.5-0.9: [88.04, 86.00, 81.37, 70.53, 33.52], DetACC@0.5-0.9: [88.01, 85.91, 82.12, 73.80, 47.14]
	2025-07-07 11:12:34,541 - PropVG - INFO - PropVG - evaluating set testB
	2025-07-07 11:13:39,576 - PropVG - INFO - ------------ validate ------------ time: 65.03, DetACC: 76.59, mIoU: 67.21, oIoU: 63.41, MaskACC@0.5-0.9: [75.57, 71.83, 66.95, 57.38, 33.87], DetACC@0.5-0.9: [76.59, 73.26, 68.11, 59.24, 36.12]
	2025-07-07 11:13:41,507 - PropVG - INFO - successfully saved the results to work_dir/refcoco+/refer_output_thr0.7_no-nms_no-sw_0.5_100.xlsx !!!