backup evironment completed ! Loading pth from /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal ******************** All args: ************************************************* align_contiguous = False align_drop_z = 0 align_keep_percentile = 0.1 align_many_to_one = False align_one_to_many = False align_top_band_size = 0 att_hid_size = 512 aux_loss = True backbone = None base_cfg_path = cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml basic_ss_prob = 0 batch_size = 1 batch_size_for_eval = 1 bbox_loss_coef = 0 beta = 1 cap_dec_n_points = 4 cap_nheads = 1 cap_num_feature_levels = 4 cap_prob_clip = False caption_cost_type = loss caption_decoder_type = standard caption_loss_coef = 2 cfg_path = cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml cl_schedule_time = [0, 2] cl_schedule_val = [0, 0.1] clip_context_dim = 512 cls_loss_coef = 2 contrastive_hidden_size = 128 contrastive_loss_start_coef = 0.0 contrastive_loss_temperature = 0.1 cost_alpha = 0.25 cost_gamma = 2 count_loss_coef = 0.5 criteria_for_best_ckpt = overall current_lr = 5e-05 data_norm = 0 data_rescale = 1 debug = False dec_layers = 2 dec_n_points = 4 device = cuda dict_file = data/howto/vocabulary_howto_rate2_anet.json dict_file_val = data/howto/vocabulary_howto_rate2_anet.json dilation = False disable_contrastive_projection = 1 disable_cudnn = 0 disable_mid_caption_heads = False disable_rematch = False disable_tqdm = False drop_prob = 0.5 ec_alpha = 1.0 enable_bg_for_cl = True enable_contrastive = False enable_cross_video_cl = True enable_e2t_cl = True enc_layers = 2 enc_n_points = 4 eos_coef = 0.1 epoch = 20 eval_proposal_file = data/generated_proposals/dbg_trainval_top100.json event_context_dim = None feature_dim = 768 feature_sample_rate = 1 fix_xcw = 1 focal_alpha = 0.25 focal_gamma = 2.0 focal_mil = False frame_embedding_num = 100 ft_gt_percent = 1.0 giou_loss_coef = 4 gpu_id = [] grad_clip = 100.0 gt_file_for_auc = data/anet/captiondata/val_all.json gt_file_for_eval = ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] gt_file_for_para_eval = ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] gt_proposal_sample_num = 20 hidden_dim = 512 hidden_dropout_prob = 0.5 huggingface_cache_dir = .cache id = seq2-ft(mix)-gt_percent-1.0 id_ori = input_encoding_size = 512 invalid_video_json = [] iteration = 3 layer_norm_eps = 1e-12 learning_rate_decay_every = 3 learning_rate_decay_rate = 0.5 learning_rate_decay_start = 8 lloss_beta = 1 lloss_cross_entropy = 0 lloss_focal_loss = 0 lloss_gau_mask = 1 lr = 5e-05 lr_backbone = 2e-05 lr_backbone_names = ['None'] lr_linear_proj_mult = 0.1 lr_linear_proj_names = ['reference_points', 'sampling_offsets'] lr_proj = 0 map = True matcher_type = default max_caption_len = 50 max_eseq_length = 10 max_pos_num = 500 max_text_input_len = 32 merge_criterion = ins_cap_topk merge_k_boxes = 3 merge_mode = weighted_sum mil_loss_coef = 0 min_epoch_when_save = -1 nheads = 8 norm_ins_score = sigmoid nthreads = 4 num_classes = 1 num_feature_levels = 4 num_layers = 1 num_neg_box = 10 num_queries = 100 optimizer_type = adam position_embedding = sine position_embedding_scale = 6.283185307179586 pre_percent = 1.0 pretrain = None pretrain_path = pretrained_language_model = CLIP prior_anchor_duration_init = True prior_manner = all pseudo_box_aug = False pseudo_box_aug_mode = random_range pseudo_box_aug_num = 8 pseudo_box_aug_ratio = 0.02 pseudo_box_type = similarity_op_order_v2 random_anchor_init = True random_seed = False ref_rank_loss_coef = 0.0 refine_pseudo_box = False refine_pseudo_stage_num = 2 rnn_size = 512 sample_method = nearest save_all_checkpoint = 0 save_checkpoint_every = 1 save_dir = /mnt/data/pjlab-3090-sport/wuhao/logs/dibs scheduled_sampling_increase_every = 2 scheduled_sampling_increase_prob = 0.05 scheduled_sampling_max_prob = 0.25 scheduled_sampling_start = -1 seed = 777 self_iou_loss_coef = 0.0 set_cost_bbox = 0 set_cost_caption = 0 set_cost_cl = 0.0 set_cost_class = 2 set_cost_giou = 4 set_cost_sim = 1.0 share_caption_head = 1 soft_attention = 1 start_from = start_from_mode = last start_refine_epoch = -1 statistic_mode = mode text_encoder_learning_strategy = frozen text_feature_folder = ['/mnt/data/Gvlab/wuhao/features/howto100m/clip/text_proj', '/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/'] text_feature_folder_val = ['/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/'] text_hidden_dim = 768 top_frames = 30 train_caption_file = ['data/howto/captiondata/howto100m_train.json', 'data/anet/captiondata/train_modified.json'] train_proposal_file = data/generated_proposals/dbg_trainval_top100.json train_proposal_sample_num = 30 train_proposal_type = gt training_scheme = all transformer_dropout_prob = 0.1 transformer_ff_dim = 512 transformer_input_type = queries use_additional_cap_layer = False use_additional_score_layer = False use_anchor = 0 use_neg_pseudo_box = False use_pseudo_box = False use_query_box_for_refine = 0 val_caption_file = data/anet/captiondata/val_1.json visual_feature_folder = ['/mnt/data/Gvlab/wuhao/features/howto100m/clip/visual', '/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/'] visual_feature_folder_val = ['/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/'] visual_feature_type = ['CLIP'] vocab_size = 16221 vocab_size_val = 16221 weight_decay = 0.0001 weighted_mil_loss = False width_ratio = 1 width_th = 1 window_size = 2 with_box_refine = 1 wordRNN_input_feats_type = C ******************** Model structure: ****************************************** PDVC( (base_encoder): BaseEncoder( (pos_embed): PositionEmbeddingSine( (duration_embed_layer): Linear(in_features=256, out_features=256, bias=True) ) (input_proj): ModuleList( (0): Sequential( (0): Conv1d(768, 512, kernel_size=(1,), stride=(1,)) (1): GroupNorm(32, 512, eps=1e-05, affine=True) ) (1): Sequential( (0): Conv1d(768, 512, kernel_size=(3,), stride=(2,), padding=(1,)) (1): GroupNorm(32, 512, eps=1e-05, affine=True) ) (2): Sequential( (0): Conv1d(512, 512, kernel_size=(3,), stride=(2,), padding=(1,)) (1): GroupNorm(32, 512, eps=1e-05, affine=True) ) (3): Sequential( (0): Conv1d(512, 512, kernel_size=(3,), stride=(2,), padding=(1,)) (1): GroupNorm(32, 512, eps=1e-05, affine=True) ) ) ) (transformer): DeformableTransformer( (encoder): DeformableTransformerEncoder( (layers): ModuleList( (0): DeformableTransformerEncoderLayer( (self_attn): MSDeformAttn( (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) (attention_weights): Linear(in_features=512, out_features=128, bias=True) (value_proj): Linear(in_features=512, out_features=512, bias=True) (output_proj): Linear(in_features=512, out_features=512, bias=True) ) (dropout1): Dropout(p=0.1, inplace=False) (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) (linear1): Linear(in_features=512, out_features=512, bias=True) (dropout2): Dropout(p=0.1, inplace=False) (linear2): Linear(in_features=512, out_features=512, bias=True) (dropout3): Dropout(p=0.1, inplace=False) (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) ) (1): DeformableTransformerEncoderLayer( (self_attn): MSDeformAttn( (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) (attention_weights): Linear(in_features=512, out_features=128, bias=True) (value_proj): Linear(in_features=512, out_features=512, bias=True) (output_proj): Linear(in_features=512, out_features=512, bias=True) ) (dropout1): Dropout(p=0.1, inplace=False) (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) (linear1): Linear(in_features=512, out_features=512, bias=True) (dropout2): Dropout(p=0.1, inplace=False) (linear2): Linear(in_features=512, out_features=512, bias=True) (dropout3): Dropout(p=0.1, inplace=False) (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) ) ) ) (decoder): DeformableTransformerDecoder( (layers): ModuleList( (0): DeformableTransformerDecoderLayer( (cross_attn): MSDeformAttn( (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) (attention_weights): Linear(in_features=512, out_features=128, bias=True) (value_proj): Linear(in_features=512, out_features=512, bias=True) (output_proj): Linear(in_features=512, out_features=512, bias=True) ) (dropout1): Dropout(p=0.1, inplace=False) (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) (self_attn): MultiheadAttention( (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True) ) (dropout2): Dropout(p=0.1, inplace=False) (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) (linear1): Linear(in_features=512, out_features=512, bias=True) (dropout3): Dropout(p=0.1, inplace=False) (linear2): Linear(in_features=512, out_features=512, bias=True) (dropout4): Dropout(p=0.1, inplace=False) (norm3): LayerNorm((512,), eps=1e-05, elementwise_affine=True) ) (1): DeformableTransformerDecoderLayer( (cross_attn): MSDeformAttn( (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) (attention_weights): Linear(in_features=512, out_features=128, bias=True) (value_proj): Linear(in_features=512, out_features=512, bias=True) (output_proj): Linear(in_features=512, out_features=512, bias=True) ) (dropout1): Dropout(p=0.1, inplace=False) (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) (self_attn): MultiheadAttention( (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True) ) (dropout2): Dropout(p=0.1, inplace=False) (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) (linear1): Linear(in_features=512, out_features=512, bias=True) (dropout3): Dropout(p=0.1, inplace=False) (linear2): Linear(in_features=512, out_features=512, bias=True) (dropout4): Dropout(p=0.1, inplace=False) (norm3): LayerNorm((512,), eps=1e-05, elementwise_affine=True) ) ) (bbox_head): ModuleList( (0): MLP( (layers): ModuleList( (0): Linear(in_features=512, out_features=512, bias=True) (1): Linear(in_features=512, out_features=512, bias=True) (2): Linear(in_features=512, out_features=2, bias=True) ) ) (1): MLP( (layers): ModuleList( (0): Linear(in_features=512, out_features=512, bias=True) (1): Linear(in_features=512, out_features=512, bias=True) (2): Linear(in_features=512, out_features=2, bias=True) ) ) ) ) (pos_trans): Linear(in_features=512, out_features=1024, bias=True) (pos_trans_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (reference_points): Linear(in_features=512, out_features=1, bias=True) ) (caption_head): ModuleList( (0): LSTMDSACaptioner( (embed): Embedding(16222, 512) (logit): Linear(in_features=512, out_features=16222, bias=True) (dropout): Dropout(p=0.5, inplace=False) (core): ShowAttendTellCore( (rnn): LSTM(1536, 512, bias=False, dropout=0.5) (att_drop): Dropout(p=0.5, inplace=False) (deformable_att): MSDeformAttnCap( (sampling_offsets): Linear(in_features=1024, out_features=16, bias=True) (attention_weights): Linear(in_features=1024, out_features=16, bias=True) (value_proj): Linear(in_features=512, out_features=512, bias=True) (output_proj): Linear(in_features=512, out_features=512, bias=True) ) (ctx2att): Linear(in_features=512, out_features=512, bias=True) (h2att): Linear(in_features=512, out_features=512, bias=True) (alpha_net): Linear(in_features=512, out_features=1, bias=True) ) ) (1): LSTMDSACaptioner( (embed): Embedding(16222, 512) (logit): Linear(in_features=512, out_features=16222, bias=True) (dropout): Dropout(p=0.5, inplace=False) (core): ShowAttendTellCore( (rnn): LSTM(1536, 512, bias=False, dropout=0.5) (att_drop): Dropout(p=0.5, inplace=False) (deformable_att): MSDeformAttnCap( (sampling_offsets): Linear(in_features=1024, out_features=16, bias=True) (attention_weights): Linear(in_features=1024, out_features=16, bias=True) (value_proj): Linear(in_features=512, out_features=512, bias=True) (output_proj): Linear(in_features=512, out_features=512, bias=True) ) (ctx2att): Linear(in_features=512, out_features=512, bias=True) (h2att): Linear(in_features=512, out_features=512, bias=True) (alpha_net): Linear(in_features=512, out_features=1, bias=True) ) ) ) (query_embed): Embedding(100, 1024) (class_head): ModuleList( (0): Linear(in_features=512, out_features=1, bias=True) (1): Linear(in_features=512, out_features=1, bias=True) ) (class_refine_head): ModuleList( (0): Linear(in_features=512, out_features=1, bias=True) (1): Linear(in_features=512, out_features=1, bias=True) ) (count_head): ModuleList( (0): Linear(in_features=512, out_features=11, bias=True) (1): Linear(in_features=512, out_features=11, bias=True) ) (bbox_head): ModuleList( (0): MLP( (layers): ModuleList( (0): Linear(in_features=512, out_features=512, bias=True) (1): Linear(in_features=512, out_features=512, bias=True) (2): Linear(in_features=512, out_features=2, bias=True) ) ) (1): MLP( (layers): ModuleList( (0): Linear(in_features=512, out_features=512, bias=True) (1): Linear(in_features=512, out_features=512, bias=True) (2): Linear(in_features=512, out_features=2, bias=True) ) ) ) (contrastive_projection_event): ModuleList( (0): Identity() (1): Identity() ) (contrastive_projection_text): ModuleList( (0): Identity() (1): Identity() ) ) ******************** Strat training ! ****************************************** loss type: dict_keys(['loss_ce', 'loss_bbox', 'loss_giou', 'loss_counter', 'loss_caption', 'contrastive_loss', 'loss_ce_0', 'loss_bbox_0', 'loss_giou_0', 'loss_counter_0', 'loss_caption_0', 'contrastive_loss_0']) loss weights: dict_values([2, 0, 4, 0.5, 2, 0.0, 2, 0, 4, 0.5, 2, 0.0]) ID seq2-ft(mix)-gt_percent-1.0 iter 1000 (epoch 0), loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.126), ('loss_bbox', 0.117), ('loss_giou', 0.275), ('loss_self_iou', 0.126), ('cardinality_error', 3.775), ('loss_ce_0', 0.284), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.118), ('loss_giou_0', 0.276), ('loss_self_iou_0', 0.126), ('cardinality_error_0', 3.775), ('loss_caption_0', 3.781), ('loss_caption', 3.778), ('total_loss', 18.585)]), time/iter = 0.182, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 2000 (epoch 0), loss = OrderedDict([('loss_ce', 0.287), ('loss_counter', 0.119), ('loss_bbox', 0.087), ('loss_giou', 0.239), ('loss_self_iou', 0.12), ('cardinality_error', 3.705), ('loss_ce_0', 0.289), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.087), ('loss_giou_0', 0.239), ('loss_self_iou_0', 0.121), ('cardinality_error_0', 3.705), ('loss_caption_0', 3.682), ('loss_caption', 3.675), ('total_loss', 17.896)]), time/iter = 0.180, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 3000 (epoch 0), loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.122), ('loss_bbox', 0.078), ('loss_giou', 0.227), ('loss_self_iou', 0.098), ('cardinality_error', 3.705), ('loss_ce_0', 0.292), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.228), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.705), ('loss_caption_0', 3.668), ('loss_caption', 3.664), ('total_loss', 17.771)]), time/iter = 0.181, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 4000 (epoch 0), loss = OrderedDict([('loss_ce', 0.289), ('loss_counter', 0.126), ('loss_bbox', 0.078), ('loss_giou', 0.224), ('loss_self_iou', 0.1), ('cardinality_error', 3.784), ('loss_ce_0', 0.291), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.223), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.784), ('loss_caption_0', 3.624), ('loss_caption', 3.629), ('total_loss', 17.579)]), time/iter = 0.174, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 5000 (epoch 0), loss = OrderedDict([('loss_ce', 0.285), ('loss_counter', 0.121), ('loss_bbox', 0.08), ('loss_giou', 0.218), ('loss_self_iou', 0.114), ('cardinality_error', 3.674), ('loss_ce_0', 0.287), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.08), ('loss_giou_0', 0.218), ('loss_self_iou_0', 0.115), ('cardinality_error_0', 3.674), ('loss_caption_0', 3.629), ('loss_caption', 3.629), ('total_loss', 17.526)]), time/iter = 0.178, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 6000 (epoch 0), loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.13), ('loss_bbox', 0.076), ('loss_giou', 0.22), ('loss_self_iou', 0.098), ('cardinality_error', 3.786), ('loss_ce_0', 0.293), ('loss_counter_0', 0.129), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.22), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.786), ('loss_caption_0', 3.625), ('loss_caption', 3.622), ('total_loss', 17.555)]), time/iter = 0.182, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 7000 (epoch 0), loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.12), ('loss_bbox', 0.076), ('loss_giou', 0.215), ('loss_self_iou', 0.097), ('cardinality_error', 3.746), ('loss_ce_0', 0.293), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.215), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.746), ('loss_caption_0', 3.58), ('loss_caption', 3.576), ('total_loss', 17.319)]), time/iter = 0.179, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 8000 (epoch 0), loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.129), ('loss_bbox', 0.078), ('loss_giou', 0.218), ('loss_self_iou', 0.108), ('cardinality_error', 3.754), ('loss_ce_0', 0.288), ('loss_counter_0', 0.128), ('loss_bbox_0', 0.079), ('loss_giou_0', 0.218), ('loss_self_iou_0', 0.11), ('cardinality_error_0', 3.754), ('loss_caption_0', 3.546), ('loss_caption', 3.546), ('total_loss', 17.209)]), time/iter = 0.184, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 9000 (epoch 0), loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.12), ('loss_bbox', 0.078), ('loss_giou', 0.219), ('loss_self_iou', 0.1), ('cardinality_error', 3.685), ('loss_ce_0', 0.291), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.219), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.685), ('loss_caption_0', 3.544), ('loss_caption', 3.54), ('total_loss', 17.2)]), time/iter = 0.180, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 10000 (epoch 0), loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.125), ('loss_bbox', 0.077), ('loss_giou', 0.22), ('loss_self_iou', 0.101), ('cardinality_error', 3.748), ('loss_ce_0', 0.293), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.22), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.748), ('loss_caption_0', 3.582), ('loss_caption', 3.577), ('total_loss', 17.376)]), time/iter = 0.180, bad_vid = 0.000 Validation results of iter 10009: Bleu_1:0.15656016917085527 Bleu_2:0.08210369852679855 Bleu_3:0.042491746140277446 Bleu_4:0.021149866989626908 METEOR:0.08752782819459405 ROUGE_L:0.1577032846084498 CIDEr:0.2687260839927409 Recall:0.4986985069085389 Precision:0.548450952477792 soda_c:0.045070258467165024 para_Bleu_1:0.36987086578065714 para_Bleu_2:0.1987998709052068 para_Bleu_3:0.11671522868501899 para_Bleu_4:0.07164097958462183 para_METEOR:0.13901753612789455 para_ROUGE_L:0.2826680559963382 para_CIDEr:0.0956891322121665 overall score of iter 10009: 0.3063476479246829 Save model at iter 10009 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save Best-model at iter 10009 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 11000 (epoch 1), loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.124), ('loss_bbox', 0.077), ('loss_giou', 0.217), ('loss_self_iou', 0.101), ('cardinality_error', 3.788), ('loss_ce_0', 0.292), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.217), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.788), ('loss_caption_0', 3.446), ('loss_caption', 3.443), ('total_loss', 16.802)]), time/iter = 0.707, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 12000 (epoch 1), loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.12), ('loss_bbox', 0.076), ('loss_giou', 0.214), ('loss_self_iou', 0.103), ('cardinality_error', 3.694), ('loss_ce_0', 0.291), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.075), ('loss_giou_0', 0.213), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.694), ('loss_caption_0', 3.427), ('loss_caption', 3.428), ('total_loss', 16.701)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 13000 (epoch 1), loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.12), ('loss_bbox', 0.076), ('loss_giou', 0.217), ('loss_self_iou', 0.107), ('cardinality_error', 3.689), ('loss_ce_0', 0.291), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.217), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.689), ('loss_caption_0', 3.464), ('loss_caption', 3.461), ('total_loss', 16.871)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 14000 (epoch 1), loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.118), ('loss_bbox', 0.073), ('loss_giou', 0.21), ('loss_self_iou', 0.1), ('cardinality_error', 3.663), ('loss_ce_0', 0.292), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.663), ('loss_caption_0', 3.414), ('loss_caption', 3.41), ('total_loss', 16.616)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 15000 (epoch 1), loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.127), ('loss_bbox', 0.076), ('loss_giou', 0.214), ('loss_self_iou', 0.103), ('cardinality_error', 3.828), ('loss_ce_0', 0.296), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.215), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.828), ('loss_caption_0', 3.453), ('loss_caption', 3.453), ('total_loss', 16.836)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 16000 (epoch 1), loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.121), ('loss_bbox', 0.073), ('loss_giou', 0.206), ('loss_self_iou', 0.105), ('cardinality_error', 3.687), ('loss_ce_0', 0.297), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.207), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.687), ('loss_caption_0', 3.461), ('loss_caption', 3.462), ('total_loss', 16.803)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 17000 (epoch 1), loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.127), ('loss_bbox', 0.073), ('loss_giou', 0.208), ('loss_self_iou', 0.102), ('cardinality_error', 3.791), ('loss_ce_0', 0.3), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.209), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.791), ('loss_caption_0', 3.469), ('loss_caption', 3.465), ('total_loss', 16.864)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 18000 (epoch 1), loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.119), ('loss_bbox', 0.074), ('loss_giou', 0.205), ('loss_self_iou', 0.107), ('cardinality_error', 3.68), ('loss_ce_0', 0.298), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.206), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.68), ('loss_caption_0', 3.478), ('loss_caption', 3.475), ('total_loss', 16.859)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 19000 (epoch 1), loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.126), ('loss_bbox', 0.073), ('loss_giou', 0.207), ('loss_self_iou', 0.099), ('cardinality_error', 3.752), ('loss_ce_0', 0.304), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.208), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.752), ('loss_caption_0', 3.396), ('loss_caption', 3.396), ('total_loss', 16.585)]), time/iter = 0.194, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 20000 (epoch 1), loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.128), ('loss_bbox', 0.071), ('loss_giou', 0.208), ('loss_self_iou', 0.101), ('cardinality_error', 3.804), ('loss_ce_0', 0.304), ('loss_counter_0', 0.128), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.208), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.804), ('loss_caption_0', 3.42), ('loss_caption', 3.419), ('total_loss', 16.684)]), time/iter = 0.189, bad_vid = 0.000 Validation results of iter 20018: Bleu_1:0.15965966113561106 Bleu_2:0.08785069799970043 Bleu_3:0.04739925348589703 Bleu_4:0.02377096308421814 METEOR:0.09062964515721111 ROUGE_L:0.1652647774491388 CIDEr:0.27366191469495676 Recall:0.45131293652113946 Precision:0.5379414954918249 soda_c:0.04303682007432423 para_Bleu_1:0.3640361416830845 para_Bleu_2:0.1986476696673755 para_Bleu_3:0.11814800235116821 para_Bleu_4:0.07336184523852665 para_METEOR:0.13911724177507803 para_ROUGE_L:0.28211794880017504 para_CIDEr:0.08634617454158834 overall score of iter 20018: 0.29882526155519307 Save model at iter 20018 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 21000 (epoch 2), loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.122), ('loss_bbox', 0.071), ('loss_giou', 0.202), ('loss_self_iou', 0.101), ('cardinality_error', 3.666), ('loss_ce_0', 0.299), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.666), ('loss_caption_0', 3.344), ('loss_caption', 3.335), ('total_loss', 16.294)]), time/iter = 0.726, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 22000 (epoch 2), loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.119), ('loss_bbox', 0.073), ('loss_giou', 0.201), ('loss_self_iou', 0.109), ('cardinality_error', 3.752), ('loss_ce_0', 0.292), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.11), ('cardinality_error_0', 3.752), ('loss_caption_0', 3.302), ('loss_caption', 3.304), ('total_loss', 16.116)]), time/iter = 0.188, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 23000 (epoch 2), loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.128), ('loss_bbox', 0.077), ('loss_giou', 0.208), ('loss_self_iou', 0.113), ('cardinality_error', 3.803), ('loss_ce_0', 0.299), ('loss_counter_0', 0.128), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.208), ('loss_self_iou_0', 0.112), ('cardinality_error_0', 3.803), ('loss_caption_0', 3.348), ('loss_caption', 3.34), ('total_loss', 16.363)]), time/iter = 0.190, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 24000 (epoch 2), loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.122), ('loss_bbox', 0.076), ('loss_giou', 0.207), ('loss_self_iou', 0.093), ('cardinality_error', 3.729), ('loss_ce_0', 0.294), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.207), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.729), ('loss_caption_0', 3.354), ('loss_caption', 3.351), ('total_loss', 16.364)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 25000 (epoch 2), loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.122), ('loss_bbox', 0.078), ('loss_giou', 0.213), ('loss_self_iou', 0.091), ('cardinality_error', 3.734), ('loss_ce_0', 0.295), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.077), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.734), ('loss_caption_0', 3.372), ('loss_caption', 3.372), ('total_loss', 16.494)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 26000 (epoch 2), loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.125), ('loss_bbox', 0.072), ('loss_giou', 0.203), ('loss_self_iou', 0.096), ('cardinality_error', 3.784), ('loss_ce_0', 0.299), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.204), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.784), ('loss_caption_0', 3.334), ('loss_caption', 3.333), ('total_loss', 16.279)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 27000 (epoch 2), loss = OrderedDict([('loss_ce', 0.289), ('loss_counter', 0.118), ('loss_bbox', 0.076), ('loss_giou', 0.203), ('loss_self_iou', 0.102), ('cardinality_error', 3.64), ('loss_ce_0', 0.291), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.64), ('loss_caption_0', 3.348), ('loss_caption', 3.345), ('total_loss', 16.287)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 28000 (epoch 2), loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.125), ('loss_bbox', 0.077), ('loss_giou', 0.201), ('loss_self_iou', 0.095), ('cardinality_error', 3.774), ('loss_ce_0', 0.293), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.774), ('loss_caption_0', 3.337), ('loss_caption', 3.333), ('total_loss', 16.249)]), time/iter = 0.194, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 29000 (epoch 2), loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.12), ('loss_bbox', 0.075), ('loss_giou', 0.204), ('loss_self_iou', 0.1), ('cardinality_error', 3.755), ('loss_ce_0', 0.299), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.205), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.755), ('loss_caption_0', 3.315), ('loss_caption', 3.321), ('total_loss', 16.223)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 30000 (epoch 2), loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.119), ('loss_bbox', 0.071), ('loss_giou', 0.195), ('loss_self_iou', 0.103), ('cardinality_error', 3.72), ('loss_ce_0', 0.302), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.72), ('loss_caption_0', 3.347), ('loss_caption', 3.349), ('total_loss', 16.283)]), time/iter = 0.195, bad_vid = 0.000 Validation results of iter 30027: Bleu_1:0.15440507165989542 Bleu_2:0.08178273697953425 Bleu_3:0.042600749568780155 Bleu_4:0.02119123483046711 METEOR:0.08563216148714695 ROUGE_L:0.156809182143994 CIDEr:0.25960752079137744 Recall:0.5075951227720545 Precision:0.571834112941489 soda_c:0.048597974030683 para_Bleu_1:0.3985431504573892 para_Bleu_2:0.22415947108296613 para_Bleu_3:0.1341003834690626 para_Bleu_4:0.08312155143550452 para_METEOR:0.1510085678983445 para_ROUGE_L:0.2957598062989384 para_CIDEr:0.12271570278513648 overall score of iter 30027: 0.3568458221189855 Save model at iter 30027 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save Best-model at iter 30027 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 31000 (epoch 3), loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.123), ('loss_bbox', 0.073), ('loss_giou', 0.202), ('loss_self_iou', 0.114), ('cardinality_error', 3.772), ('loss_ce_0', 0.296), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.115), ('cardinality_error_0', 3.772), ('loss_caption_0', 3.24), ('loss_caption', 3.242), ('total_loss', 15.889)]), time/iter = 0.725, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 32000 (epoch 3), loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.117), ('loss_bbox', 0.069), ('loss_giou', 0.193), ('loss_self_iou', 0.093), ('cardinality_error', 3.66), ('loss_ce_0', 0.3), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.66), ('loss_caption_0', 3.251), ('loss_caption', 3.248), ('total_loss', 15.869)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 33000 (epoch 3), loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.126), ('loss_bbox', 0.07), ('loss_giou', 0.197), ('loss_self_iou', 0.102), ('cardinality_error', 3.787), ('loss_ce_0', 0.301), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.199), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.787), ('loss_caption_0', 3.223), ('loss_caption', 3.225), ('total_loss', 15.81)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 34000 (epoch 3), loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.121), ('loss_bbox', 0.076), ('loss_giou', 0.201), ('loss_self_iou', 0.107), ('cardinality_error', 3.719), ('loss_ce_0', 0.296), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.077), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.108), ('cardinality_error_0', 3.719), ('loss_caption_0', 3.21), ('loss_caption', 3.206), ('total_loss', 15.752)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 35000 (epoch 3), loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.122), ('loss_bbox', 0.074), ('loss_giou', 0.201), ('loss_self_iou', 0.1), ('cardinality_error', 3.761), ('loss_ce_0', 0.304), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.761), ('loss_caption_0', 3.261), ('loss_caption', 3.267), ('total_loss', 16.006)]), time/iter = 0.187, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 36000 (epoch 3), loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.12), ('loss_bbox', 0.074), ('loss_giou', 0.202), ('loss_self_iou', 0.096), ('cardinality_error', 3.731), ('loss_ce_0', 0.302), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.075), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.731), ('loss_caption_0', 3.322), ('loss_caption', 3.322), ('total_loss', 16.237)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 37000 (epoch 3), loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.12), ('loss_bbox', 0.069), ('loss_giou', 0.193), ('loss_self_iou', 0.088), ('cardinality_error', 3.747), ('loss_ce_0', 0.306), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.089), ('cardinality_error_0', 3.747), ('loss_caption_0', 3.276), ('loss_caption', 3.278), ('total_loss', 16.005)]), time/iter = 0.194, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 38000 (epoch 3), loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.122), ('loss_bbox', 0.073), ('loss_giou', 0.198), ('loss_self_iou', 0.096), ('cardinality_error', 3.747), ('loss_ce_0', 0.295), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.199), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.747), ('loss_caption_0', 3.26), ('loss_caption', 3.267), ('total_loss', 15.944)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 39000 (epoch 3), loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.12), ('loss_bbox', 0.073), ('loss_giou', 0.194), ('loss_self_iou', 0.096), ('cardinality_error', 3.714), ('loss_ce_0', 0.3), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.714), ('loss_caption_0', 3.29), ('loss_caption', 3.284), ('total_loss', 16.029)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 40000 (epoch 3), loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.124), ('loss_bbox', 0.068), ('loss_giou', 0.187), ('loss_self_iou', 0.098), ('cardinality_error', 3.742), ('loss_ce_0', 0.302), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.742), ('loss_caption_0', 3.255), ('loss_caption', 3.258), ('total_loss', 15.861)]), time/iter = 0.191, bad_vid = 0.000 Validation results of iter 40036: Bleu_1:0.16003947012491918 Bleu_2:0.08640386650819816 Bleu_3:0.045769192920880976 Bleu_4:0.023139762266241797 METEOR:0.08893476927946467 ROUGE_L:0.16285119298911696 CIDEr:0.27850058398714506 Recall:0.4974410652224822 Precision:0.571762083926507 soda_c:0.04898353247531122 para_Bleu_1:0.4116267700746525 para_Bleu_2:0.23315066082372427 para_Bleu_3:0.139785630195007 para_Bleu_4:0.08689414164874545 para_METEOR:0.15321412716959742 para_ROUGE_L:0.2993749803089721 para_CIDEr:0.12755194391496638 overall score of iter 40036: 0.3676602127333093 Save model at iter 40036 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save Best-model at iter 40036 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 41000 (epoch 4), loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.122), ('loss_bbox', 0.071), ('loss_giou', 0.196), ('loss_self_iou', 0.094), ('cardinality_error', 3.73), ('loss_ce_0', 0.303), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.73), ('loss_caption_0', 3.159), ('loss_caption', 3.162), ('total_loss', 15.549)]), time/iter = 0.733, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 42000 (epoch 4), loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.117), ('loss_bbox', 0.072), ('loss_giou', 0.188), ('loss_self_iou', 0.097), ('cardinality_error', 3.698), ('loss_ce_0', 0.298), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.698), ('loss_caption_0', 3.191), ('loss_caption', 3.187), ('total_loss', 15.571)]), time/iter = 0.194, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 43000 (epoch 4), loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.12), ('loss_bbox', 0.07), ('loss_giou', 0.198), ('loss_self_iou', 0.089), ('cardinality_error', 3.785), ('loss_ce_0', 0.306), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.2), ('loss_self_iou_0', 0.087), ('cardinality_error_0', 3.785), ('loss_caption_0', 3.247), ('loss_caption', 3.249), ('total_loss', 15.93)]), time/iter = 0.195, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 44000 (epoch 4), loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.12), ('loss_bbox', 0.072), ('loss_giou', 0.194), ('loss_self_iou', 0.104), ('cardinality_error', 3.727), ('loss_ce_0', 0.302), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.727), ('loss_caption_0', 3.228), ('loss_caption', 3.227), ('total_loss', 15.794)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 45000 (epoch 4), loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.12), ('loss_bbox', 0.07), ('loss_giou', 0.194), ('loss_self_iou', 0.094), ('cardinality_error', 3.684), ('loss_ce_0', 0.304), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.684), ('loss_caption_0', 3.138), ('loss_caption', 3.143), ('total_loss', 15.458)]), time/iter = 0.190, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 46000 (epoch 4), loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.123), ('loss_bbox', 0.071), ('loss_giou', 0.194), ('loss_self_iou', 0.107), ('cardinality_error', 3.8), ('loss_ce_0', 0.301), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.8), ('loss_caption_0', 3.198), ('loss_caption', 3.202), ('total_loss', 15.69)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 47000 (epoch 4), loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.124), ('loss_bbox', 0.071), ('loss_giou', 0.193), ('loss_self_iou', 0.1), ('cardinality_error', 3.724), ('loss_ce_0', 0.302), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.724), ('loss_caption_0', 3.166), ('loss_caption', 3.167), ('total_loss', 15.544)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 48000 (epoch 4), loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.126), ('loss_bbox', 0.074), ('loss_giou', 0.194), ('loss_self_iou', 0.1), ('cardinality_error', 3.779), ('loss_ce_0', 0.303), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.779), ('loss_caption_0', 3.197), ('loss_caption', 3.204), ('total_loss', 15.693)]), time/iter = 0.194, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 49000 (epoch 4), loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.117), ('loss_bbox', 0.072), ('loss_giou', 0.186), ('loss_self_iou', 0.103), ('cardinality_error', 3.67), ('loss_ce_0', 0.299), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.67), ('loss_caption_0', 3.197), ('loss_caption', 3.193), ('total_loss', 15.597)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 50000 (epoch 4), loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.122), ('loss_bbox', 0.071), ('loss_giou', 0.191), ('loss_self_iou', 0.1), ('cardinality_error', 3.769), ('loss_ce_0', 0.303), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.769), ('loss_caption_0', 3.195), ('loss_caption', 3.196), ('total_loss', 15.646)]), time/iter = 0.193, bad_vid = 0.000 Validation results of iter 50045: Bleu_1:0.1612752203314224 Bleu_2:0.08712092952271142 Bleu_3:0.04643407984417907 Bleu_4:0.024237450149938583 METEOR:0.0888552980469009 ROUGE_L:0.16165678007821221 CIDEr:0.28844655875134945 Recall:0.5079771255793173 Precision:0.5707494407158785 soda_c:0.05143467092505771 para_Bleu_1:0.425828341023263 para_Bleu_2:0.2431293051387748 para_Bleu_3:0.14662751878582 para_Bleu_4:0.09131956416083617 para_METEOR:0.15868276543147294 para_ROUGE_L:0.30762031965083425 para_CIDEr:0.1438790695271004 overall score of iter 50045: 0.39388139911940956 Save model at iter 50045 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save Best-model at iter 50045 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 51000 (epoch 5), loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.119), ('loss_bbox', 0.072), ('loss_giou', 0.19), ('loss_self_iou', 0.1), ('cardinality_error', 3.708), ('loss_ce_0', 0.304), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.708), ('loss_caption_0', 3.123), ('loss_caption', 3.122), ('total_loss', 15.345)]), time/iter = 0.739, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 52000 (epoch 5), loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.122), ('loss_bbox', 0.07), ('loss_giou', 0.195), ('loss_self_iou', 0.091), ('cardinality_error', 3.787), ('loss_ce_0', 0.302), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.198), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.787), ('loss_caption_0', 3.08), ('loss_caption', 3.08), ('total_loss', 15.224)]), time/iter = 0.193, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 53000 (epoch 5), loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.12), ('loss_bbox', 0.07), ('loss_giou', 0.192), ('loss_self_iou', 0.101), ('cardinality_error', 3.688), ('loss_ce_0', 0.302), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.688), ('loss_caption_0', 3.121), ('loss_caption', 3.125), ('total_loss', 15.366)]), time/iter = 0.196, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 54000 (epoch 5), loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.12), ('loss_bbox', 0.069), ('loss_giou', 0.184), ('loss_self_iou', 0.096), ('cardinality_error', 3.66), ('loss_ce_0', 0.303), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.66), ('loss_caption_0', 3.151), ('loss_caption', 3.158), ('total_loss', 15.44)]), time/iter = 0.193, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 55000 (epoch 5), loss = OrderedDict([('loss_ce', 0.314), ('loss_counter', 0.123), ('loss_bbox', 0.069), ('loss_giou', 0.186), ('loss_self_iou', 0.102), ('cardinality_error', 3.759), ('loss_ce_0', 0.314), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.188), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.759), ('loss_caption_0', 3.137), ('loss_caption', 3.138), ('total_loss', 15.427)]), time/iter = 0.193, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 56000 (epoch 5), loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.12), ('loss_bbox', 0.069), ('loss_giou', 0.186), ('loss_self_iou', 0.102), ('cardinality_error', 3.7), ('loss_ce_0', 0.303), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.7), ('loss_caption_0', 3.128), ('loss_caption', 3.132), ('total_loss', 15.353)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 57000 (epoch 5), loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.125), ('loss_bbox', 0.069), ('loss_giou', 0.192), ('loss_self_iou', 0.094), ('cardinality_error', 3.833), ('loss_ce_0', 0.308), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.833), ('loss_caption_0', 3.157), ('loss_caption', 3.154), ('total_loss', 15.516)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 58000 (epoch 5), loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.116), ('loss_bbox', 0.072), ('loss_giou', 0.192), ('loss_self_iou', 0.099), ('cardinality_error', 3.724), ('loss_ce_0', 0.3), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.724), ('loss_caption_0', 3.092), ('loss_caption', 3.088), ('total_loss', 15.209)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 59000 (epoch 5), loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.126), ('loss_bbox', 0.07), ('loss_giou', 0.187), ('loss_self_iou', 0.092), ('cardinality_error', 3.806), ('loss_ce_0', 0.304), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.806), ('loss_caption_0', 3.204), ('loss_caption', 3.204), ('total_loss', 15.668)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 60000 (epoch 5), loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.119), ('loss_bbox', 0.073), ('loss_giou', 0.197), ('loss_self_iou', 0.102), ('cardinality_error', 3.73), ('loss_ce_0', 0.298), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.198), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.73), ('loss_caption_0', 3.185), ('loss_caption', 3.179), ('total_loss', 15.62)]), time/iter = 0.192, bad_vid = 0.000 Validation results of iter 60054: Bleu_1:0.16203040821313286 Bleu_2:0.087418866671477 Bleu_3:0.04641401855891123 Bleu_4:0.023872355329811287 METEOR:0.08736154709181514 ROUGE_L:0.16095171754962678 CIDEr:0.3019460931650574 Recall:0.5237442505746305 Precision:0.5691986983933232 soda_c:0.05366939846142926 para_Bleu_1:0.4285515683378188 para_Bleu_2:0.24896313523930838 para_Bleu_3:0.15083849533584295 para_Bleu_4:0.09425440122753082 para_METEOR:0.15418242275887206 para_ROUGE_L:0.3037081433191389 para_CIDEr:0.16822639157343386 overall score of iter 60054: 0.41666321555983676 Save model at iter 60054 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save Best-model at iter 60054 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 61000 (epoch 6), loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.117), ('loss_bbox', 0.068), ('loss_giou', 0.183), ('loss_self_iou', 0.099), ('cardinality_error', 3.687), ('loss_ce_0', 0.303), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.185), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.687), ('loss_caption_0', 3.025), ('loss_caption', 3.031), ('total_loss', 14.914)]), time/iter = 0.715, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 62000 (epoch 6), loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.125), ('loss_bbox', 0.068), ('loss_giou', 0.192), ('loss_self_iou', 0.088), ('cardinality_error', 3.809), ('loss_ce_0', 0.304), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.089), ('cardinality_error_0', 3.809), ('loss_caption_0', 3.067), ('loss_caption', 3.064), ('total_loss', 15.147)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 63000 (epoch 6), loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.113), ('loss_bbox', 0.072), ('loss_giou', 0.189), ('loss_self_iou', 0.102), ('cardinality_error', 3.636), ('loss_ce_0', 0.301), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.193), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.636), ('loss_caption_0', 3.09), ('loss_caption', 3.083), ('total_loss', 15.188)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 64000 (epoch 6), loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.12), ('loss_bbox', 0.067), ('loss_giou', 0.185), ('loss_self_iou', 0.105), ('cardinality_error', 3.738), ('loss_ce_0', 0.309), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.186), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.738), ('loss_caption_0', 3.09), ('loss_caption', 3.088), ('total_loss', 15.193)]), time/iter = 0.194, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 65000 (epoch 6), loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.123), ('loss_bbox', 0.069), ('loss_giou', 0.191), ('loss_self_iou', 0.094), ('cardinality_error', 3.735), ('loss_ce_0', 0.304), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.191), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.735), ('loss_caption_0', 3.087), ('loss_caption', 3.083), ('total_loss', 15.203)]), time/iter = 0.190, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 66000 (epoch 6), loss = OrderedDict([('loss_ce', 0.307), ('loss_counter', 0.121), ('loss_bbox', 0.069), ('loss_giou', 0.188), ('loss_self_iou', 0.095), ('cardinality_error', 3.753), ('loss_ce_0', 0.307), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.753), ('loss_caption_0', 3.093), ('loss_caption', 3.093), ('total_loss', 15.235)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 67000 (epoch 6), loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.123), ('loss_bbox', 0.071), ('loss_giou', 0.189), ('loss_self_iou', 0.099), ('cardinality_error', 3.781), ('loss_ce_0', 0.299), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.781), ('loss_caption_0', 3.104), ('loss_caption', 3.095), ('total_loss', 15.24)]), time/iter = 0.194, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 68000 (epoch 6), loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.118), ('loss_bbox', 0.073), ('loss_giou', 0.186), ('loss_self_iou', 0.102), ('cardinality_error', 3.702), ('loss_ce_0', 0.3), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.702), ('loss_caption_0', 3.092), ('loss_caption', 3.087), ('total_loss', 15.171)]), time/iter = 0.193, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 69000 (epoch 6), loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.116), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.087), ('cardinality_error', 3.705), ('loss_ce_0', 0.303), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.705), ('loss_caption_0', 3.087), ('loss_caption', 3.084), ('total_loss', 15.154)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 70000 (epoch 6), loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.119), ('loss_bbox', 0.07), ('loss_giou', 0.188), ('loss_self_iou', 0.104), ('cardinality_error', 3.763), ('loss_ce_0', 0.309), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.763), ('loss_caption_0', 3.137), ('loss_caption', 3.142), ('total_loss', 15.421)]), time/iter = 0.201, bad_vid = 0.000 Validation results of iter 70063: Bleu_1:0.17095715677415013 Bleu_2:0.0951967897773989 Bleu_3:0.05145074727592996 Bleu_4:0.026686223548170303 METEOR:0.09033289555302068 ROUGE_L:0.16939818741017104 CIDEr:0.33299543538258497 Recall:0.5001550726802355 Precision:0.5629321740898863 soda_c:0.05378783144134501 para_Bleu_1:0.44719474980697405 para_Bleu_2:0.2615784516531111 para_Bleu_3:0.15956746990786394 para_Bleu_4:0.09983770060804388 para_METEOR:0.15549284849496958 para_ROUGE_L:0.30852597622578265 para_CIDEr:0.18758102150887232 overall score of iter 70063: 0.4429115706118858 Save model at iter 70063 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save Best-model at iter 70063 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 71000 (epoch 7), loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.115), ('loss_bbox', 0.067), ('loss_giou', 0.187), ('loss_self_iou', 0.091), ('cardinality_error', 3.724), ('loss_ce_0', 0.304), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.724), ('loss_caption_0', 2.994), ('loss_caption', 2.994), ('total_loss', 14.812)]), time/iter = 0.691, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 72000 (epoch 7), loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.118), ('loss_bbox', 0.07), ('loss_giou', 0.187), ('loss_self_iou', 0.099), ('cardinality_error', 3.665), ('loss_ce_0', 0.296), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.665), ('loss_caption_0', 2.995), ('loss_caption', 3.0), ('total_loss', 14.803)]), time/iter = 0.193, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 73000 (epoch 7), loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.122), ('loss_bbox', 0.067), ('loss_giou', 0.183), ('loss_self_iou', 0.099), ('cardinality_error', 3.762), ('loss_ce_0', 0.302), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.762), ('loss_caption_0', 3.03), ('loss_caption', 3.034), ('total_loss', 14.924)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 74000 (epoch 7), loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.12), ('loss_bbox', 0.067), ('loss_giou', 0.181), ('loss_self_iou', 0.093), ('cardinality_error', 3.722), ('loss_ce_0', 0.304), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.722), ('loss_caption_0', 3.061), ('loss_caption', 3.062), ('total_loss', 15.037)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 75000 (epoch 7), loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.124), ('loss_bbox', 0.069), ('loss_giou', 0.188), ('loss_self_iou', 0.097), ('cardinality_error', 3.835), ('loss_ce_0', 0.302), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.835), ('loss_caption_0', 3.102), ('loss_caption', 3.108), ('total_loss', 15.261)]), time/iter = 0.195, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 76000 (epoch 7), loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.118), ('loss_bbox', 0.069), ('loss_giou', 0.19), ('loss_self_iou', 0.096), ('cardinality_error', 3.787), ('loss_ce_0', 0.305), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.787), ('loss_caption_0', 3.055), ('loss_caption', 3.056), ('total_loss', 15.081)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 77000 (epoch 7), loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.122), ('loss_bbox', 0.07), ('loss_giou', 0.191), ('loss_self_iou', 0.101), ('cardinality_error', 3.753), ('loss_ce_0', 0.3), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.753), ('loss_caption_0', 3.064), ('loss_caption', 3.063), ('total_loss', 15.105)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 78000 (epoch 7), loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.118), ('loss_bbox', 0.069), ('loss_giou', 0.192), ('loss_self_iou', 0.094), ('cardinality_error', 3.812), ('loss_ce_0', 0.302), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.812), ('loss_caption_0', 3.075), ('loss_caption', 3.081), ('total_loss', 15.186)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 79000 (epoch 7), loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.119), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.099), ('cardinality_error', 3.712), ('loss_ce_0', 0.304), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.712), ('loss_caption_0', 3.004), ('loss_caption', 3.004), ('total_loss', 14.833)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 80000 (epoch 7), loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.117), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.099), ('cardinality_error', 3.639), ('loss_ce_0', 0.298), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.185), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.639), ('loss_caption_0', 3.011), ('loss_caption', 3.021), ('total_loss', 14.846)]), time/iter = 0.189, bad_vid = 0.000 Validation results of iter 80072: Bleu_1:0.16525493799366836 Bleu_2:0.09017429361474327 Bleu_3:0.04843073565357156 Bleu_4:0.025752141227780294 METEOR:0.09042668571725655 ROUGE_L:0.1657835735936403 CIDEr:0.30766696683798356 Recall:0.5070758476264831 Precision:0.5698723815334497 soda_c:0.05193286444599829 para_Bleu_1:0.4299765573510605 para_Bleu_2:0.24998607326423264 para_Bleu_3:0.15168978606887273 para_Bleu_4:0.09540463753102806 para_METEOR:0.15913054274631774 para_ROUGE_L:0.30821511076520103 para_CIDEr:0.14655297481419807 overall score of iter 80072: 0.4010881550915439 Save model at iter 80072 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 81000 (epoch 8), loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.116), ('loss_bbox', 0.064), ('loss_giou', 0.177), ('loss_self_iou', 0.098), ('cardinality_error', 3.664), ('loss_ce_0', 0.3), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.972), ('loss_caption', 2.974), ('total_loss', 14.63)]), time/iter = 0.723, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 82000 (epoch 8), loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.113), ('loss_bbox', 0.067), ('loss_giou', 0.179), ('loss_self_iou', 0.098), ('cardinality_error', 3.692), ('loss_ce_0', 0.301), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.692), ('loss_caption_0', 2.914), ('loss_caption', 2.912), ('total_loss', 14.413)]), time/iter = 0.190, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 83000 (epoch 8), loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.117), ('loss_bbox', 0.067), ('loss_giou', 0.188), ('loss_self_iou', 0.097), ('cardinality_error', 3.764), ('loss_ce_0', 0.298), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.764), ('loss_caption_0', 2.939), ('loss_caption', 2.933), ('total_loss', 14.562)]), time/iter = 0.188, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 84000 (epoch 8), loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.119), ('loss_bbox', 0.066), ('loss_giou', 0.18), ('loss_self_iou', 0.086), ('cardinality_error', 3.724), ('loss_ce_0', 0.3), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.086), ('cardinality_error_0', 3.724), ('loss_caption_0', 2.964), ('loss_caption', 2.963), ('total_loss', 14.614)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 85000 (epoch 8), loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.114), ('loss_bbox', 0.066), ('loss_giou', 0.187), ('loss_self_iou', 0.094), ('cardinality_error', 3.73), ('loss_ce_0', 0.301), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.73), ('loss_caption_0', 2.942), ('loss_caption', 2.945), ('total_loss', 14.596)]), time/iter = 0.190, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 86000 (epoch 8), loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.118), ('loss_bbox', 0.067), ('loss_giou', 0.184), ('loss_self_iou', 0.096), ('cardinality_error', 3.764), ('loss_ce_0', 0.298), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.764), ('loss_caption_0', 2.989), ('loss_caption', 2.988), ('total_loss', 14.745)]), time/iter = 0.193, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 87000 (epoch 8), loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.119), ('loss_bbox', 0.067), ('loss_giou', 0.178), ('loss_self_iou', 0.096), ('cardinality_error', 3.692), ('loss_ce_0', 0.298), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.692), ('loss_caption_0', 2.93), ('loss_caption', 2.931), ('total_loss', 14.465)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 88000 (epoch 8), loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.117), ('loss_bbox', 0.068), ('loss_giou', 0.181), ('loss_self_iou', 0.102), ('cardinality_error', 3.74), ('loss_ce_0', 0.298), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.105), ('cardinality_error_0', 3.74), ('loss_caption_0', 2.945), ('loss_caption', 2.939), ('total_loss', 14.538)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 89000 (epoch 8), loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.124), ('loss_bbox', 0.069), ('loss_giou', 0.186), ('loss_self_iou', 0.096), ('cardinality_error', 3.911), ('loss_ce_0', 0.303), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.188), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.911), ('loss_caption_0', 2.981), ('loss_caption', 2.985), ('total_loss', 14.762)]), time/iter = 0.190, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 90000 (epoch 8), loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.113), ('loss_bbox', 0.066), ('loss_giou', 0.174), ('loss_self_iou', 0.099), ('cardinality_error', 3.667), ('loss_ce_0', 0.3), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.667), ('loss_caption_0', 2.946), ('loss_caption', 2.945), ('total_loss', 14.493)]), time/iter = 0.191, bad_vid = 0.000 Validation results of iter 90081: Bleu_1:0.1659435247550983 Bleu_2:0.09010888064116455 Bleu_3:0.04740925434645997 Bleu_4:0.023810200153797586 METEOR:0.0893691583245007 ROUGE_L:0.16481267120708817 CIDEr:0.3096929324572276 Recall:0.5271698247293078 Precision:0.5766981899532185 soda_c:0.05637593299631936 para_Bleu_1:0.4507795558374508 para_Bleu_2:0.2668765313566654 para_Bleu_3:0.16324000259413463 para_Bleu_4:0.10292908422008885 para_METEOR:0.163503434468027 para_ROUGE_L:0.3141109355407807 para_CIDEr:0.1830754815850521 overall score of iter 90081: 0.44950800027316795 Save model at iter 90081 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save Best-model at iter 90081 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 91000 (epoch 9), loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.121), ('loss_bbox', 0.066), ('loss_giou', 0.179), ('loss_self_iou', 0.097), ('cardinality_error', 3.807), ('loss_ce_0', 0.298), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.807), ('loss_caption_0', 2.916), ('loss_caption', 2.914), ('total_loss', 14.411)]), time/iter = 0.724, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 92000 (epoch 9), loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.121), ('loss_bbox', 0.067), ('loss_giou', 0.179), ('loss_self_iou', 0.093), ('cardinality_error', 3.784), ('loss_ce_0', 0.298), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.784), ('loss_caption_0', 2.916), ('loss_caption', 2.915), ('total_loss', 14.422)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 93000 (epoch 9), loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.117), ('loss_bbox', 0.065), ('loss_giou', 0.18), ('loss_self_iou', 0.091), ('cardinality_error', 3.806), ('loss_ce_0', 0.3), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.806), ('loss_caption_0', 2.9), ('loss_caption', 2.905), ('total_loss', 14.377)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 94000 (epoch 9), loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.109), ('loss_bbox', 0.068), ('loss_giou', 0.174), ('loss_self_iou', 0.105), ('cardinality_error', 3.616), ('loss_ce_0', 0.293), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.106), ('cardinality_error_0', 3.616), ('loss_caption_0', 2.912), ('loss_caption', 2.914), ('total_loss', 14.339)]), time/iter = 0.187, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 95000 (epoch 9), loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.12), ('loss_bbox', 0.066), ('loss_giou', 0.185), ('loss_self_iou', 0.093), ('cardinality_error', 3.805), ('loss_ce_0', 0.296), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.805), ('loss_caption_0', 2.938), ('loss_caption', 2.941), ('total_loss', 14.546)]), time/iter = 0.188, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 96000 (epoch 9), loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.114), ('loss_bbox', 0.069), ('loss_giou', 0.177), ('loss_self_iou', 0.103), ('cardinality_error', 3.684), ('loss_ce_0', 0.293), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.105), ('cardinality_error_0', 3.684), ('loss_caption_0', 2.928), ('loss_caption', 2.931), ('total_loss', 14.434)]), time/iter = 0.193, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 97000 (epoch 9), loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.111), ('loss_bbox', 0.066), ('loss_giou', 0.184), ('loss_self_iou', 0.095), ('cardinality_error', 3.693), ('loss_ce_0', 0.298), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.693), ('loss_caption_0', 2.902), ('loss_caption', 2.903), ('total_loss', 14.392)]), time/iter = 0.190, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 98000 (epoch 9), loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.115), ('loss_bbox', 0.068), ('loss_giou', 0.181), ('loss_self_iou', 0.089), ('cardinality_error', 3.738), ('loss_ce_0', 0.298), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.738), ('loss_caption_0', 2.896), ('loss_caption', 2.902), ('total_loss', 14.361)]), time/iter = 0.188, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 99000 (epoch 9), loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.115), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.095), ('cardinality_error', 3.702), ('loss_ce_0', 0.296), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.702), ('loss_caption_0', 2.956), ('loss_caption', 2.956), ('total_loss', 14.525)]), time/iter = 0.195, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 100000 (epoch 9), loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.114), ('loss_bbox', 0.066), ('loss_giou', 0.177), ('loss_self_iou', 0.092), ('cardinality_error', 3.751), ('loss_ce_0', 0.298), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.751), ('loss_caption_0', 2.932), ('loss_caption', 2.932), ('total_loss', 14.453)]), time/iter = 0.191, bad_vid = 0.000 Validation results of iter 100090: Bleu_1:0.16664911544364056 Bleu_2:0.09023295213839283 Bleu_3:0.04763940550902772 Bleu_4:0.02409205514859969 METEOR:0.0878588871148787 ROUGE_L:0.16401896184386325 CIDEr:0.31947446694949533 Recall:0.5282742157284517 Precision:0.5750796556165633 soda_c:0.05745241491068406 para_Bleu_1:0.46204429574393835 para_Bleu_2:0.2749900961045832 para_Bleu_3:0.1683879565471281 para_Bleu_4:0.10624339593597942 para_METEOR:0.16245439213508253 para_ROUGE_L:0.3162965936511474 para_CIDEr:0.20803178964320856 overall score of iter 100090: 0.4767295777142705 Save model at iter 100090 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save Best-model at iter 100090 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 101000 (epoch 10), loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.111), ('loss_bbox', 0.065), ('loss_giou', 0.173), ('loss_self_iou', 0.093), ('cardinality_error', 3.699), ('loss_ce_0', 0.292), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.699), ('loss_caption_0', 2.849), ('loss_caption', 2.847), ('total_loss', 14.064)]), time/iter = 0.713, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 102000 (epoch 10), loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.116), ('loss_bbox', 0.065), ('loss_giou', 0.174), ('loss_self_iou', 0.093), ('cardinality_error', 3.695), ('loss_ce_0', 0.293), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.695), ('loss_caption_0', 2.85), ('loss_caption', 2.848), ('total_loss', 14.087)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 103000 (epoch 10), loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.115), ('loss_bbox', 0.066), ('loss_giou', 0.173), ('loss_self_iou', 0.093), ('cardinality_error', 3.724), ('loss_ce_0', 0.293), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.724), ('loss_caption_0', 2.846), ('loss_caption', 2.854), ('total_loss', 14.092)]), time/iter = 0.190, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 104000 (epoch 10), loss = OrderedDict([('loss_ce', 0.289), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.178), ('loss_self_iou', 0.097), ('cardinality_error', 3.736), ('loss_ce_0', 0.29), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.736), ('loss_caption_0', 2.916), ('loss_caption', 2.913), ('total_loss', 14.362)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 105000 (epoch 10), loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.117), ('loss_bbox', 0.067), ('loss_giou', 0.18), ('loss_self_iou', 0.091), ('cardinality_error', 3.736), ('loss_ce_0', 0.29), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.736), ('loss_caption_0', 2.907), ('loss_caption', 2.902), ('total_loss', 14.342)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 106000 (epoch 10), loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.113), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.11), ('cardinality_error', 3.775), ('loss_ce_0', 0.293), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.11), ('cardinality_error_0', 3.775), ('loss_caption_0', 2.876), ('loss_caption', 2.875), ('total_loss', 14.264)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 107000 (epoch 10), loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.114), ('loss_bbox', 0.069), ('loss_giou', 0.178), ('loss_self_iou', 0.099), ('cardinality_error', 3.743), ('loss_ce_0', 0.291), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.743), ('loss_caption_0', 2.91), ('loss_caption', 2.909), ('total_loss', 14.358)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 108000 (epoch 10), loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.118), ('loss_bbox', 0.066), ('loss_giou', 0.177), ('loss_self_iou', 0.1), ('cardinality_error', 3.81), ('loss_ce_0', 0.296), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.81), ('loss_caption_0', 2.928), ('loss_caption', 2.93), ('total_loss', 14.446)]), time/iter = 0.194, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 109000 (epoch 10), loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.118), ('loss_bbox', 0.063), ('loss_giou', 0.178), ('loss_self_iou', 0.091), ('cardinality_error', 3.78), ('loss_ce_0', 0.296), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.78), ('loss_caption_0', 2.916), ('loss_caption', 2.912), ('total_loss', 14.396)]), time/iter = 0.193, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 110000 (epoch 10), loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.178), ('loss_self_iou', 0.087), ('cardinality_error', 3.72), ('loss_ce_0', 0.297), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.72), ('loss_caption_0', 2.948), ('loss_caption', 2.948), ('total_loss', 14.539)]), time/iter = 0.196, bad_vid = 0.000 Validation results of iter 110099: Bleu_1:0.1671778590456048 Bleu_2:0.09077014613023152 Bleu_3:0.0476684747303012 Bleu_4:0.02445564298599047 METEOR:0.08933235383587503 ROUGE_L:0.1654660162888944 CIDEr:0.31886265111118334 Recall:0.5314017615268335 Precision:0.5831469052945512 soda_c:0.05853263249839839 para_Bleu_1:0.46544090189732323 para_Bleu_2:0.2789325258737778 para_Bleu_3:0.17172911957785325 para_Bleu_4:0.10903514181091935 para_METEOR:0.16550159188298816 para_ROUGE_L:0.3181118223429575 para_CIDEr:0.2056618808195008 overall score of iter 110099: 0.4801986145134083 Save model at iter 110099 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save Best-model at iter 110099 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 111000 (epoch 11), loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.114), ('loss_bbox', 0.066), ('loss_giou', 0.173), ('loss_self_iou', 0.095), ('cardinality_error', 3.718), ('loss_ce_0', 0.287), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.718), ('loss_caption_0', 2.867), ('loss_caption', 2.869), ('total_loss', 14.14)]), time/iter = 0.727, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 112000 (epoch 11), loss = OrderedDict([('loss_ce', 0.287), ('loss_counter', 0.111), ('loss_bbox', 0.064), ('loss_giou', 0.169), ('loss_self_iou', 0.098), ('cardinality_error', 3.725), ('loss_ce_0', 0.289), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.725), ('loss_caption_0', 2.844), ('loss_caption', 2.842), ('total_loss', 14.015)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 113000 (epoch 11), loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.111), ('loss_bbox', 0.064), ('loss_giou', 0.172), ('loss_self_iou', 0.097), ('cardinality_error', 3.734), ('loss_ce_0', 0.286), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.734), ('loss_caption_0', 2.837), ('loss_caption', 2.834), ('total_loss', 13.981)]), time/iter = 0.188, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 114000 (epoch 11), loss = OrderedDict([('loss_ce', 0.283), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.096), ('cardinality_error', 3.739), ('loss_ce_0', 0.285), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.18), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.739), ('loss_caption_0', 2.855), ('loss_caption', 2.857), ('total_loss', 14.084)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 115000 (epoch 11), loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.111), ('loss_bbox', 0.064), ('loss_giou', 0.175), ('loss_self_iou', 0.092), ('cardinality_error', 3.74), ('loss_ce_0', 0.284), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.18), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.74), ('loss_caption_0', 2.823), ('loss_caption', 2.824), ('total_loss', 13.959)]), time/iter = 0.190, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 116000 (epoch 11), loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.113), ('loss_bbox', 0.065), ('loss_giou', 0.177), ('loss_self_iou', 0.088), ('cardinality_error', 3.753), ('loss_ce_0', 0.288), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.753), ('loss_caption_0', 2.846), ('loss_caption', 2.843), ('total_loss', 14.073)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 117000 (epoch 11), loss = OrderedDict([('loss_ce', 0.285), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.096), ('cardinality_error', 3.755), ('loss_ce_0', 0.287), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.755), ('loss_caption_0', 2.804), ('loss_caption', 2.81), ('total_loss', 13.896)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 118000 (epoch 11), loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.109), ('loss_bbox', 0.066), ('loss_giou', 0.175), ('loss_self_iou', 0.093), ('cardinality_error', 3.715), ('loss_ce_0', 0.285), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.715), ('loss_caption_0', 2.863), ('loss_caption', 2.866), ('total_loss', 14.129)]), time/iter = 0.193, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 119000 (epoch 11), loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.114), ('loss_bbox', 0.064), ('loss_giou', 0.176), ('loss_self_iou', 0.098), ('cardinality_error', 3.735), ('loss_ce_0', 0.287), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.735), ('loss_caption_0', 2.844), ('loss_caption', 2.843), ('total_loss', 14.061)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 120000 (epoch 11), loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.113), ('loss_bbox', 0.065), ('loss_giou', 0.175), ('loss_self_iou', 0.101), ('cardinality_error', 3.755), ('loss_ce_0', 0.285), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.755), ('loss_caption_0', 2.868), ('loss_caption', 2.878), ('total_loss', 14.168)]), time/iter = 0.190, bad_vid = 0.000 Validation results of iter 120108: Bleu_1:0.16560019346009094 Bleu_2:0.08934946581658681 Bleu_3:0.04692472826903507 Bleu_4:0.023331060597699706 METEOR:0.08861943572471001 ROUGE_L:0.16392659155605854 CIDEr:0.31177527957257306 Recall:0.5248955646301546 Precision:0.5713061826316813 soda_c:0.056694173808073595 para_Bleu_1:0.45551540477127933 para_Bleu_2:0.2725270289009415 para_Bleu_3:0.16731081427102573 para_Bleu_4:0.10555679460767188 para_METEOR:0.1665724805603667 para_ROUGE_L:0.31619749898051375 para_CIDEr:0.19719071969736374 overall score of iter 120108: 0.4693199948654023 Save model at iter 120108 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 121000 (epoch 12), loss = OrderedDict([('loss_ce', 0.283), ('loss_counter', 0.108), ('loss_bbox', 0.063), ('loss_giou', 0.166), ('loss_self_iou', 0.095), ('cardinality_error', 3.691), ('loss_ce_0', 0.284), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.691), ('loss_caption_0', 2.809), ('loss_caption', 2.808), ('total_loss', 13.835)]), time/iter = 0.727, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 122000 (epoch 12), loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.109), ('loss_bbox', 0.064), ('loss_giou', 0.17), ('loss_self_iou', 0.093), ('cardinality_error', 3.706), ('loss_ce_0', 0.281), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.706), ('loss_caption_0', 2.811), ('loss_caption', 2.814), ('total_loss', 13.867)]), time/iter = 0.190, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 123000 (epoch 12), loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.109), ('loss_bbox', 0.066), ('loss_giou', 0.172), ('loss_self_iou', 0.097), ('cardinality_error', 3.691), ('loss_ce_0', 0.281), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.691), ('loss_caption_0', 2.789), ('loss_caption', 2.797), ('total_loss', 13.808)]), time/iter = 0.190, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 124000 (epoch 12), loss = OrderedDict([('loss_ce', 0.282), ('loss_counter', 0.112), ('loss_bbox', 0.063), ('loss_giou', 0.17), ('loss_self_iou', 0.092), ('cardinality_error', 3.76), ('loss_ce_0', 0.281), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.76), ('loss_caption_0', 2.839), ('loss_caption', 2.842), ('total_loss', 13.984)]), time/iter = 0.194, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 125000 (epoch 12), loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.097), ('cardinality_error', 3.763), ('loss_ce_0', 0.282), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.763), ('loss_caption_0', 2.81), ('loss_caption', 2.815), ('total_loss', 13.898)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 126000 (epoch 12), loss = OrderedDict([('loss_ce', 0.282), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.177), ('loss_self_iou', 0.095), ('cardinality_error', 3.717), ('loss_ce_0', 0.283), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.717), ('loss_caption_0', 2.789), ('loss_caption', 2.787), ('total_loss', 13.835)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 127000 (epoch 12), loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.172), ('loss_self_iou', 0.097), ('cardinality_error', 3.764), ('loss_ce_0', 0.277), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.764), ('loss_caption_0', 2.867), ('loss_caption', 2.871), ('total_loss', 14.097)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 128000 (epoch 12), loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.113), ('loss_bbox', 0.063), ('loss_giou', 0.173), ('loss_self_iou', 0.092), ('cardinality_error', 3.793), ('loss_ce_0', 0.283), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.793), ('loss_caption_0', 2.868), ('loss_caption', 2.863), ('total_loss', 14.111)]), time/iter = 0.190, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 129000 (epoch 12), loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.106), ('loss_bbox', 0.066), ('loss_giou', 0.175), ('loss_self_iou', 0.1), ('cardinality_error', 3.686), ('loss_ce_0', 0.283), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.686), ('loss_caption_0', 2.812), ('loss_caption', 2.813), ('total_loss', 13.903)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 130000 (epoch 12), loss = OrderedDict([('loss_ce', 0.283), ('loss_counter', 0.111), ('loss_bbox', 0.065), ('loss_giou', 0.174), ('loss_self_iou', 0.097), ('cardinality_error', 3.772), ('loss_ce_0', 0.286), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.772), ('loss_caption_0', 2.86), ('loss_caption', 2.861), ('total_loss', 14.105)]), time/iter = 0.190, bad_vid = 0.000 Validation results of iter 130117: Bleu_1:0.16778675341331784 Bleu_2:0.09082555766488616 Bleu_3:0.047445681271689716 Bleu_4:0.02375280793420285 METEOR:0.08883520478698428 ROUGE_L:0.16531435721130755 CIDEr:0.31778343902267087 Recall:0.5273619026669621 Precision:0.5698181479221706 soda_c:0.05753856798988932 para_Bleu_1:0.4610381779339771 para_Bleu_2:0.2761144617772928 para_Bleu_3:0.16915034097081671 para_Bleu_4:0.10654029953240575 para_METEOR:0.16638305166981465 para_ROUGE_L:0.31710573495570465 para_CIDEr:0.19601570682645908 overall score of iter 130117: 0.46893905802867947 Save model at iter 130117 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 131000 (epoch 13), loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.107), ('loss_bbox', 0.062), ('loss_giou', 0.17), ('loss_self_iou', 0.092), ('cardinality_error', 3.75), ('loss_ce_0', 0.279), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.75), ('loss_caption_0', 2.817), ('loss_caption', 2.826), ('total_loss', 13.897)]), time/iter = 0.734, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 132000 (epoch 13), loss = OrderedDict([('loss_ce', 0.271), ('loss_counter', 0.109), ('loss_bbox', 0.065), ('loss_giou', 0.174), ('loss_self_iou', 0.089), ('cardinality_error', 3.814), ('loss_ce_0', 0.274), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.814), ('loss_caption_0', 2.778), ('loss_caption', 2.776), ('total_loss', 13.726)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 133000 (epoch 13), loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.172), ('loss_self_iou', 0.095), ('cardinality_error', 3.773), ('loss_ce_0', 0.277), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.773), ('loss_caption_0', 2.843), ('loss_caption', 2.843), ('total_loss', 13.999)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 134000 (epoch 13), loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.108), ('loss_bbox', 0.065), ('loss_giou', 0.171), ('loss_self_iou', 0.101), ('cardinality_error', 3.743), ('loss_ce_0', 0.276), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.743), ('loss_caption_0', 2.786), ('loss_caption', 2.787), ('total_loss', 13.756)]), time/iter = 0.188, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 135000 (epoch 13), loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.115), ('loss_bbox', 0.061), ('loss_giou', 0.168), ('loss_self_iou', 0.096), ('cardinality_error', 3.794), ('loss_ce_0', 0.281), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.794), ('loss_caption_0', 2.785), ('loss_caption', 2.784), ('total_loss', 13.759)]), time/iter = 0.190, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 136000 (epoch 13), loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.106), ('loss_bbox', 0.065), ('loss_giou', 0.168), ('loss_self_iou', 0.092), ('cardinality_error', 3.653), ('loss_ce_0', 0.279), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.653), ('loss_caption_0', 2.828), ('loss_caption', 2.834), ('total_loss', 13.919)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 137000 (epoch 13), loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.105), ('loss_bbox', 0.065), ('loss_giou', 0.173), ('loss_self_iou', 0.099), ('cardinality_error', 3.654), ('loss_ce_0', 0.281), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.654), ('loss_caption_0', 2.79), ('loss_caption', 2.799), ('total_loss', 13.806)]), time/iter = 0.190, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 138000 (epoch 13), loss = OrderedDict([('loss_ce', 0.278), ('loss_counter', 0.109), ('loss_bbox', 0.064), ('loss_giou', 0.171), ('loss_self_iou', 0.095), ('cardinality_error', 3.714), ('loss_ce_0', 0.28), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.714), ('loss_caption_0', 2.835), ('loss_caption', 2.828), ('total_loss', 13.945)]), time/iter = 0.188, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 139000 (epoch 13), loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.115), ('loss_bbox', 0.062), ('loss_giou', 0.167), ('loss_self_iou', 0.098), ('cardinality_error', 3.813), ('loss_ce_0', 0.283), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.813), ('loss_caption_0', 2.83), ('loss_caption', 2.828), ('total_loss', 13.924)]), time/iter = 0.186, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 140000 (epoch 13), loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.107), ('loss_bbox', 0.063), ('loss_giou', 0.171), ('loss_self_iou', 0.09), ('cardinality_error', 3.664), ('loss_ce_0', 0.28), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.821), ('loss_caption', 2.823), ('total_loss', 13.905)]), time/iter = 0.191, bad_vid = 0.000 Validation results of iter 140126: Bleu_1:0.16683698969676453 Bleu_2:0.09036855967772307 Bleu_3:0.047484441130632896 Bleu_4:0.023876859658376735 METEOR:0.08814626862844692 ROUGE_L:0.16473003568483396 CIDEr:0.3189568758512915 Recall:0.5281546209817979 Precision:0.5704333604501349 soda_c:0.057417105431783064 para_Bleu_1:0.4580706340663244 para_Bleu_2:0.27372623489326064 para_Bleu_3:0.16745128920972313 para_Bleu_4:0.10550306643408856 para_METEOR:0.16656454278617736 para_ROUGE_L:0.31631873012989425 para_CIDEr:0.19724321819057877 overall score of iter 140126: 0.46931082741084473 Save model at iter 140126 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 141000 (epoch 14), loss = OrderedDict([('loss_ce', 0.268), ('loss_counter', 0.108), ('loss_bbox', 0.066), ('loss_giou', 0.171), ('loss_self_iou', 0.106), ('cardinality_error', 3.774), ('loss_ce_0', 0.27), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.108), ('cardinality_error_0', 3.774), ('loss_caption_0', 2.75), ('loss_caption', 2.748), ('total_loss', 13.572)]), time/iter = 0.739, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 142000 (epoch 14), loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.109), ('loss_bbox', 0.062), ('loss_giou', 0.173), ('loss_self_iou', 0.091), ('cardinality_error', 3.797), ('loss_ce_0', 0.272), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.797), ('loss_caption_0', 2.72), ('loss_caption', 2.722), ('total_loss', 13.492)]), time/iter = 0.186, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 143000 (epoch 14), loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.1), ('loss_bbox', 0.063), ('loss_giou', 0.162), ('loss_self_iou', 0.095), ('cardinality_error', 3.637), ('loss_ce_0', 0.268), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.637), ('loss_caption_0', 2.782), ('loss_caption', 2.782), ('total_loss', 13.626)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 144000 (epoch 14), loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.112), ('loss_bbox', 0.062), ('loss_giou', 0.172), ('loss_self_iou', 0.094), ('cardinality_error', 3.831), ('loss_ce_0', 0.273), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.18), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.831), ('loss_caption_0', 2.793), ('loss_caption', 2.79), ('total_loss', 13.773)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 145000 (epoch 14), loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.101), ('loss_bbox', 0.061), ('loss_giou', 0.16), ('loss_self_iou', 0.093), ('cardinality_error', 3.665), ('loss_ce_0', 0.273), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.168), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.665), ('loss_caption_0', 2.762), ('loss_caption', 2.767), ('total_loss', 13.554)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 146000 (epoch 14), loss = OrderedDict([('loss_ce', 0.275), ('loss_counter', 0.109), ('loss_bbox', 0.061), ('loss_giou', 0.164), ('loss_self_iou', 0.091), ('cardinality_error', 3.725), ('loss_ce_0', 0.276), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.725), ('loss_caption_0', 2.813), ('loss_caption', 2.813), ('total_loss', 13.811)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 147000 (epoch 14), loss = OrderedDict([('loss_ce', 0.272), ('loss_counter', 0.104), ('loss_bbox', 0.063), ('loss_giou', 0.171), ('loss_self_iou', 0.097), ('cardinality_error', 3.714), ('loss_ce_0', 0.273), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.714), ('loss_caption_0', 2.747), ('loss_caption', 2.745), ('total_loss', 13.578)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 148000 (epoch 14), loss = OrderedDict([('loss_ce', 0.271), ('loss_counter', 0.108), ('loss_bbox', 0.063), ('loss_giou', 0.168), ('loss_self_iou', 0.096), ('cardinality_error', 3.728), ('loss_ce_0', 0.274), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.728), ('loss_caption_0', 2.843), ('loss_caption', 2.84), ('total_loss', 13.944)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 149000 (epoch 14), loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.108), ('loss_bbox', 0.066), ('loss_giou', 0.169), ('loss_self_iou', 0.098), ('cardinality_error', 3.799), ('loss_ce_0', 0.273), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.799), ('loss_caption_0', 2.836), ('loss_caption', 2.836), ('total_loss', 13.926)]), time/iter = 0.196, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 150000 (epoch 14), loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.107), ('loss_bbox', 0.063), ('loss_giou', 0.169), ('loss_self_iou', 0.087), ('cardinality_error', 3.703), ('loss_ce_0', 0.272), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.703), ('loss_caption_0', 2.806), ('loss_caption', 2.806), ('total_loss', 13.795)]), time/iter = 0.193, bad_vid = 0.000 Validation results of iter 150135: Bleu_1:0.16662144072598145 Bleu_2:0.08988753231411394 Bleu_3:0.04690847145308288 Bleu_4:0.023224274927987735 METEOR:0.08725158341768323 ROUGE_L:0.16364893754496343 CIDEr:0.32028824475030926 Recall:0.5260420675803493 Precision:0.5630584367161506 soda_c:0.057565785652999135 para_Bleu_1:0.46764194087144684 para_Bleu_2:0.2801629240374498 para_Bleu_3:0.1713033186995987 para_Bleu_4:0.10750827268624512 para_METEOR:0.16742715934059368 para_ROUGE_L:0.31858424377772926 para_CIDEr:0.2089956210595351 overall score of iter 150135: 0.4839310530863739 Save model at iter 150135 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save Best-model at iter 150135 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 151000 (epoch 15), loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.101), ('loss_bbox', 0.063), ('loss_giou', 0.163), ('loss_self_iou', 0.097), ('cardinality_error', 3.645), ('loss_ce_0', 0.266), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.645), ('loss_caption_0', 2.762), ('loss_caption', 2.759), ('total_loss', 13.537)]), time/iter = 0.737, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 152000 (epoch 15), loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.103), ('loss_bbox', 0.06), ('loss_giou', 0.166), ('loss_self_iou', 0.087), ('cardinality_error', 3.722), ('loss_ce_0', 0.269), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.087), ('cardinality_error_0', 3.722), ('loss_caption_0', 2.762), ('loss_caption', 2.766), ('total_loss', 13.59)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 153000 (epoch 15), loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.111), ('loss_bbox', 0.062), ('loss_giou', 0.168), ('loss_self_iou', 0.083), ('cardinality_error', 3.813), ('loss_ce_0', 0.267), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.085), ('cardinality_error_0', 3.813), ('loss_caption_0', 2.777), ('loss_caption', 2.778), ('total_loss', 13.663)]), time/iter = 0.193, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 154000 (epoch 15), loss = OrderedDict([('loss_ce', 0.268), ('loss_counter', 0.106), ('loss_bbox', 0.061), ('loss_giou', 0.168), ('loss_self_iou', 0.092), ('cardinality_error', 3.769), ('loss_ce_0', 0.272), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.769), ('loss_caption_0', 2.787), ('loss_caption', 2.787), ('total_loss', 13.717)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 155000 (epoch 15), loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.104), ('loss_bbox', 0.063), ('loss_giou', 0.169), ('loss_self_iou', 0.09), ('cardinality_error', 3.714), ('loss_ce_0', 0.267), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.714), ('loss_caption_0', 2.758), ('loss_caption', 2.76), ('total_loss', 13.593)]), time/iter = 0.190, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 156000 (epoch 15), loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.106), ('loss_bbox', 0.064), ('loss_giou', 0.167), ('loss_self_iou', 0.102), ('cardinality_error', 3.675), ('loss_ce_0', 0.269), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.675), ('loss_caption_0', 2.741), ('loss_caption', 2.742), ('total_loss', 13.504)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 157000 (epoch 15), loss = OrderedDict([('loss_ce', 0.267), ('loss_counter', 0.104), ('loss_bbox', 0.065), ('loss_giou', 0.167), ('loss_self_iou', 0.103), ('cardinality_error', 3.722), ('loss_ce_0', 0.268), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.105), ('cardinality_error_0', 3.722), ('loss_caption_0', 2.777), ('loss_caption', 2.783), ('total_loss', 13.668)]), time/iter = 0.188, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 158000 (epoch 15), loss = OrderedDict([('loss_ce', 0.266), ('loss_counter', 0.106), ('loss_bbox', 0.062), ('loss_giou', 0.164), ('loss_self_iou', 0.099), ('cardinality_error', 3.758), ('loss_ce_0', 0.27), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.758), ('loss_caption_0', 2.815), ('loss_caption', 2.817), ('total_loss', 13.789)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 159000 (epoch 15), loss = OrderedDict([('loss_ce', 0.272), ('loss_counter', 0.108), ('loss_bbox', 0.062), ('loss_giou', 0.169), ('loss_self_iou', 0.098), ('cardinality_error', 3.729), ('loss_ce_0', 0.275), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.729), ('loss_caption_0', 2.783), ('loss_caption', 2.785), ('total_loss', 13.721)]), time/iter = 0.194, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 160000 (epoch 15), loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.109), ('loss_bbox', 0.063), ('loss_giou', 0.166), ('loss_self_iou', 0.098), ('cardinality_error', 3.816), ('loss_ce_0', 0.271), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.816), ('loss_caption_0', 2.78), ('loss_caption', 2.784), ('total_loss', 13.686)]), time/iter = 0.196, bad_vid = 0.000 Validation results of iter 160144: Bleu_1:0.16754398447821903 Bleu_2:0.08978801866243748 Bleu_3:0.046077601805781236 Bleu_4:0.02215727819941335 METEOR:0.08650894641812401 ROUGE_L:0.16425299709373153 CIDEr:0.3192637628790779 Recall:0.5308598805776927 Precision:0.5705477594739302 soda_c:0.059035206979637336 para_Bleu_1:0.4722129873397206 para_Bleu_2:0.2843271953295457 para_Bleu_3:0.17433620623201318 para_Bleu_4:0.10943737200004257 para_METEOR:0.16524483023272712 para_ROUGE_L:0.3180351825656492 para_CIDEr:0.2139382514781602 overall score of iter 160144: 0.4886204537109299 Save model at iter 160144 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save Best-model at iter 160144 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 161000 (epoch 16), loss = OrderedDict([('loss_ce', 0.26), ('loss_counter', 0.103), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.097), ('cardinality_error', 3.695), ('loss_ce_0', 0.263), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.695), ('loss_caption_0', 2.766), ('loss_caption', 2.768), ('total_loss', 13.553)]), time/iter = 0.749, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 162000 (epoch 16), loss = OrderedDict([('loss_ce', 0.262), ('loss_counter', 0.103), ('loss_bbox', 0.063), ('loss_giou', 0.164), ('loss_self_iou', 0.091), ('cardinality_error', 3.694), ('loss_ce_0', 0.266), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.694), ('loss_caption_0', 2.768), ('loss_caption', 2.764), ('total_loss', 13.573)]), time/iter = 0.188, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 163000 (epoch 16), loss = OrderedDict([('loss_ce', 0.262), ('loss_counter', 0.105), ('loss_bbox', 0.064), ('loss_giou', 0.173), ('loss_self_iou', 0.097), ('cardinality_error', 3.769), ('loss_ce_0', 0.266), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.769), ('loss_caption_0', 2.765), ('loss_caption', 2.766), ('total_loss', 13.63)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 164000 (epoch 16), loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.11), ('loss_bbox', 0.061), ('loss_giou', 0.164), ('loss_self_iou', 0.092), ('cardinality_error', 3.774), ('loss_ce_0', 0.269), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.774), ('loss_caption_0', 2.772), ('loss_caption', 2.776), ('total_loss', 13.625)]), time/iter = 0.190, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 165000 (epoch 16), loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.102), ('loss_bbox', 0.063), ('loss_giou', 0.164), ('loss_self_iou', 0.092), ('cardinality_error', 3.699), ('loss_ce_0', 0.267), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.699), ('loss_caption_0', 2.711), ('loss_caption', 2.716), ('total_loss', 13.368)]), time/iter = 0.187, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 166000 (epoch 16), loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.105), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.094), ('cardinality_error', 3.72), ('loss_ce_0', 0.268), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.72), ('loss_caption_0', 2.754), ('loss_caption', 2.755), ('total_loss', 13.534)]), time/iter = 0.193, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 167000 (epoch 16), loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.168), ('loss_self_iou', 0.095), ('cardinality_error', 3.712), ('loss_ce_0', 0.266), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.712), ('loss_caption_0', 2.771), ('loss_caption', 2.772), ('total_loss', 13.617)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 168000 (epoch 16), loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.108), ('loss_bbox', 0.062), ('loss_giou', 0.168), ('loss_self_iou', 0.09), ('cardinality_error', 3.816), ('loss_ce_0', 0.269), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.816), ('loss_caption_0', 2.814), ('loss_caption', 2.82), ('total_loss', 13.826)]), time/iter = 0.193, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 169000 (epoch 16), loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.106), ('loss_bbox', 0.064), ('loss_giou', 0.166), ('loss_self_iou', 0.106), ('cardinality_error', 3.697), ('loss_ce_0', 0.261), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.697), ('loss_caption_0', 2.769), ('loss_caption', 2.775), ('total_loss', 13.598)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 170000 (epoch 16), loss = OrderedDict([('loss_ce', 0.268), ('loss_counter', 0.105), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.093), ('cardinality_error', 3.799), ('loss_ce_0', 0.272), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.799), ('loss_caption_0', 2.794), ('loss_caption', 2.798), ('total_loss', 13.727)]), time/iter = 0.191, bad_vid = 0.000 Validation results of iter 170153: Bleu_1:0.16584280243722227 Bleu_2:0.08889969905794425 Bleu_3:0.04569298286173284 Bleu_4:0.021992960199339176 METEOR:0.08570833880397384 ROUGE_L:0.16234979503724006 CIDEr:0.3170462149966731 Recall:0.5273397281824633 Precision:0.5648989898989865 soda_c:0.058539462474976364 para_Bleu_1:0.4735378044184376 para_Bleu_2:0.2855599966961999 para_Bleu_3:0.17485842077678387 para_Bleu_4:0.10998333079246524 para_METEOR:0.16580782598840993 para_ROUGE_L:0.3184105968751349 para_CIDEr:0.2144083270960459 overall score of iter 170153: 0.4901994838769211 Save model at iter 170153 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save Best-model at iter 170153 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 171000 (epoch 17), loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.161), ('loss_self_iou', 0.094), ('cardinality_error', 3.694), ('loss_ce_0', 0.261), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.169), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.694), ('loss_caption_0', 2.772), ('loss_caption', 2.77), ('total_loss', 13.544)]), time/iter = 0.745, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 172000 (epoch 17), loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.1), ('loss_bbox', 0.063), ('loss_giou', 0.165), ('loss_self_iou', 0.096), ('cardinality_error', 3.667), ('loss_ce_0', 0.262), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.667), ('loss_caption_0', 2.741), ('loss_caption', 2.743), ('total_loss', 13.47)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 173000 (epoch 17), loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.104), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.09), ('cardinality_error', 3.753), ('loss_ce_0', 0.261), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.753), ('loss_caption_0', 2.786), ('loss_caption', 2.785), ('total_loss', 13.646)]), time/iter = 0.193, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 174000 (epoch 17), loss = OrderedDict([('loss_ce', 0.259), ('loss_counter', 0.107), ('loss_bbox', 0.06), ('loss_giou', 0.166), ('loss_self_iou', 0.094), ('cardinality_error', 3.832), ('loss_ce_0', 0.261), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.832), ('loss_caption_0', 2.733), ('loss_caption', 2.738), ('total_loss', 13.457)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 175000 (epoch 17), loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.103), ('loss_bbox', 0.06), ('loss_giou', 0.163), ('loss_self_iou', 0.098), ('cardinality_error', 3.731), ('loss_ce_0', 0.259), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.062), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.731), ('loss_caption_0', 2.745), ('loss_caption', 2.744), ('total_loss', 13.454)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 176000 (epoch 17), loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.103), ('loss_bbox', 0.06), ('loss_giou', 0.164), ('loss_self_iou', 0.095), ('cardinality_error', 3.795), ('loss_ce_0', 0.264), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.795), ('loss_caption_0', 2.761), ('loss_caption', 2.77), ('total_loss', 13.575)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 177000 (epoch 17), loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.1), ('loss_bbox', 0.063), ('loss_giou', 0.161), ('loss_self_iou', 0.096), ('cardinality_error', 3.652), ('loss_ce_0', 0.261), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.169), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.652), ('loss_caption_0', 2.743), ('loss_caption', 2.745), ('total_loss', 13.43)]), time/iter = 0.190, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 178000 (epoch 17), loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.103), ('loss_bbox', 0.063), ('loss_giou', 0.164), ('loss_self_iou', 0.103), ('cardinality_error', 3.664), ('loss_ce_0', 0.26), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.682), ('loss_caption', 2.68), ('total_loss', 13.211)]), time/iter = 0.188, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 179000 (epoch 17), loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.105), ('loss_bbox', 0.06), ('loss_giou', 0.164), ('loss_self_iou', 0.09), ('cardinality_error', 3.825), ('loss_ce_0', 0.266), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.825), ('loss_caption_0', 2.788), ('loss_caption', 2.796), ('total_loss', 13.671)]), time/iter = 0.193, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 180000 (epoch 17), loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.102), ('loss_bbox', 0.064), ('loss_giou', 0.166), ('loss_self_iou', 0.093), ('cardinality_error', 3.729), ('loss_ce_0', 0.261), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.729), ('loss_caption_0', 2.781), ('loss_caption', 2.775), ('total_loss', 13.608)]), time/iter = 0.192, bad_vid = 0.000 Validation results of iter 180162: Bleu_1:0.16720622564646215 Bleu_2:0.08946643461131876 Bleu_3:0.04568137095423273 Bleu_4:0.022039722503534608 METEOR:0.08588931176535387 ROUGE_L:0.16315869782389542 CIDEr:0.32099741016990446 Recall:0.5265047853249455 Precision:0.5647345942647923 soda_c:0.05847424883094643 para_Bleu_1:0.47508155945278135 para_Bleu_2:0.2858233856765029 para_Bleu_3:0.17499503512152859 para_Bleu_4:0.11002968407978216 para_METEOR:0.16541373751181562 para_ROUGE_L:0.3190110890037882 para_CIDEr:0.21421557986951392 overall score of iter 180162: 0.4896590014611117 Save model at iter 180162 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 181000 (epoch 18), loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.094), ('cardinality_error', 3.781), ('loss_ce_0', 0.261), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.781), ('loss_caption_0', 2.743), ('loss_caption', 2.746), ('total_loss', 13.452)]), time/iter = 0.750, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 182000 (epoch 18), loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.164), ('loss_self_iou', 0.1), ('cardinality_error', 3.726), ('loss_ce_0', 0.26), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.726), ('loss_caption_0', 2.748), ('loss_caption', 2.746), ('total_loss', 13.472)]), time/iter = 0.189, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 183000 (epoch 18), loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.097), ('cardinality_error', 3.722), ('loss_ce_0', 0.26), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.722), ('loss_caption_0', 2.729), ('loss_caption', 2.734), ('total_loss', 13.405)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 184000 (epoch 18), loss = OrderedDict([('loss_ce', 0.253), ('loss_counter', 0.104), ('loss_bbox', 0.061), ('loss_giou', 0.161), ('loss_self_iou', 0.098), ('cardinality_error', 3.726), ('loss_ce_0', 0.257), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.17), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.726), ('loss_caption_0', 2.783), ('loss_caption', 2.787), ('total_loss', 13.591)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 185000 (epoch 18), loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.098), ('loss_bbox', 0.063), ('loss_giou', 0.165), ('loss_self_iou', 0.087), ('cardinality_error', 3.667), ('loss_ce_0', 0.26), ('loss_counter_0', 0.098), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.667), ('loss_caption_0', 2.718), ('loss_caption', 2.716), ('total_loss', 13.354)]), time/iter = 0.188, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 186000 (epoch 18), loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.099), ('loss_bbox', 0.062), ('loss_giou', 0.166), ('loss_self_iou', 0.093), ('cardinality_error', 3.776), ('loss_ce_0', 0.259), ('loss_counter_0', 0.099), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.776), ('loss_caption_0', 2.75), ('loss_caption', 2.75), ('total_loss', 13.494)]), time/iter = 0.194, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 187000 (epoch 18), loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.109), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.089), ('cardinality_error', 3.803), ('loss_ce_0', 0.264), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.803), ('loss_caption_0', 2.788), ('loss_caption', 2.791), ('total_loss', 13.678)]), time/iter = 0.198, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 188000 (epoch 18), loss = OrderedDict([('loss_ce', 0.253), ('loss_counter', 0.1), ('loss_bbox', 0.062), ('loss_giou', 0.163), ('loss_self_iou', 0.091), ('cardinality_error', 3.71), ('loss_ce_0', 0.259), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.71), ('loss_caption_0', 2.745), ('loss_caption', 2.743), ('total_loss', 13.444)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 189000 (epoch 18), loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.105), ('loss_bbox', 0.064), ('loss_giou', 0.165), ('loss_self_iou', 0.1), ('cardinality_error', 3.748), ('loss_ce_0', 0.256), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.748), ('loss_caption_0', 2.751), ('loss_caption', 2.753), ('total_loss', 13.484)]), time/iter = 0.188, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 190000 (epoch 18), loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.104), ('loss_bbox', 0.06), ('loss_giou', 0.161), ('loss_self_iou', 0.098), ('cardinality_error', 3.742), ('loss_ce_0', 0.264), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.742), ('loss_caption_0', 2.729), ('loss_caption', 2.73), ('total_loss', 13.395)]), time/iter = 0.189, bad_vid = 0.000 Validation results of iter 190171: Bleu_1:0.1662475028889873 Bleu_2:0.08895418147726737 Bleu_3:0.04559170272578064 Bleu_4:0.021869443641790748 METEOR:0.0853620749347768 ROUGE_L:0.16226693807975517 CIDEr:0.3203697867996399 Recall:0.5243080966273422 Precision:0.5592002237136435 soda_c:0.058066485957305666 para_Bleu_1:0.47302383939773723 para_Bleu_2:0.2848420020452884 para_Bleu_3:0.17477626094199183 para_Bleu_4:0.11005159892431456 para_METEOR:0.16474042555391544 para_ROUGE_L:0.31754161420686944 para_CIDEr:0.2082818020277855 overall score of iter 190171: 0.4830738265060155 Save model at iter 190171 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 191000 (epoch 19), loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.099), ('loss_bbox', 0.062), ('loss_giou', 0.167), ('loss_self_iou', 0.086), ('cardinality_error', 3.653), ('loss_ce_0', 0.257), ('loss_counter_0', 0.099), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.087), ('cardinality_error_0', 3.653), ('loss_caption_0', 2.754), ('loss_caption', 2.752), ('total_loss', 13.501)]), time/iter = 0.755, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 192000 (epoch 19), loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.1), ('loss_bbox', 0.061), ('loss_giou', 0.164), ('loss_self_iou', 0.094), ('cardinality_error', 3.767), ('loss_ce_0', 0.258), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.767), ('loss_caption_0', 2.717), ('loss_caption', 2.72), ('total_loss', 13.343)]), time/iter = 0.188, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 193000 (epoch 19), loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.106), ('loss_bbox', 0.06), ('loss_giou', 0.164), ('loss_self_iou', 0.093), ('cardinality_error', 3.847), ('loss_ce_0', 0.256), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.847), ('loss_caption_0', 2.754), ('loss_caption', 2.759), ('total_loss', 13.499)]), time/iter = 0.190, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 194000 (epoch 19), loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.165), ('loss_self_iou', 0.097), ('cardinality_error', 3.775), ('loss_ce_0', 0.262), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.775), ('loss_caption_0', 2.769), ('loss_caption', 2.772), ('total_loss', 13.587)]), time/iter = 0.192, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 195000 (epoch 19), loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.106), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.089), ('cardinality_error', 3.794), ('loss_ce_0', 0.261), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.089), ('cardinality_error_0', 3.794), ('loss_caption_0', 2.751), ('loss_caption', 2.751), ('total_loss', 13.506)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 196000 (epoch 19), loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.095), ('loss_bbox', 0.061), ('loss_giou', 0.162), ('loss_self_iou', 0.1), ('cardinality_error', 3.652), ('loss_ce_0', 0.258), ('loss_counter_0', 0.095), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.652), ('loss_caption_0', 2.743), ('loss_caption', 2.735), ('total_loss', 13.403)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 197000 (epoch 19), loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.104), ('loss_bbox', 0.061), ('loss_giou', 0.162), ('loss_self_iou', 0.091), ('cardinality_error', 3.759), ('loss_ce_0', 0.258), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.759), ('loss_caption_0', 2.74), ('loss_caption', 2.743), ('total_loss', 13.418)]), time/iter = 0.191, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 198000 (epoch 19), loss = OrderedDict([('loss_ce', 0.249), ('loss_counter', 0.098), ('loss_bbox', 0.062), ('loss_giou', 0.162), ('loss_self_iou', 0.092), ('cardinality_error', 3.664), ('loss_ce_0', 0.255), ('loss_counter_0', 0.098), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.718), ('loss_caption', 2.72), ('total_loss', 13.31)]), time/iter = 0.190, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 199000 (epoch 19), loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.162), ('loss_self_iou', 0.101), ('cardinality_error', 3.736), ('loss_ce_0', 0.257), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.736), ('loss_caption_0', 2.759), ('loss_caption', 2.76), ('total_loss', 13.502)]), time/iter = 0.193, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 200000 (epoch 19), loss = OrderedDict([('loss_ce', 0.253), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.159), ('loss_self_iou', 0.098), ('cardinality_error', 3.701), ('loss_ce_0', 0.259), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.17), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.701), ('loss_caption_0', 2.766), ('loss_caption', 2.771), ('total_loss', 13.518)]), time/iter = 0.190, bad_vid = 0.000 Validation results of iter 200180: Bleu_1:0.16600244771432068 Bleu_2:0.08859363359362551 Bleu_3:0.045174799285766926 Bleu_4:0.021453706973694267 METEOR:0.08469975853590762 ROUGE_L:0.1615333099598977 CIDEr:0.3178372173219055 Recall:0.5270524681293403 Precision:0.5612365263371945 soda_c:0.05852570981425518 para_Bleu_1:0.47641872729084495 para_Bleu_2:0.28679556025023933 para_Bleu_3:0.1757988669447671 para_Bleu_4:0.11061748158923715 para_METEOR:0.1647238014039032 para_ROUGE_L:0.3182336912910021 para_CIDEr:0.21852415031403352 overall score of iter 200180: 0.4938654333071738 Save model at iter 200180 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. Save Best-model at iter 200180 to checkpoint file. Save info to info.json Best epoch: 10 Best Model Performance: Bleu_1:0.1671778590456048 Bleu_2:0.09077014613023152 Bleu_3:0.0476684747303012 Bleu_4:0.02445564298599047 METEOR:0.08933235383587503 ROUGE_L:0.1654660162888944 CIDEr:0.31886265111118334 Recall:0.5314017615268335 Precision:0.5831469052945512 soda_c:0.05853263249839839 para_Bleu_1:0.46544090189732323 para_Bleu_2:0.2789325258737778 para_Bleu_3:0.17172911957785325 para_Bleu_4:0.10903514181091935 para_METEOR:0.16550159188298816 para_ROUGE_L:0.3181118223429575 para_CIDEr:0.2056618808195008 avg_proposal_number:-1 Best Overall Score epoch10: 1.5812763042668414