backup evironment completed ! Loading pth from /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal ******************** All args: ************************************************* align_contiguous = False align_drop_z = 0 align_keep_percentile = 0.1 align_many_to_one = False align_one_to_many = False align_top_band_size = 0 att_hid_size = 512 aux_loss = True backbone = None base_cfg_path = cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml basic_ss_prob = 0 batch_size = 1 batch_size_for_eval = 1 bbox_loss_coef = 0 beta = 1 cap_dec_n_points = 4 cap_nheads = 1 cap_num_feature_levels = 4 cap_prob_clip = False caption_cost_type = loss caption_decoder_type = standard caption_loss_coef = 2 cfg_path = cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml cl_schedule_time = [0, 2] cl_schedule_val = [0, 0.1] clip_context_dim = 512 cls_loss_coef = 2 contrastive_hidden_size = 128 contrastive_loss_start_coef = 0.0 contrastive_loss_temperature = 0.1 cost_alpha = 0.25 cost_gamma = 2 count_loss_coef = 0.5 criteria_for_best_ckpt = overall current_lr = 5e-05 data_norm = 0 data_rescale = 1 debug = False dec_layers = 2 dec_n_points = 4 device = cuda dict_file = data/howto/vocabulary_howto_rate2_yc2.json dict_file_val = data/howto/vocabulary_howto_rate2_yc2.json dilation = False disable_contrastive_projection = 1 disable_cudnn = 0 disable_mid_caption_heads = False disable_rematch = False disable_tqdm = False drop_prob = 0.5 ec_alpha = 1.0 enable_bg_for_cl = True enable_contrastive = False enable_cross_video_cl = True enable_e2t_cl = True enc_layers = 2 enc_n_points = 4 eos_coef = 0.1 epoch = 20 event_context_dim = None feature_dim = 768 feature_sample_rate = 1 fix_xcw = 1 focal_alpha = 0.25 focal_gamma = 2.0 focal_mil = False frame_embedding_num = 200 ft_gt_percent = 1.0 giou_loss_coef = 4 gpu_id = [] grad_clip = 100.0 gt_file_for_auc = data/anet/captiondata/val_all.json gt_file_for_eval = ['data/yc2/captiondata/yc2_val.json'] gt_file_for_para_eval = ['data/yc2/captiondata/para/para_yc2_val.json'] gt_proposal_sample_num = 20 hidden_dim = 512 hidden_dropout_prob = 0.5 huggingface_cache_dir = .cache id = seq2-ft(mix)-gt_percent-1.0 id_ori = input_encoding_size = 512 invalid_video_json = [] iteration = 3 layer_norm_eps = 1e-12 learning_rate_decay_every = 3 learning_rate_decay_rate = 0.5 learning_rate_decay_start = 8 lloss_beta = 1 lloss_cross_entropy = 0 lloss_focal_loss = 0 lloss_gau_mask = 1 lr = 5e-05 lr_backbone = 2e-05 lr_backbone_names = ['None'] lr_linear_proj_mult = 0.1 lr_linear_proj_names = ['reference_points', 'sampling_offsets'] lr_proj = 0 map = True matcher_type = default max_caption_len = 50 max_eseq_length = 20 max_pos_num = 500 max_text_input_len = 32 merge_criterion = ins_cap_topk merge_k_boxes = 3 merge_mode = weighted_sum mil_loss_coef = 0 min_epoch_when_save = -1 nheads = 8 norm_ins_score = sigmoid nthreads = 4 num_classes = 1 num_feature_levels = 4 num_layers = 1 num_neg_box = 10 num_queries = 100 optimizer_type = adam position_embedding = sine position_embedding_scale = 6.283185307179586 pre_percent = 1.0 pretrain = None pretrain_path = pretrained_language_model = UniVL prior_anchor_duration_init = True prior_manner = all pseudo_box_aug = False pseudo_box_aug_mode = random_range pseudo_box_aug_num = 8 pseudo_box_aug_ratio = 0.02 pseudo_box_type = similarity_op_order_v2 random_anchor_init = True random_seed = False ref_rank_loss_coef = 0.0 refine_pseudo_box = False refine_pseudo_stage_num = 2 rnn_size = 512 sample_method = nearest save_all_checkpoint = 0 save_checkpoint_every = 1 save_dir = /mnt/data/pjlab-3090-sport/wuhao/logs/dibs scheduled_sampling_increase_every = 2 scheduled_sampling_increase_prob = 0.05 scheduled_sampling_max_prob = 0.25 scheduled_sampling_start = -1 seed = 777 self_iou_loss_coef = 0.0 set_cost_bbox = 0 set_cost_caption = 0 set_cost_cl = 0.0 set_cost_class = 2 set_cost_giou = 4 set_cost_sim = 1.0 share_caption_head = 1 soft_attention = 1 start_from = start_from_mode = last start_refine_epoch = -1 statistic_mode = mode test = False text_encoder_learning_strategy = frozen text_feature_folder = ['/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/text', '/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/'] text_feature_folder_val = ['/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/'] text_hidden_dim = 768 top_frames = 25 train_caption_file = ['data/howto/captiondata/howto100m_train.json', 'data/yc2/captiondata/yc2_train.json'] train_proposal_sample_num = 30 train_proposal_type = gt training_scheme = all transformer_dropout_prob = 0.1 transformer_ff_dim = 512 transformer_input_type = queries use_additional_cap_layer = False use_additional_score_layer = False use_anchor = 0 use_neg_pseudo_box = False use_pseudo_box = False use_query_box_for_refine = 0 val_caption_file = data/yc2/captiondata/yc2_val.json visual_feature_folder = ['/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/visual', '/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/'] visual_feature_folder_val = ['/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/'] visual_feature_type = ['UniVL'] vocab_size = 14538 vocab_size_val = 14538 weight_decay = 0.0001 weighted_mil_loss = False width_ratio = 1 width_th = 1 window_size = 3 with_box_refine = 1 wordRNN_input_feats_type = C ******************** Model structure: ****************************************** PDVC( (base_encoder): BaseEncoder( (pos_embed): PositionEmbeddingSine( (duration_embed_layer): Linear(in_features=256, out_features=256, bias=True) ) (input_proj): ModuleList( (0): Sequential( (0): Conv1d(768, 512, kernel_size=(1,), stride=(1,)) (1): GroupNorm(32, 512, eps=1e-05, affine=True) ) (1): Sequential( (0): Conv1d(768, 512, kernel_size=(3,), stride=(2,), padding=(1,)) (1): GroupNorm(32, 512, eps=1e-05, affine=True) ) (2): Sequential( (0): Conv1d(512, 512, kernel_size=(3,), stride=(2,), padding=(1,)) (1): GroupNorm(32, 512, eps=1e-05, affine=True) ) (3): Sequential( (0): Conv1d(512, 512, kernel_size=(3,), stride=(2,), padding=(1,)) (1): GroupNorm(32, 512, eps=1e-05, affine=True) ) ) ) (transformer): DeformableTransformer( (encoder): DeformableTransformerEncoder( (layers): ModuleList( (0): DeformableTransformerEncoderLayer( (self_attn): MSDeformAttn( (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) (attention_weights): Linear(in_features=512, out_features=128, bias=True) (value_proj): Linear(in_features=512, out_features=512, bias=True) (output_proj): Linear(in_features=512, out_features=512, bias=True) ) (dropout1): Dropout(p=0.1, inplace=False) (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) (linear1): Linear(in_features=512, out_features=512, bias=True) (dropout2): Dropout(p=0.1, inplace=False) (linear2): Linear(in_features=512, out_features=512, bias=True) (dropout3): Dropout(p=0.1, inplace=False) (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) ) (1): DeformableTransformerEncoderLayer( (self_attn): MSDeformAttn( (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) (attention_weights): Linear(in_features=512, out_features=128, bias=True) (value_proj): Linear(in_features=512, out_features=512, bias=True) (output_proj): Linear(in_features=512, out_features=512, bias=True) ) (dropout1): Dropout(p=0.1, inplace=False) (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) (linear1): Linear(in_features=512, out_features=512, bias=True) (dropout2): Dropout(p=0.1, inplace=False) (linear2): Linear(in_features=512, out_features=512, bias=True) (dropout3): Dropout(p=0.1, inplace=False) (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) ) ) ) (decoder): DeformableTransformerDecoder( (layers): ModuleList( (0): DeformableTransformerDecoderLayer( (cross_attn): MSDeformAttn( (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) (attention_weights): Linear(in_features=512, out_features=128, bias=True) (value_proj): Linear(in_features=512, out_features=512, bias=True) (output_proj): Linear(in_features=512, out_features=512, bias=True) ) (dropout1): Dropout(p=0.1, inplace=False) (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) (self_attn): MultiheadAttention( (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True) ) (dropout2): Dropout(p=0.1, inplace=False) (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) (linear1): Linear(in_features=512, out_features=512, bias=True) (dropout3): Dropout(p=0.1, inplace=False) (linear2): Linear(in_features=512, out_features=512, bias=True) (dropout4): Dropout(p=0.1, inplace=False) (norm3): LayerNorm((512,), eps=1e-05, elementwise_affine=True) ) (1): DeformableTransformerDecoderLayer( (cross_attn): MSDeformAttn( (sampling_offsets): Linear(in_features=512, out_features=128, bias=True) (attention_weights): Linear(in_features=512, out_features=128, bias=True) (value_proj): Linear(in_features=512, out_features=512, bias=True) (output_proj): Linear(in_features=512, out_features=512, bias=True) ) (dropout1): Dropout(p=0.1, inplace=False) (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True) (self_attn): MultiheadAttention( (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True) ) (dropout2): Dropout(p=0.1, inplace=False) (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True) (linear1): Linear(in_features=512, out_features=512, bias=True) (dropout3): Dropout(p=0.1, inplace=False) (linear2): Linear(in_features=512, out_features=512, bias=True) (dropout4): Dropout(p=0.1, inplace=False) (norm3): LayerNorm((512,), eps=1e-05, elementwise_affine=True) ) ) (bbox_head): ModuleList( (0): MLP( (layers): ModuleList( (0): Linear(in_features=512, out_features=512, bias=True) (1): Linear(in_features=512, out_features=512, bias=True) (2): Linear(in_features=512, out_features=2, bias=True) ) ) (1): MLP( (layers): ModuleList( (0): Linear(in_features=512, out_features=512, bias=True) (1): Linear(in_features=512, out_features=512, bias=True) (2): Linear(in_features=512, out_features=2, bias=True) ) ) ) ) (pos_trans): Linear(in_features=512, out_features=1024, bias=True) (pos_trans_norm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True) (reference_points): Linear(in_features=512, out_features=1, bias=True) ) (caption_head): ModuleList( (0): LSTMDSACaptioner( (embed): Embedding(14539, 512) (logit): Linear(in_features=512, out_features=14539, bias=True) (dropout): Dropout(p=0.5, inplace=False) (core): ShowAttendTellCore( (rnn): LSTM(1536, 512, bias=False, dropout=0.5) (att_drop): Dropout(p=0.5, inplace=False) (deformable_att): MSDeformAttnCap( (sampling_offsets): Linear(in_features=1024, out_features=16, bias=True) (attention_weights): Linear(in_features=1024, out_features=16, bias=True) (value_proj): Linear(in_features=512, out_features=512, bias=True) (output_proj): Linear(in_features=512, out_features=512, bias=True) ) (ctx2att): Linear(in_features=512, out_features=512, bias=True) (h2att): Linear(in_features=512, out_features=512, bias=True) (alpha_net): Linear(in_features=512, out_features=1, bias=True) ) ) (1): LSTMDSACaptioner( (embed): Embedding(14539, 512) (logit): Linear(in_features=512, out_features=14539, bias=True) (dropout): Dropout(p=0.5, inplace=False) (core): ShowAttendTellCore( (rnn): LSTM(1536, 512, bias=False, dropout=0.5) (att_drop): Dropout(p=0.5, inplace=False) (deformable_att): MSDeformAttnCap( (sampling_offsets): Linear(in_features=1024, out_features=16, bias=True) (attention_weights): Linear(in_features=1024, out_features=16, bias=True) (value_proj): Linear(in_features=512, out_features=512, bias=True) (output_proj): Linear(in_features=512, out_features=512, bias=True) ) (ctx2att): Linear(in_features=512, out_features=512, bias=True) (h2att): Linear(in_features=512, out_features=512, bias=True) (alpha_net): Linear(in_features=512, out_features=1, bias=True) ) ) ) (query_embed): Embedding(100, 1024) (class_head): ModuleList( (0): Linear(in_features=512, out_features=1, bias=True) (1): Linear(in_features=512, out_features=1, bias=True) ) (class_refine_head): ModuleList( (0): Linear(in_features=512, out_features=1, bias=True) (1): Linear(in_features=512, out_features=1, bias=True) ) (count_head): ModuleList( (0): Linear(in_features=512, out_features=21, bias=True) (1): Linear(in_features=512, out_features=21, bias=True) ) (bbox_head): ModuleList( (0): MLP( (layers): ModuleList( (0): Linear(in_features=512, out_features=512, bias=True) (1): Linear(in_features=512, out_features=512, bias=True) (2): Linear(in_features=512, out_features=2, bias=True) ) ) (1): MLP( (layers): ModuleList( (0): Linear(in_features=512, out_features=512, bias=True) (1): Linear(in_features=512, out_features=512, bias=True) (2): Linear(in_features=512, out_features=2, bias=True) ) ) ) (contrastive_projection_event): ModuleList( (0): Identity() (1): Identity() ) (contrastive_projection_text): ModuleList( (0): Identity() (1): Identity() ) ) ******************** Strat training ! ****************************************** loss type: dict_keys(['loss_ce', 'loss_bbox', 'loss_giou', 'loss_counter', 'loss_caption', 'contrastive_loss', 'loss_ce_0', 'loss_bbox_0', 'loss_giou_0', 'loss_counter_0', 'loss_caption_0', 'contrastive_loss_0']) loss weights: dict_values([2, 0, 4, 0.5, 2, 0.0, 2, 0, 4, 0.5, 2, 0.0]) ID seq2-ft(mix)-gt_percent-1.0 iter 133 (epoch 0), loss = OrderedDict([('loss_ce', 0.336), ('loss_counter', 0.129), ('loss_bbox', 0.039), ('loss_giou', 0.368), ('loss_self_iou', 0.028), ('cardinality_error', 7.797), ('loss_ce_0', 0.337), ('loss_counter_0', 0.13), ('loss_bbox_0', 0.041), ('loss_giou_0', 0.381), ('loss_self_iou_0', 0.03), ('cardinality_error_0', 7.797), ('loss_caption_0', 2.755), ('loss_caption', 2.681), ('total_loss', 15.341)]), time/iter = 0.172, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 266 (epoch 0), loss = OrderedDict([('loss_ce', 0.324), ('loss_counter', 0.129), ('loss_bbox', 0.036), ('loss_giou', 0.369), ('loss_self_iou', 0.018), ('cardinality_error', 7.812), ('loss_ce_0', 0.341), ('loss_counter_0', 0.132), ('loss_bbox_0', 0.039), ('loss_giou_0', 0.38), ('loss_self_iou_0', 0.019), ('cardinality_error_0', 7.812), ('loss_caption_0', 2.803), ('loss_caption', 2.638), ('total_loss', 15.341)]), time/iter = 0.159, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 399 (epoch 0), loss = OrderedDict([('loss_ce', 0.312), ('loss_counter', 0.13), ('loss_bbox', 0.039), ('loss_giou', 0.375), ('loss_self_iou', 0.02), ('cardinality_error', 7.835), ('loss_ce_0', 0.324), ('loss_counter_0', 0.132), ('loss_bbox_0', 0.043), ('loss_giou_0', 0.395), ('loss_self_iou_0', 0.021), ('cardinality_error_0', 7.835), ('loss_caption_0', 2.81), ('loss_caption', 2.676), ('total_loss', 15.459)]), time/iter = 0.162, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 532 (epoch 0), loss = OrderedDict([('loss_ce', 0.307), ('loss_counter', 0.133), ('loss_bbox', 0.044), ('loss_giou', 0.394), ('loss_self_iou', 0.02), ('cardinality_error', 7.902), ('loss_ce_0', 0.319), ('loss_counter_0', 0.133), ('loss_bbox_0', 0.05), ('loss_giou_0', 0.421), ('loss_self_iou_0', 0.026), ('cardinality_error_0', 7.902), ('loss_caption_0', 2.817), ('loss_caption', 2.654), ('total_loss', 15.588)]), time/iter = 0.167, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 665 (epoch 0), loss = OrderedDict([('loss_ce', 0.312), ('loss_counter', 0.135), ('loss_bbox', 0.034), ('loss_giou', 0.345), ('loss_self_iou', 0.017), ('cardinality_error', 7.805), ('loss_ce_0', 0.319), ('loss_counter_0', 0.131), ('loss_bbox_0', 0.038), ('loss_giou_0', 0.372), ('loss_self_iou_0', 0.019), ('cardinality_error_0', 7.805), ('loss_caption_0', 2.758), ('loss_caption', 2.635), ('total_loss', 15.049)]), time/iter = 0.157, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 798 (epoch 0), loss = OrderedDict([('loss_ce', 0.321), ('loss_counter', 0.125), ('loss_bbox', 0.03), ('loss_giou', 0.319), ('loss_self_iou', 0.015), ('cardinality_error', 7.774), ('loss_ce_0', 0.331), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.032), ('loss_giou_0', 0.344), ('loss_self_iou_0', 0.015), ('cardinality_error_0', 7.774), ('loss_caption_0', 2.66), ('loss_caption', 2.559), ('total_loss', 14.519)]), time/iter = 0.155, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 931 (epoch 0), loss = OrderedDict([('loss_ce', 0.327), ('loss_counter', 0.122), ('loss_bbox', 0.027), ('loss_giou', 0.306), ('loss_self_iou', 0.011), ('cardinality_error', 7.865), ('loss_ce_0', 0.346), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.029), ('loss_giou_0', 0.327), ('loss_self_iou_0', 0.012), ('cardinality_error_0', 7.865), ('loss_caption_0', 2.54), ('loss_caption', 2.468), ('total_loss', 14.017)]), time/iter = 0.154, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 1064 (epoch 0), loss = OrderedDict([('loss_ce', 0.331), ('loss_counter', 0.121), ('loss_bbox', 0.027), ('loss_giou', 0.292), ('loss_self_iou', 0.01), ('cardinality_error', 7.579), ('loss_ce_0', 0.345), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.028), ('loss_giou_0', 0.311), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.579), ('loss_caption_0', 2.639), ('loss_caption', 2.626), ('total_loss', 14.419)]), time/iter = 0.163, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 1197 (epoch 0), loss = OrderedDict([('loss_ce', 0.325), ('loss_counter', 0.118), ('loss_bbox', 0.026), ('loss_giou', 0.296), ('loss_self_iou', 0.011), ('cardinality_error', 7.241), ('loss_ce_0', 0.339), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.028), ('loss_giou_0', 0.317), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.241), ('loss_caption_0', 2.501), ('loss_caption', 2.496), ('total_loss', 13.892)]), time/iter = 0.154, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 1330 (epoch 0), loss = OrderedDict([('loss_ce', 0.327), ('loss_counter', 0.126), ('loss_bbox', 0.026), ('loss_giou', 0.304), ('loss_self_iou', 0.011), ('cardinality_error', 7.94), ('loss_ce_0', 0.334), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.029), ('loss_giou_0', 0.332), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.94), ('loss_caption_0', 2.635), ('loss_caption', 2.619), ('total_loss', 14.504)]), time/iter = 0.158, bad_vid = 0.000 Validation results of iter 1333: Bleu_1:0.16894357888730638 Bleu_2:0.09902176620134434 Bleu_3:0.05312286436412136 Bleu_4:0.026212861867102137 METEOR:0.0791142699299577 ROUGE_L:0.15563765109454591 CIDEr:0.4087091055845523 Recall:0.1991554685892762 Precision:0.40083793546594454 soda_c:0.05642652494419026 para_Bleu_1:0.28013834967939705 para_Bleu_2:0.16393959632782257 para_Bleu_3:0.09809744775628881 para_Bleu_4:0.060378126412557326 para_METEOR:0.1286956339033507 para_ROUGE_L:0.29903071052996405 para_CIDEr:0.14675303603221324 overall score of iter 1333: 0.3358267963481213 Save model at iter 1333 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save Best-model at iter 1333 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 1463 (epoch 1), loss = OrderedDict([('loss_ce', 0.322), ('loss_counter', 0.128), ('loss_bbox', 0.026), ('loss_giou', 0.301), ('loss_self_iou', 0.011), ('cardinality_error', 7.699), ('loss_ce_0', 0.335), ('loss_counter_0', 0.129), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.316), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.699), ('loss_caption_0', 2.448), ('loss_caption', 2.462), ('total_loss', 13.729)]), time/iter = 0.660, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 1596 (epoch 1), loss = OrderedDict([('loss_ce', 0.311), ('loss_counter', 0.126), ('loss_bbox', 0.022), ('loss_giou', 0.284), ('loss_self_iou', 0.01), ('cardinality_error', 8.233), ('loss_ce_0', 0.322), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.31), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 8.233), ('loss_caption_0', 2.348), ('loss_caption', 2.348), ('total_loss', 13.16)]), time/iter = 0.153, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 1729 (epoch 1), loss = OrderedDict([('loss_ce', 0.311), ('loss_counter', 0.124), ('loss_bbox', 0.023), ('loss_giou', 0.273), ('loss_self_iou', 0.01), ('cardinality_error', 7.632), ('loss_ce_0', 0.32), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.307), ('loss_self_iou_0', 0.012), ('cardinality_error_0', 7.632), ('loss_caption_0', 2.363), ('loss_caption', 2.353), ('total_loss', 13.14)]), time/iter = 0.156, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 1862 (epoch 1), loss = OrderedDict([('loss_ce', 0.316), ('loss_counter', 0.12), ('loss_bbox', 0.023), ('loss_giou', 0.268), ('loss_self_iou', 0.01), ('cardinality_error', 7.609), ('loss_ce_0', 0.32), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.29), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.609), ('loss_caption_0', 2.439), ('loss_caption', 2.419), ('total_loss', 13.343)]), time/iter = 0.159, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 1995 (epoch 1), loss = OrderedDict([('loss_ce', 0.314), ('loss_counter', 0.122), ('loss_bbox', 0.022), ('loss_giou', 0.281), ('loss_self_iou', 0.009), ('cardinality_error', 7.541), ('loss_ce_0', 0.322), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.309), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.541), ('loss_caption_0', 2.503), ('loss_caption', 2.503), ('total_loss', 13.766)]), time/iter = 0.159, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 2128 (epoch 1), loss = OrderedDict([('loss_ce', 0.316), ('loss_counter', 0.126), ('loss_bbox', 0.024), ('loss_giou', 0.284), ('loss_self_iou', 0.009), ('cardinality_error', 7.789), ('loss_ce_0', 0.324), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.301), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.789), ('loss_caption_0', 2.5), ('loss_caption', 2.493), ('total_loss', 13.73)]), time/iter = 0.161, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 2261 (epoch 1), loss = OrderedDict([('loss_ce', 0.31), ('loss_counter', 0.122), ('loss_bbox', 0.023), ('loss_giou', 0.285), ('loss_self_iou', 0.012), ('cardinality_error', 7.902), ('loss_ce_0', 0.316), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.304), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.902), ('loss_caption_0', 2.425), ('loss_caption', 2.424), ('total_loss', 13.426)]), time/iter = 0.156, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 2394 (epoch 1), loss = OrderedDict([('loss_ce', 0.315), ('loss_counter', 0.126), ('loss_bbox', 0.025), ('loss_giou', 0.29), ('loss_self_iou', 0.011), ('cardinality_error', 7.534), ('loss_ce_0', 0.323), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.308), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.534), ('loss_caption_0', 2.439), ('loss_caption', 2.435), ('total_loss', 13.54)]), time/iter = 0.154, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 2527 (epoch 1), loss = OrderedDict([('loss_ce', 0.313), ('loss_counter', 0.125), ('loss_bbox', 0.023), ('loss_giou', 0.276), ('loss_self_iou', 0.009), ('cardinality_error', 7.647), ('loss_ce_0', 0.319), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.296), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.647), ('loss_caption_0', 2.454), ('loss_caption', 2.455), ('total_loss', 13.492)]), time/iter = 0.157, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 2660 (epoch 1), loss = OrderedDict([('loss_ce', 0.313), ('loss_counter', 0.131), ('loss_bbox', 0.023), ('loss_giou', 0.273), ('loss_self_iou', 0.01), ('cardinality_error', 8.0), ('loss_ce_0', 0.317), ('loss_counter_0', 0.128), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.294), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 8.0), ('loss_caption_0', 2.464), ('loss_caption', 2.451), ('total_loss', 13.487)]), time/iter = 0.167, bad_vid = 0.000 Validation results of iter 2666: Bleu_1:0.18247710374533507 Bleu_2:0.10433126216854799 Bleu_3:0.05471515540980739 Bleu_4:0.025315544998990337 METEOR:0.08392673175891194 ROUGE_L:0.16810710582244187 CIDEr:0.48711946137609907 Recall:0.23104975652842194 Precision:0.4442690424090867 soda_c:0.06454827356060923 para_Bleu_1:0.27953804293947354 para_Bleu_2:0.1635778619591909 para_Bleu_3:0.09761782578266559 para_Bleu_4:0.060085255296605154 para_METEOR:0.13134445752685775 para_ROUGE_L:0.3040652157082556 para_CIDEr:0.15701615141849948 overall score of iter 2666: 0.34844586424196233 Save model at iter 2666 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save Best-model at iter 2666 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 2793 (epoch 2), loss = OrderedDict([('loss_ce', 0.309), ('loss_counter', 0.119), ('loss_bbox', 0.021), ('loss_giou', 0.26), ('loss_self_iou', 0.01), ('cardinality_error', 7.556), ('loss_ce_0', 0.312), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.285), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.556), ('loss_caption_0', 2.27), ('loss_caption', 2.276), ('total_loss', 12.632)]), time/iter = 0.666, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 2926 (epoch 2), loss = OrderedDict([('loss_ce', 0.313), ('loss_counter', 0.121), ('loss_bbox', 0.023), ('loss_giou', 0.266), ('loss_self_iou', 0.008), ('cardinality_error', 7.444), ('loss_ce_0', 0.317), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.287), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.444), ('loss_caption_0', 2.276), ('loss_caption', 2.291), ('total_loss', 12.726)]), time/iter = 0.157, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 3059 (epoch 2), loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.127), ('loss_bbox', 0.02), ('loss_giou', 0.272), ('loss_self_iou', 0.008), ('cardinality_error', 8.135), ('loss_ce_0', 0.302), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.296), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 8.135), ('loss_caption_0', 2.364), ('loss_caption', 2.364), ('total_loss', 13.057)]), time/iter = 0.165, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 3192 (epoch 2), loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.122), ('loss_bbox', 0.022), ('loss_giou', 0.266), ('loss_self_iou', 0.008), ('cardinality_error', 7.699), ('loss_ce_0', 0.306), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.286), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.699), ('loss_caption_0', 2.367), ('loss_caption', 2.381), ('total_loss', 13.038)]), time/iter = 0.177, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 3325 (epoch 2), loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.123), ('loss_bbox', 0.021), ('loss_giou', 0.274), ('loss_self_iou', 0.009), ('cardinality_error', 7.932), ('loss_ce_0', 0.3), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.291), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.932), ('loss_caption_0', 2.323), ('loss_caption', 2.33), ('total_loss', 12.887)]), time/iter = 0.155, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 3458 (epoch 2), loss = OrderedDict([('loss_ce', 0.31), ('loss_counter', 0.124), ('loss_bbox', 0.021), ('loss_giou', 0.277), ('loss_self_iou', 0.01), ('cardinality_error', 7.865), ('loss_ce_0', 0.31), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.295), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.865), ('loss_caption_0', 2.351), ('loss_caption', 2.341), ('total_loss', 13.038)]), time/iter = 0.153, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 3591 (epoch 2), loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.114), ('loss_bbox', 0.022), ('loss_giou', 0.263), ('loss_self_iou', 0.009), ('cardinality_error', 7.586), ('loss_ce_0', 0.308), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.285), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.586), ('loss_caption_0', 2.222), ('loss_caption', 2.223), ('total_loss', 12.425)]), time/iter = 0.154, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 3724 (epoch 2), loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.123), ('loss_bbox', 0.023), ('loss_giou', 0.265), ('loss_self_iou', 0.009), ('cardinality_error', 7.624), ('loss_ce_0', 0.307), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.279), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.624), ('loss_caption_0', 2.38), ('loss_caption', 2.368), ('total_loss', 13.014)]), time/iter = 0.159, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 3857 (epoch 2), loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.115), ('loss_bbox', 0.021), ('loss_giou', 0.264), ('loss_self_iou', 0.009), ('cardinality_error', 7.489), ('loss_ce_0', 0.312), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.279), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.489), ('loss_caption_0', 2.343), ('loss_caption', 2.344), ('total_loss', 12.897)]), time/iter = 0.155, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 3990 (epoch 2), loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.134), ('loss_bbox', 0.02), ('loss_giou', 0.268), ('loss_self_iou', 0.012), ('cardinality_error', 8.301), ('loss_ce_0', 0.299), ('loss_counter_0', 0.131), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.289), ('loss_self_iou_0', 0.013), ('cardinality_error_0', 8.301), ('loss_caption_0', 2.327), ('loss_caption', 2.346), ('total_loss', 12.9)]), time/iter = 0.154, bad_vid = 0.000 Validation results of iter 3999: Bleu_1:0.18812761655735627 Bleu_2:0.11394688266117041 Bleu_3:0.06350983100569632 Bleu_4:0.03295035253718016 METEOR:0.08673497362280043 ROUGE_L:0.17099683701262633 CIDEr:0.534654554166069 Recall:0.2545535313519452 Precision:0.4357073390990242 soda_c:0.06940030844072555 para_Bleu_1:0.31911536052560924 para_Bleu_2:0.19074275606485158 para_Bleu_3:0.11503629156908896 para_Bleu_4:0.07096292455051724 para_METEOR:0.14141970569772275 para_ROUGE_L:0.3133292457236414 para_CIDEr:0.18756071216976763 overall score of iter 3999: 0.3999433424180076 Save model at iter 3999 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save Best-model at iter 3999 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 4123 (epoch 3), loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.129), ('loss_bbox', 0.021), ('loss_giou', 0.256), ('loss_self_iou', 0.008), ('cardinality_error', 7.925), ('loss_ce_0', 0.307), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.275), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.925), ('loss_caption_0', 2.272), ('loss_caption', 2.28), ('total_loss', 12.579)]), time/iter = 0.678, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 4256 (epoch 3), loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.121), ('loss_bbox', 0.02), ('loss_giou', 0.256), ('loss_self_iou', 0.008), ('cardinality_error', 7.632), ('loss_ce_0', 0.31), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.276), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.632), ('loss_caption_0', 2.247), ('loss_caption', 2.252), ('total_loss', 12.484)]), time/iter = 0.159, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 4389 (epoch 3), loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.12), ('loss_bbox', 0.021), ('loss_giou', 0.26), ('loss_self_iou', 0.011), ('cardinality_error', 7.526), ('loss_ce_0', 0.309), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.272), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.526), ('loss_caption_0', 2.194), ('loss_caption', 2.205), ('total_loss', 12.273)]), time/iter = 0.161, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 4522 (epoch 3), loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.115), ('loss_bbox', 0.019), ('loss_giou', 0.248), ('loss_self_iou', 0.007), ('cardinality_error', 7.519), ('loss_ce_0', 0.303), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.262), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.519), ('loss_caption_0', 2.335), ('loss_caption', 2.326), ('total_loss', 12.689)]), time/iter = 0.170, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 4655 (epoch 3), loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.122), ('loss_bbox', 0.02), ('loss_giou', 0.263), ('loss_self_iou', 0.008), ('cardinality_error', 7.97), ('loss_ce_0', 0.298), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.285), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.97), ('loss_caption_0', 2.254), ('loss_caption', 2.267), ('total_loss', 12.545)]), time/iter = 0.153, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 4788 (epoch 3), loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.118), ('loss_bbox', 0.021), ('loss_giou', 0.253), ('loss_self_iou', 0.008), ('cardinality_error', 7.481), ('loss_ce_0', 0.308), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.268), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.481), ('loss_caption_0', 2.208), ('loss_caption', 2.195), ('total_loss', 12.24)]), time/iter = 0.151, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 4921 (epoch 3), loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.12), ('loss_bbox', 0.019), ('loss_giou', 0.262), ('loss_self_iou', 0.01), ('cardinality_error', 7.842), ('loss_ce_0', 0.305), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.284), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.842), ('loss_caption_0', 2.186), ('loss_caption', 2.196), ('total_loss', 12.289)]), time/iter = 0.162, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 5054 (epoch 3), loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.121), ('loss_bbox', 0.022), ('loss_giou', 0.26), ('loss_self_iou', 0.009), ('cardinality_error', 7.887), ('loss_ce_0', 0.305), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.271), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.887), ('loss_caption_0', 2.242), ('loss_caption', 2.239), ('total_loss', 12.422)]), time/iter = 0.170, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 5187 (epoch 3), loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.124), ('loss_bbox', 0.021), ('loss_giou', 0.262), ('loss_self_iou', 0.009), ('cardinality_error', 7.932), ('loss_ce_0', 0.305), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.277), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.932), ('loss_caption_0', 2.25), ('loss_caption', 2.246), ('total_loss', 12.483)]), time/iter = 0.166, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 5320 (epoch 3), loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.12), ('loss_bbox', 0.022), ('loss_giou', 0.26), ('loss_self_iou', 0.006), ('cardinality_error', 7.729), ('loss_ce_0', 0.298), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.279), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.729), ('loss_caption_0', 2.287), ('loss_caption', 2.298), ('total_loss', 12.64)]), time/iter = 0.161, bad_vid = 0.000 Validation results of iter 5332: Bleu_1:0.19536023703614988 Bleu_2:0.11676341716851109 Bleu_3:0.06337153157323498 Bleu_4:0.031788948303475714 METEOR:0.09287502887069582 ROUGE_L:0.18168372139225142 CIDEr:0.5345089450528974 Recall:0.26186565000159123 Precision:0.4578470702650138 soda_c:0.06891495599002981 para_Bleu_1:0.3645537642333956 para_Bleu_2:0.21504928179111618 para_Bleu_3:0.1297486406737134 para_Bleu_4:0.08010111193897063 para_METEOR:0.1518569517959942 para_ROUGE_L:0.3241825281759821 para_CIDEr:0.22211083978975357 overall score of iter 5332: 0.4540689035247184 Save model at iter 5332 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save Best-model at iter 5332 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 5453 (epoch 4), loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.113), ('loss_bbox', 0.022), ('loss_giou', 0.25), ('loss_self_iou', 0.011), ('cardinality_error', 7.519), ('loss_ce_0', 0.298), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.269), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.519), ('loss_caption_0', 2.175), ('loss_caption', 2.176), ('total_loss', 12.088)]), time/iter = 0.716, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 5586 (epoch 4), loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.12), ('loss_bbox', 0.018), ('loss_giou', 0.252), ('loss_self_iou', 0.007), ('cardinality_error', 7.662), ('loss_ce_0', 0.292), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.274), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.662), ('loss_caption_0', 2.16), ('loss_caption', 2.132), ('total_loss', 11.979)]), time/iter = 0.162, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 5719 (epoch 4), loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.13), ('loss_bbox', 0.02), ('loss_giou', 0.255), ('loss_self_iou', 0.008), ('cardinality_error', 8.451), ('loss_ce_0', 0.302), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.273), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 8.451), ('loss_caption_0', 2.166), ('loss_caption', 2.164), ('total_loss', 12.113)]), time/iter = 0.156, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 5852 (epoch 4), loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.12), ('loss_bbox', 0.019), ('loss_giou', 0.246), ('loss_self_iou', 0.007), ('cardinality_error', 7.835), ('loss_ce_0', 0.302), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.267), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.835), ('loss_caption_0', 2.122), ('loss_caption', 2.111), ('total_loss', 11.841)]), time/iter = 0.157, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 5985 (epoch 4), loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.122), ('loss_bbox', 0.02), ('loss_giou', 0.243), ('loss_self_iou', 0.009), ('cardinality_error', 7.474), ('loss_ce_0', 0.298), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.263), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.474), ('loss_caption_0', 2.149), ('loss_caption', 2.14), ('total_loss', 11.926)]), time/iter = 0.155, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 6118 (epoch 4), loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.113), ('loss_bbox', 0.018), ('loss_giou', 0.241), ('loss_self_iou', 0.008), ('cardinality_error', 7.639), ('loss_ce_0', 0.302), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.259), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.639), ('loss_caption_0', 2.235), ('loss_caption', 2.215), ('total_loss', 12.218)]), time/iter = 0.160, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 6251 (epoch 4), loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.125), ('loss_bbox', 0.02), ('loss_giou', 0.251), ('loss_self_iou', 0.007), ('cardinality_error', 7.857), ('loss_ce_0', 0.301), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.268), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.857), ('loss_caption_0', 2.235), ('loss_caption', 2.226), ('total_loss', 12.328)]), time/iter = 0.155, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 6384 (epoch 4), loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.124), ('loss_bbox', 0.02), ('loss_giou', 0.246), ('loss_self_iou', 0.006), ('cardinality_error', 7.82), ('loss_ce_0', 0.301), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.265), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.82), ('loss_caption_0', 2.208), ('loss_caption', 2.183), ('total_loss', 12.157)]), time/iter = 0.159, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 6517 (epoch 4), loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.12), ('loss_bbox', 0.02), ('loss_giou', 0.256), ('loss_self_iou', 0.008), ('cardinality_error', 7.872), ('loss_ce_0', 0.295), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.271), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.872), ('loss_caption_0', 2.135), ('loss_caption', 2.155), ('total_loss', 11.99)]), time/iter = 0.162, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 6650 (epoch 4), loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.112), ('loss_bbox', 0.021), ('loss_giou', 0.244), ('loss_self_iou', 0.008), ('cardinality_error', 7.398), ('loss_ce_0', 0.297), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.26), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.398), ('loss_caption_0', 2.205), ('loss_caption', 2.202), ('total_loss', 12.127)]), time/iter = 0.156, bad_vid = 0.000 Validation results of iter 6665: Bleu_1:0.19366491706119263 Bleu_2:0.1161802397372496 Bleu_3:0.06381908710297783 Bleu_4:0.0310996008751752 METEOR:0.0900086447067842 ROUGE_L:0.1772625018945245 CIDEr:0.5329339889166991 Recall:0.27822837264850414 Precision:0.4414053002674447 soda_c:0.0725148309247326 para_Bleu_1:0.36779729697992286 para_Bleu_2:0.2189609464261768 para_Bleu_3:0.13170237886801614 para_Bleu_4:0.08102932652379062 para_METEOR:0.15287168689015676 para_ROUGE_L:0.32609559286330886 para_CIDEr:0.24981796796266917 overall score of iter 6665: 0.48371898137661656 Save model at iter 6665 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save Best-model at iter 6665 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 6783 (epoch 5), loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.117), ('loss_bbox', 0.019), ('loss_giou', 0.24), ('loss_self_iou', 0.007), ('cardinality_error', 7.586), ('loss_ce_0', 0.29), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.257), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.586), ('loss_caption_0', 2.02), ('loss_caption', 2.014), ('total_loss', 11.332)]), time/iter = 0.689, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 6916 (epoch 5), loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.118), ('loss_bbox', 0.021), ('loss_giou', 0.249), ('loss_self_iou', 0.008), ('cardinality_error', 7.519), ('loss_ce_0', 0.302), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.264), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.519), ('loss_caption_0', 2.118), ('loss_caption', 2.101), ('total_loss', 11.817)]), time/iter = 0.162, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 7049 (epoch 5), loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.119), ('loss_bbox', 0.019), ('loss_giou', 0.25), ('loss_self_iou', 0.007), ('cardinality_error', 7.699), ('loss_ce_0', 0.292), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.265), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.699), ('loss_caption_0', 2.105), ('loss_caption', 2.111), ('total_loss', 11.78)]), time/iter = 0.172, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 7182 (epoch 5), loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.115), ('loss_bbox', 0.021), ('loss_giou', 0.242), ('loss_self_iou', 0.008), ('cardinality_error', 7.594), ('loss_ce_0', 0.288), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.257), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.594), ('loss_caption_0', 2.194), ('loss_caption', 2.195), ('total_loss', 12.045)]), time/iter = 0.160, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 7315 (epoch 5), loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.123), ('loss_bbox', 0.02), ('loss_giou', 0.254), ('loss_self_iou', 0.009), ('cardinality_error', 8.301), ('loss_ce_0', 0.291), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.268), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 8.301), ('loss_caption_0', 2.096), ('loss_caption', 2.09), ('total_loss', 11.741)]), time/iter = 0.153, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 7448 (epoch 5), loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.12), ('loss_bbox', 0.019), ('loss_giou', 0.234), ('loss_self_iou', 0.006), ('cardinality_error', 7.677), ('loss_ce_0', 0.292), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.251), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.677), ('loss_caption_0', 2.076), ('loss_caption', 2.063), ('total_loss', 11.513)]), time/iter = 0.152, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 7581 (epoch 5), loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.116), ('loss_bbox', 0.019), ('loss_giou', 0.238), ('loss_self_iou', 0.008), ('cardinality_error', 7.534), ('loss_ce_0', 0.295), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.253), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.534), ('loss_caption_0', 2.114), ('loss_caption', 2.112), ('total_loss', 11.718)]), time/iter = 0.154, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 7714 (epoch 5), loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.117), ('loss_bbox', 0.018), ('loss_giou', 0.235), ('loss_self_iou', 0.008), ('cardinality_error', 7.677), ('loss_ce_0', 0.291), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.253), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.677), ('loss_caption_0', 2.167), ('loss_caption', 2.179), ('total_loss', 11.932)]), time/iter = 0.160, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 7847 (epoch 5), loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.118), ('loss_bbox', 0.019), ('loss_giou', 0.252), ('loss_self_iou', 0.009), ('cardinality_error', 8.053), ('loss_ce_0', 0.289), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.269), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 8.053), ('loss_caption_0', 2.106), ('loss_caption', 2.115), ('total_loss', 11.804)]), time/iter = 0.155, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 7980 (epoch 5), loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.118), ('loss_bbox', 0.019), ('loss_giou', 0.249), ('loss_self_iou', 0.007), ('cardinality_error', 7.902), ('loss_ce_0', 0.295), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.268), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.902), ('loss_caption_0', 2.151), ('loss_caption', 2.153), ('total_loss', 11.979)]), time/iter = 0.158, bad_vid = 0.000 Validation results of iter 7998: Bleu_1:0.19874944106127662 Bleu_2:0.12266046915797622 Bleu_3:0.07150852984916518 Bleu_4:0.036185181004552064 METEOR:0.09274687098087099 ROUGE_L:0.18413336093424784 CIDEr:0.5727051685734265 Recall:0.259037909270404 Precision:0.451289465457956 soda_c:0.07263494732248185 para_Bleu_1:0.32307562783294125 para_Bleu_2:0.1944214796418441 para_Bleu_3:0.11901149393254483 para_Bleu_4:0.07454555120453704 para_METEOR:0.14324209261218024 para_ROUGE_L:0.31918573126228 para_CIDEr:0.23096832321460165 overall score of iter 7998: 0.4487559670313189 Save model at iter 7998 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 8113 (epoch 6), loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.114), ('loss_bbox', 0.019), ('loss_giou', 0.236), ('loss_self_iou', 0.008), ('cardinality_error', 7.617), ('loss_ce_0', 0.295), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.257), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.617), ('loss_caption_0', 2.036), ('loss_caption', 2.044), ('total_loss', 11.427)]), time/iter = 0.677, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 8246 (epoch 6), loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.119), ('loss_bbox', 0.019), ('loss_giou', 0.237), ('loss_self_iou', 0.006), ('cardinality_error', 7.827), ('loss_ce_0', 0.283), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.257), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.827), ('loss_caption_0', 2.055), ('loss_caption', 2.057), ('total_loss', 11.458)]), time/iter = 0.160, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 8379 (epoch 6), loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.118), ('loss_bbox', 0.018), ('loss_giou', 0.225), ('loss_self_iou', 0.005), ('cardinality_error', 7.82), ('loss_ce_0', 0.286), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.246), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.82), ('loss_caption_0', 2.046), ('loss_caption', 2.041), ('total_loss', 11.331)]), time/iter = 0.160, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 8512 (epoch 6), loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.114), ('loss_bbox', 0.018), ('loss_giou', 0.228), ('loss_self_iou', 0.006), ('cardinality_error', 7.654), ('loss_ce_0', 0.283), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.245), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.654), ('loss_caption_0', 1.991), ('loss_caption', 1.997), ('total_loss', 11.118)]), time/iter = 0.157, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 8645 (epoch 6), loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.115), ('loss_bbox', 0.02), ('loss_giou', 0.251), ('loss_self_iou', 0.007), ('cardinality_error', 8.068), ('loss_ce_0', 0.287), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.265), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 8.068), ('loss_caption_0', 2.094), ('loss_caption', 2.097), ('total_loss', 11.714)]), time/iter = 0.159, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 8778 (epoch 6), loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.121), ('loss_bbox', 0.019), ('loss_giou', 0.24), ('loss_self_iou', 0.008), ('cardinality_error', 8.008), ('loss_ce_0', 0.286), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.258), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 8.008), ('loss_caption_0', 2.092), ('loss_caption', 2.092), ('total_loss', 11.63)]), time/iter = 0.155, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 8911 (epoch 6), loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.114), ('loss_bbox', 0.019), ('loss_giou', 0.235), ('loss_self_iou', 0.008), ('cardinality_error', 7.338), ('loss_ce_0', 0.297), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.248), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.338), ('loss_caption_0', 2.051), ('loss_caption', 2.054), ('total_loss', 11.446)]), time/iter = 0.156, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 9044 (epoch 6), loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.105), ('loss_bbox', 0.02), ('loss_giou', 0.227), ('loss_self_iou', 0.008), ('cardinality_error', 7.226), ('loss_ce_0', 0.292), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.243), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.226), ('loss_caption_0', 2.08), ('loss_caption', 2.084), ('total_loss', 11.478)]), time/iter = 0.160, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 9177 (epoch 6), loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.12), ('loss_bbox', 0.019), ('loss_giou', 0.254), ('loss_self_iou', 0.007), ('cardinality_error', 7.977), ('loss_ce_0', 0.288), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.275), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.977), ('loss_caption_0', 2.046), ('loss_caption', 2.031), ('total_loss', 11.546)]), time/iter = 0.158, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 9310 (epoch 6), loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.117), ('loss_bbox', 0.018), ('loss_giou', 0.236), ('loss_self_iou', 0.006), ('cardinality_error', 7.97), ('loss_ce_0', 0.281), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.252), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.97), ('loss_caption_0', 1.986), ('loss_caption', 1.995), ('total_loss', 11.157)]), time/iter = 0.152, bad_vid = 0.000 Validation results of iter 9331: Bleu_1:0.2003309018825777 Bleu_2:0.1225756065112458 Bleu_3:0.06724461390362559 Bleu_4:0.033684328156599955 METEOR:0.0938288297360794 ROUGE_L:0.1832565856913202 CIDEr:0.5805494889367487 Recall:0.28578288505804933 Precision:0.4570872842207636 soda_c:0.07457933387713374 para_Bleu_1:0.3713316702717572 para_Bleu_2:0.22391267992808692 para_Bleu_3:0.1360620228892395 para_Bleu_4:0.08475146307949002 para_METEOR:0.15553928732702577 para_ROUGE_L:0.3279787647771023 para_CIDEr:0.24807495620487915 overall score of iter 9331: 0.4883657066113949 Save model at iter 9331 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save Best-model at iter 9331 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 9443 (epoch 7), loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.114), ('loss_bbox', 0.018), ('loss_giou', 0.226), ('loss_self_iou', 0.006), ('cardinality_error', 7.617), ('loss_ce_0', 0.292), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.239), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.617), ('loss_caption_0', 2.065), ('loss_caption', 2.061), ('total_loss', 11.394)]), time/iter = 0.717, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 9576 (epoch 7), loss = OrderedDict([('loss_ce', 0.285), ('loss_counter', 0.119), ('loss_bbox', 0.02), ('loss_giou', 0.231), ('loss_self_iou', 0.006), ('cardinality_error', 7.917), ('loss_ce_0', 0.284), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.252), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.917), ('loss_caption_0', 1.977), ('loss_caption', 1.974), ('total_loss', 11.093)]), time/iter = 0.165, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 9709 (epoch 7), loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.117), ('loss_bbox', 0.016), ('loss_giou', 0.224), ('loss_self_iou', 0.006), ('cardinality_error', 8.098), ('loss_ce_0', 0.29), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.242), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 8.098), ('loss_caption_0', 2.051), ('loss_caption', 2.063), ('total_loss', 11.373)]), time/iter = 0.170, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 9842 (epoch 7), loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.11), ('loss_bbox', 0.018), ('loss_giou', 0.242), ('loss_self_iou', 0.007), ('cardinality_error', 7.662), ('loss_ce_0', 0.286), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.262), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.662), ('loss_caption_0', 1.939), ('loss_caption', 1.953), ('total_loss', 11.058)]), time/iter = 0.169, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 9975 (epoch 7), loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.116), ('loss_bbox', 0.017), ('loss_giou', 0.238), ('loss_self_iou', 0.006), ('cardinality_error', 8.233), ('loss_ce_0', 0.281), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.255), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 8.233), ('loss_caption_0', 2.024), ('loss_caption', 2.026), ('total_loss', 11.31)]), time/iter = 0.167, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 10108 (epoch 7), loss = OrderedDict([('loss_ce', 0.283), ('loss_counter', 0.111), ('loss_bbox', 0.018), ('loss_giou', 0.232), ('loss_self_iou', 0.006), ('cardinality_error', 7.466), ('loss_ce_0', 0.279), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.246), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.466), ('loss_caption_0', 1.878), ('loss_caption', 1.882), ('total_loss', 10.667)]), time/iter = 0.162, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 10241 (epoch 7), loss = OrderedDict([('loss_ce', 0.285), ('loss_counter', 0.119), ('loss_bbox', 0.018), ('loss_giou', 0.24), ('loss_self_iou', 0.007), ('cardinality_error', 7.722), ('loss_ce_0', 0.282), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.253), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.722), ('loss_caption_0', 1.984), ('loss_caption', 1.988), ('total_loss', 11.165)]), time/iter = 0.162, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 10374 (epoch 7), loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.113), ('loss_bbox', 0.017), ('loss_giou', 0.225), ('loss_self_iou', 0.007), ('cardinality_error', 7.692), ('loss_ce_0', 0.285), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.241), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.692), ('loss_caption_0', 2.089), ('loss_caption', 2.094), ('total_loss', 11.498)]), time/iter = 0.164, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 10507 (epoch 7), loss = OrderedDict([('loss_ce', 0.287), ('loss_counter', 0.113), ('loss_bbox', 0.019), ('loss_giou', 0.22), ('loss_self_iou', 0.007), ('cardinality_error', 7.564), ('loss_ce_0', 0.283), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.241), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.564), ('loss_caption_0', 1.936), ('loss_caption', 1.935), ('total_loss', 10.84)]), time/iter = 0.165, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 10640 (epoch 7), loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.115), ('loss_bbox', 0.02), ('loss_giou', 0.232), ('loss_self_iou', 0.008), ('cardinality_error', 7.549), ('loss_ce_0', 0.278), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.249), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.549), ('loss_caption_0', 2.041), ('loss_caption', 2.042), ('total_loss', 11.323)]), time/iter = 0.178, bad_vid = 0.000 Validation results of iter 10664: Bleu_1:0.19584871429233122 Bleu_2:0.1203954133477019 Bleu_3:0.06765236989260215 Bleu_4:0.03515047236439923 METEOR:0.09347581038898298 ROUGE_L:0.18336361365161372 CIDEr:0.5642570328531701 Recall:0.287053410514844 Precision:0.4506790316418327 soda_c:0.07315525040409161 para_Bleu_1:0.39595219023577966 para_Bleu_2:0.23717913606151478 para_Bleu_3:0.14480681642134902 para_Bleu_4:0.0901695364250172 para_METEOR:0.16127903027678414 para_ROUGE_L:0.3324403291093838 para_CIDEr:0.23804687234043756 overall score of iter 10664: 0.48949543904223886 Save model at iter 10664 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save Best-model at iter 10664 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 10773 (epoch 8), loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.114), ('loss_bbox', 0.017), ('loss_giou', 0.235), ('loss_self_iou', 0.006), ('cardinality_error', 7.94), ('loss_ce_0', 0.278), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.253), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.94), ('loss_caption_0', 1.851), ('loss_caption', 1.84), ('total_loss', 10.561)]), time/iter = 0.724, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 10906 (epoch 8), loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.109), ('loss_bbox', 0.017), ('loss_giou', 0.215), ('loss_self_iou', 0.006), ('cardinality_error', 7.218), ('loss_ce_0', 0.278), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.218), ('loss_caption_0', 1.945), ('loss_caption', 1.948), ('total_loss', 10.791)]), time/iter = 0.165, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 11039 (epoch 8), loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.108), ('loss_bbox', 0.017), ('loss_giou', 0.207), ('loss_self_iou', 0.006), ('cardinality_error', 7.579), ('loss_ce_0', 0.283), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.223), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.579), ('loss_caption_0', 1.92), ('loss_caption', 1.927), ('total_loss', 10.664)]), time/iter = 0.165, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 11172 (epoch 8), loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.11), ('loss_bbox', 0.018), ('loss_giou', 0.215), ('loss_self_iou', 0.006), ('cardinality_error', 7.451), ('loss_ce_0', 0.279), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.451), ('loss_caption_0', 1.91), ('loss_caption', 1.9), ('total_loss', 10.635)]), time/iter = 0.155, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 11305 (epoch 8), loss = OrderedDict([('loss_ce', 0.278), ('loss_counter', 0.125), ('loss_bbox', 0.017), ('loss_giou', 0.233), ('loss_self_iou', 0.006), ('cardinality_error', 8.09), ('loss_ce_0', 0.276), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.244), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 8.09), ('loss_caption_0', 1.876), ('loss_caption', 1.877), ('total_loss', 10.648)]), time/iter = 0.152, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 11438 (epoch 8), loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.113), ('loss_bbox', 0.016), ('loss_giou', 0.211), ('loss_self_iou', 0.005), ('cardinality_error', 7.744), ('loss_ce_0', 0.269), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.744), ('loss_caption_0', 1.981), ('loss_caption', 1.968), ('total_loss', 10.865)]), time/iter = 0.156, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 11571 (epoch 8), loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.114), ('loss_bbox', 0.018), ('loss_giou', 0.225), ('loss_self_iou', 0.006), ('cardinality_error', 7.699), ('loss_ce_0', 0.277), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.243), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.699), ('loss_caption_0', 1.833), ('loss_caption', 1.846), ('total_loss', 10.461)]), time/iter = 0.149, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 11704 (epoch 8), loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.115), ('loss_bbox', 0.017), ('loss_giou', 0.21), ('loss_self_iou', 0.006), ('cardinality_error', 7.82), ('loss_ce_0', 0.278), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.226), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.91), ('loss_caption', 1.915), ('total_loss', 10.628)]), time/iter = 0.159, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 11837 (epoch 8), loss = OrderedDict([('loss_ce', 0.271), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.215), ('loss_self_iou', 0.007), ('cardinality_error', 8.0), ('loss_ce_0', 0.273), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.23), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 8.0), ('loss_caption_0', 1.936), ('loss_caption', 1.939), ('total_loss', 10.726)]), time/iter = 0.156, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 11970 (epoch 8), loss = OrderedDict([('loss_ce', 0.272), ('loss_counter', 0.115), ('loss_bbox', 0.017), ('loss_giou', 0.22), ('loss_self_iou', 0.006), ('cardinality_error', 8.158), ('loss_ce_0', 0.27), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.242), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 8.158), ('loss_caption_0', 1.953), ('loss_caption', 1.962), ('total_loss', 10.881)]), time/iter = 0.168, bad_vid = 0.000 Validation results of iter 11997: Bleu_1:0.19696025394358163 Bleu_2:0.12042554867022627 Bleu_3:0.06805715701089529 Bleu_4:0.034063345644385214 METEOR:0.09208296372249718 ROUGE_L:0.1803782633150628 CIDEr:0.5812603125344058 Recall:0.29169024735901117 Precision:0.44299129936438486 soda_c:0.07606608300691252 para_Bleu_1:0.383549187276652 para_Bleu_2:0.23192713278728125 para_Bleu_3:0.14217181061136971 para_Bleu_4:0.0892715976218228 para_METEOR:0.16074434603101373 para_ROUGE_L:0.3336567463040183 para_CIDEr:0.2859809872200661 overall score of iter 11997: 0.5359969308729027 Save model at iter 11997 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save Best-model at iter 11997 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 12103 (epoch 9), loss = OrderedDict([('loss_ce', 0.275), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.216), ('loss_self_iou', 0.006), ('cardinality_error', 8.038), ('loss_ce_0', 0.274), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 8.038), ('loss_caption_0', 1.832), ('loss_caption', 1.845), ('total_loss', 10.35)]), time/iter = 0.705, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 12236 (epoch 9), loss = OrderedDict([('loss_ce', 0.272), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.206), ('loss_self_iou', 0.005), ('cardinality_error', 7.812), ('loss_ce_0', 0.266), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.223), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.812), ('loss_caption_0', 1.968), ('loss_caption', 1.959), ('total_loss', 10.757)]), time/iter = 0.166, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 12369 (epoch 9), loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.118), ('loss_bbox', 0.016), ('loss_giou', 0.21), ('loss_self_iou', 0.005), ('cardinality_error', 7.827), ('loss_ce_0', 0.27), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.226), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.827), ('loss_caption_0', 1.89), ('loss_caption', 1.903), ('total_loss', 10.534)]), time/iter = 0.158, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 12502 (epoch 9), loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.108), ('loss_bbox', 0.016), ('loss_giou', 0.205), ('loss_self_iou', 0.006), ('cardinality_error', 7.684), ('loss_ce_0', 0.268), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.224), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.684), ('loss_caption_0', 1.903), ('loss_caption', 1.905), ('total_loss', 10.519)]), time/iter = 0.160, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 12635 (epoch 9), loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.111), ('loss_bbox', 0.015), ('loss_giou', 0.218), ('loss_self_iou', 0.005), ('cardinality_error', 7.947), ('loss_ce_0', 0.269), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.232), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.947), ('loss_caption_0', 1.822), ('loss_caption', 1.826), ('total_loss', 10.284)]), time/iter = 0.158, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 12768 (epoch 9), loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.111), ('loss_bbox', 0.017), ('loss_giou', 0.219), ('loss_self_iou', 0.008), ('cardinality_error', 7.669), ('loss_ce_0', 0.276), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.235), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.669), ('loss_caption_0', 1.905), ('loss_caption', 1.909), ('total_loss', 10.662)]), time/iter = 0.160, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 12901 (epoch 9), loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.106), ('loss_bbox', 0.015), ('loss_giou', 0.208), ('loss_self_iou', 0.005), ('cardinality_error', 7.639), ('loss_ce_0', 0.267), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.224), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.639), ('loss_caption_0', 1.856), ('loss_caption', 1.863), ('total_loss', 10.344)]), time/iter = 0.154, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 13034 (epoch 9), loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.112), ('loss_bbox', 0.015), ('loss_giou', 0.216), ('loss_self_iou', 0.005), ('cardinality_error', 7.85), ('loss_ce_0', 0.274), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.85), ('loss_caption_0', 1.841), ('loss_caption', 1.841), ('total_loss', 10.356)]), time/iter = 0.149, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 13167 (epoch 9), loss = OrderedDict([('loss_ce', 0.275), ('loss_counter', 0.109), ('loss_bbox', 0.018), ('loss_giou', 0.21), ('loss_self_iou', 0.005), ('cardinality_error', 7.406), ('loss_ce_0', 0.273), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.226), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.406), ('loss_caption_0', 1.931), ('loss_caption', 1.927), ('total_loss', 10.663)]), time/iter = 0.162, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 13300 (epoch 9), loss = OrderedDict([('loss_ce', 0.274), ('loss_counter', 0.113), ('loss_bbox', 0.017), ('loss_giou', 0.212), ('loss_self_iou', 0.005), ('cardinality_error', 7.737), ('loss_ce_0', 0.272), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.23), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.737), ('loss_caption_0', 1.853), ('loss_caption', 1.849), ('total_loss', 10.379)]), time/iter = 0.154, bad_vid = 0.000 Validation results of iter 13330: Bleu_1:0.20446290018298774 Bleu_2:0.12418412895577716 Bleu_3:0.06899010124646034 Bleu_4:0.03428116460131532 METEOR:0.09595521703655657 ROUGE_L:0.1876517650928566 CIDEr:0.5887832993219201 Recall:0.3017153873964599 Precision:0.4588439095550697 soda_c:0.07875391677883807 para_Bleu_1:0.3953706124668704 para_Bleu_2:0.24043007714841402 para_Bleu_3:0.14833197751929023 para_Bleu_4:0.09386644902900565 para_METEOR:0.16476396966168239 para_ROUGE_L:0.33760319454244797 para_CIDEr:0.31194480042956774 overall score of iter 13330: 0.5705752191202558 Save model at iter 13330 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save Best-model at iter 13330 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 13433 (epoch 10), loss = OrderedDict([('loss_ce', 0.271), ('loss_counter', 0.112), ('loss_bbox', 0.017), ('loss_giou', 0.217), ('loss_self_iou', 0.006), ('cardinality_error', 7.835), ('loss_ce_0', 0.267), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.235), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.835), ('loss_caption_0', 1.804), ('loss_caption', 1.811), ('total_loss', 10.223)]), time/iter = 0.700, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 13566 (epoch 10), loss = OrderedDict([('loss_ce', 0.266), ('loss_counter', 0.116), ('loss_bbox', 0.015), ('loss_giou', 0.204), ('loss_self_iou', 0.005), ('cardinality_error', 7.774), ('loss_ce_0', 0.266), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.221), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.774), ('loss_caption_0', 1.884), ('loss_caption', 1.887), ('total_loss', 10.42)]), time/iter = 0.155, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 13699 (epoch 10), loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.201), ('loss_self_iou', 0.006), ('cardinality_error', 7.729), ('loss_ce_0', 0.259), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.218), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.729), ('loss_caption_0', 1.823), ('loss_caption', 1.806), ('total_loss', 10.083)]), time/iter = 0.158, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 13832 (epoch 10), loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.211), ('loss_self_iou', 0.005), ('cardinality_error', 7.699), ('loss_ce_0', 0.271), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.228), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.699), ('loss_caption_0', 1.855), ('loss_caption', 1.857), ('total_loss', 10.374)]), time/iter = 0.164, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 13965 (epoch 10), loss = OrderedDict([('loss_ce', 0.275), ('loss_counter', 0.105), ('loss_bbox', 0.016), ('loss_giou', 0.196), ('loss_self_iou', 0.006), ('cardinality_error', 7.128), ('loss_ce_0', 0.271), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.128), ('loss_caption_0', 1.809), ('loss_caption', 1.8), ('total_loss', 10.055)]), time/iter = 0.151, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 14098 (epoch 10), loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.112), ('loss_bbox', 0.016), ('loss_giou', 0.213), ('loss_self_iou', 0.007), ('cardinality_error', 7.925), ('loss_ce_0', 0.273), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.23), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.925), ('loss_caption_0', 1.863), ('loss_caption', 1.863), ('total_loss', 10.433)]), time/iter = 0.155, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 14231 (epoch 10), loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.113), ('loss_bbox', 0.017), ('loss_giou', 0.212), ('loss_self_iou', 0.007), ('cardinality_error', 7.82), ('loss_ce_0', 0.262), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.222), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.936), ('loss_caption', 1.929), ('total_loss', 10.624)]), time/iter = 0.161, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 14364 (epoch 10), loss = OrderedDict([('loss_ce', 0.263), ('loss_counter', 0.104), ('loss_bbox', 0.015), ('loss_giou', 0.216), ('loss_self_iou', 0.005), ('cardinality_error', 7.744), ('loss_ce_0', 0.263), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.227), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.744), ('loss_caption_0', 1.757), ('loss_caption', 1.754), ('total_loss', 9.948)]), time/iter = 0.185, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 14497 (epoch 10), loss = OrderedDict([('loss_ce', 0.266), ('loss_counter', 0.11), ('loss_bbox', 0.015), ('loss_giou', 0.2), ('loss_self_iou', 0.005), ('cardinality_error', 7.827), ('loss_ce_0', 0.265), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.827), ('loss_caption_0', 1.896), ('loss_caption', 1.894), ('total_loss', 10.407)]), time/iter = 0.155, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 14630 (epoch 10), loss = OrderedDict([('loss_ce', 0.263), ('loss_counter', 0.113), ('loss_bbox', 0.015), ('loss_giou', 0.208), ('loss_self_iou', 0.005), ('cardinality_error', 7.925), ('loss_ce_0', 0.261), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.224), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.925), ('loss_caption_0', 1.84), ('loss_caption', 1.842), ('total_loss', 10.253)]), time/iter = 0.158, bad_vid = 0.000 Validation results of iter 14663: Bleu_1:0.19267153393038786 Bleu_2:0.11732781330402656 Bleu_3:0.06746115616325608 Bleu_4:0.03425583839334337 METEOR:0.08963300348041837 ROUGE_L:0.17480207136309905 CIDEr:0.575137603362526 Recall:0.30432682743951917 Precision:0.4353044354138446 soda_c:0.07762847290423684 para_Bleu_1:0.393384019586376 para_Bleu_2:0.23835405770332685 para_Bleu_3:0.14545808678454117 para_Bleu_4:0.09085202435904723 para_METEOR:0.16354570345255123 para_ROUGE_L:0.3343729651839732 para_CIDEr:0.27098453497923136 overall score of iter 14663: 0.5253822627908299 Save model at iter 14663 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 14763 (epoch 11), loss = OrderedDict([('loss_ce', 0.266), ('loss_counter', 0.111), ('loss_bbox', 0.015), ('loss_giou', 0.208), ('loss_self_iou', 0.006), ('cardinality_error', 7.85), ('loss_ce_0', 0.264), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.225), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.85), ('loss_caption_0', 1.87), ('loss_caption', 1.877), ('total_loss', 10.398)]), time/iter = 0.690, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 14896 (epoch 11), loss = OrderedDict([('loss_ce', 0.26), ('loss_counter', 0.112), ('loss_bbox', 0.015), ('loss_giou', 0.2), ('loss_self_iou', 0.005), ('cardinality_error', 7.692), ('loss_ce_0', 0.259), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.217), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.796), ('loss_caption', 1.784), ('total_loss', 9.979)]), time/iter = 0.160, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 15029 (epoch 11), loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.103), ('loss_bbox', 0.015), ('loss_giou', 0.195), ('loss_self_iou', 0.006), ('cardinality_error', 7.414), ('loss_ce_0', 0.264), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.414), ('loss_caption_0', 1.763), ('loss_caption', 1.767), ('total_loss', 9.842)]), time/iter = 0.154, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 15162 (epoch 11), loss = OrderedDict([('loss_ce', 0.263), ('loss_counter', 0.113), ('loss_bbox', 0.015), ('loss_giou', 0.196), ('loss_self_iou', 0.004), ('cardinality_error', 7.767), ('loss_ce_0', 0.262), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.767), ('loss_caption_0', 1.781), ('loss_caption', 1.781), ('total_loss', 9.916)]), time/iter = 0.155, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 15295 (epoch 11), loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.106), ('loss_bbox', 0.015), ('loss_giou', 0.2), ('loss_self_iou', 0.005), ('cardinality_error', 7.662), ('loss_ce_0', 0.255), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.662), ('loss_caption_0', 1.735), ('loss_caption', 1.75), ('total_loss', 9.755)]), time/iter = 0.153, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 15428 (epoch 11), loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.114), ('loss_bbox', 0.015), ('loss_giou', 0.21), ('loss_self_iou', 0.005), ('cardinality_error', 7.992), ('loss_ce_0', 0.261), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.22), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.852), ('loss_caption', 1.86), ('total_loss', 10.298)]), time/iter = 0.156, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 15561 (epoch 11), loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.112), ('loss_bbox', 0.015), ('loss_giou', 0.204), ('loss_self_iou', 0.006), ('cardinality_error', 8.068), ('loss_ce_0', 0.257), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.218), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 8.068), ('loss_caption_0', 1.878), ('loss_caption', 1.866), ('total_loss', 10.314)]), time/iter = 0.161, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 15694 (epoch 11), loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.106), ('loss_bbox', 0.015), ('loss_giou', 0.202), ('loss_self_iou', 0.004), ('cardinality_error', 7.647), ('loss_ce_0', 0.257), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.215), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.647), ('loss_caption_0', 1.7), ('loss_caption', 1.684), ('total_loss', 9.569)]), time/iter = 0.152, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 15827 (epoch 11), loss = OrderedDict([('loss_ce', 0.259), ('loss_counter', 0.104), ('loss_bbox', 0.016), ('loss_giou', 0.194), ('loss_self_iou', 0.005), ('cardinality_error', 7.722), ('loss_ce_0', 0.257), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.209), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.722), ('loss_caption_0', 1.848), ('loss_caption', 1.839), ('total_loss', 10.119)]), time/iter = 0.153, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 15960 (epoch 11), loss = OrderedDict([('loss_ce', 0.26), ('loss_counter', 0.107), ('loss_bbox', 0.015), ('loss_giou', 0.197), ('loss_self_iou', 0.004), ('cardinality_error', 7.609), ('loss_ce_0', 0.257), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.609), ('loss_caption_0', 1.847), ('loss_caption', 1.858), ('total_loss', 10.198)]), time/iter = 0.161, bad_vid = 0.000 Validation results of iter 15996: Bleu_1:0.1989422607268001 Bleu_2:0.12223038556953512 Bleu_3:0.06835990671747892 Bleu_4:0.03486159828438583 METEOR:0.09408978838449876 ROUGE_L:0.18200142867223945 CIDEr:0.593480700759431 Recall:0.30795469953703025 Precision:0.4513424333993264 soda_c:0.0796861065455984 para_Bleu_1:0.39594509057043764 para_Bleu_2:0.24087109399513515 para_Bleu_3:0.14790262814870953 para_Bleu_4:0.09321042711819619 para_METEOR:0.1655617051143519 para_ROUGE_L:0.3391051008488012 para_CIDEr:0.32807196750555834 overall score of iter 15996: 0.5868440997381064 Save model at iter 15996 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save Best-model at iter 15996 to checkpoint file. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 16093 (epoch 12), loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.19), ('loss_self_iou', 0.004), ('cardinality_error', 7.992), ('loss_ce_0', 0.258), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.773), ('loss_caption', 1.769), ('total_loss', 9.789)]), time/iter = 0.727, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 16226 (epoch 12), loss = OrderedDict([('loss_ce', 0.26), ('loss_counter', 0.11), ('loss_bbox', 0.014), ('loss_giou', 0.198), ('loss_self_iou', 0.004), ('cardinality_error', 7.805), ('loss_ce_0', 0.259), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.215), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.805), ('loss_caption_0', 1.743), ('loss_caption', 1.749), ('total_loss', 9.786)]), time/iter = 0.156, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 16359 (epoch 12), loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.116), ('loss_bbox', 0.014), ('loss_giou', 0.198), ('loss_self_iou', 0.005), ('cardinality_error', 7.85), ('loss_ce_0', 0.264), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.85), ('loss_caption_0', 1.797), ('loss_caption', 1.778), ('total_loss', 9.972)]), time/iter = 0.156, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 16492 (epoch 12), loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.105), ('loss_bbox', 0.015), ('loss_giou', 0.189), ('loss_self_iou', 0.004), ('cardinality_error', 7.383), ('loss_ce_0', 0.257), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.383), ('loss_caption_0', 1.796), ('loss_caption', 1.808), ('total_loss', 9.899)]), time/iter = 0.159, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 16625 (epoch 12), loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.109), ('loss_bbox', 0.014), ('loss_giou', 0.186), ('loss_self_iou', 0.005), ('cardinality_error', 7.782), ('loss_ce_0', 0.256), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.782), ('loss_caption_0', 1.78), ('loss_caption', 1.779), ('total_loss', 9.812)]), time/iter = 0.159, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 16758 (epoch 12), loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.106), ('loss_bbox', 0.014), ('loss_giou', 0.196), ('loss_self_iou', 0.005), ('cardinality_error', 7.962), ('loss_ce_0', 0.252), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.962), ('loss_caption_0', 1.795), ('loss_caption', 1.806), ('total_loss', 9.948)]), time/iter = 0.153, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 16891 (epoch 12), loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.109), ('loss_bbox', 0.016), ('loss_giou', 0.199), ('loss_self_iou', 0.005), ('cardinality_error', 7.797), ('loss_ce_0', 0.255), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.797), ('loss_caption_0', 1.788), ('loss_caption', 1.782), ('total_loss', 9.914)]), time/iter = 0.157, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 17024 (epoch 12), loss = OrderedDict([('loss_ce', 0.262), ('loss_counter', 0.11), ('loss_bbox', 0.014), ('loss_giou', 0.198), ('loss_self_iou', 0.005), ('cardinality_error', 7.511), ('loss_ce_0', 0.26), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.511), ('loss_caption_0', 1.717), ('loss_caption', 1.72), ('total_loss', 9.666)]), time/iter = 0.170, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 17157 (epoch 12), loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.104), ('loss_bbox', 0.015), ('loss_giou', 0.189), ('loss_self_iou', 0.004), ('cardinality_error', 7.692), ('loss_ce_0', 0.252), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.2), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.738), ('loss_caption', 1.749), ('total_loss', 9.638)]), time/iter = 0.182, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 17290 (epoch 12), loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.198), ('loss_self_iou', 0.005), ('cardinality_error', 7.932), ('loss_ce_0', 0.254), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.932), ('loss_caption_0', 1.815), ('loss_caption', 1.83), ('total_loss', 10.067)]), time/iter = 0.163, bad_vid = 0.000 Validation results of iter 17329: Bleu_1:0.19294534256446427 Bleu_2:0.11789730285267924 Bleu_3:0.06601509377472357 Bleu_4:0.03274421971508606 METEOR:0.0906445074413136 ROUGE_L:0.17678145420382357 CIDEr:0.5750907875125135 Recall:0.3073352674556176 Precision:0.4434536834427428 soda_c:0.07896521325127955 para_Bleu_1:0.39483511792471604 para_Bleu_2:0.23988438429479647 para_Bleu_3:0.1464330354033768 para_Bleu_4:0.09122283851671699 para_METEOR:0.16480200992253577 para_ROUGE_L:0.33317486176302236 para_CIDEr:0.29080350784714515 overall score of iter 17329: 0.5468283562863979 Save model at iter 17329 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 17423 (epoch 13), loss = OrderedDict([('loss_ce', 0.262), ('loss_counter', 0.111), ('loss_bbox', 0.015), ('loss_giou', 0.195), ('loss_self_iou', 0.007), ('cardinality_error', 7.692), ('loss_ce_0', 0.259), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.208), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.865), ('loss_caption', 1.881), ('total_loss', 10.261)]), time/iter = 0.713, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 17556 (epoch 13), loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.111), ('loss_bbox', 0.014), ('loss_giou', 0.193), ('loss_self_iou', 0.004), ('cardinality_error', 7.737), ('loss_ce_0', 0.253), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.209), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.737), ('loss_caption_0', 1.744), ('loss_caption', 1.743), ('total_loss', 9.707)]), time/iter = 0.168, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 17689 (epoch 13), loss = OrderedDict([('loss_ce', 0.259), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.199), ('loss_self_iou', 0.006), ('cardinality_error', 7.602), ('loss_ce_0', 0.262), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.602), ('loss_caption_0', 1.835), ('loss_caption', 1.819), ('total_loss', 10.1)]), time/iter = 0.160, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 17822 (epoch 13), loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.108), ('loss_bbox', 0.014), ('loss_giou', 0.191), ('loss_self_iou', 0.005), ('cardinality_error', 7.526), ('loss_ce_0', 0.249), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.206), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.526), ('loss_caption_0', 1.681), ('loss_caption', 1.67), ('total_loss', 9.397)]), time/iter = 0.152, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 17955 (epoch 13), loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.102), ('loss_bbox', 0.014), ('loss_giou', 0.184), ('loss_self_iou', 0.005), ('cardinality_error', 7.526), ('loss_ce_0', 0.252), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.2), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.526), ('loss_caption_0', 1.757), ('loss_caption', 1.745), ('total_loss', 9.658)]), time/iter = 0.159, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 18088 (epoch 13), loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.106), ('loss_bbox', 0.014), ('loss_giou', 0.177), ('loss_self_iou', 0.004), ('cardinality_error', 7.534), ('loss_ce_0', 0.251), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.191), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.534), ('loss_caption_0', 1.703), ('loss_caption', 1.701), ('total_loss', 9.39)]), time/iter = 0.154, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 18221 (epoch 13), loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.111), ('loss_bbox', 0.014), ('loss_giou', 0.201), ('loss_self_iou', 0.005), ('cardinality_error', 8.211), ('loss_ce_0', 0.252), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.213), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 8.211), ('loss_caption_0', 1.824), ('loss_caption', 1.816), ('total_loss', 10.053)]), time/iter = 0.161, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 18354 (epoch 13), loss = OrderedDict([('loss_ce', 0.253), ('loss_counter', 0.104), ('loss_bbox', 0.015), ('loss_giou', 0.195), ('loss_self_iou', 0.004), ('cardinality_error', 7.789), ('loss_ce_0', 0.249), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.21), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.789), ('loss_caption_0', 1.792), ('loss_caption', 1.779), ('total_loss', 9.874)]), time/iter = 0.157, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 18487 (epoch 13), loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.111), ('loss_bbox', 0.013), ('loss_giou', 0.19), ('loss_self_iou', 0.004), ('cardinality_error', 7.992), ('loss_ce_0', 0.251), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.205), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.826), ('loss_caption', 1.81), ('total_loss', 9.979)]), time/iter = 0.162, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 18620 (epoch 13), loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.193), ('loss_self_iou', 0.003), ('cardinality_error', 7.737), ('loss_ce_0', 0.251), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.206), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.737), ('loss_caption_0', 1.767), ('loss_caption', 1.771), ('total_loss', 9.784)]), time/iter = 0.153, bad_vid = 0.000 Validation results of iter 18662: Bleu_1:0.1916652028982354 Bleu_2:0.11864819375256218 Bleu_3:0.06801290454817709 Bleu_4:0.03421778123301331 METEOR:0.08890100804282676 ROUGE_L:0.17229926562968575 CIDEr:0.5719694906113042 Recall:0.3115151404333572 Precision:0.42734448265082836 soda_c:0.07979305036983636 para_Bleu_1:0.3972508455506424 para_Bleu_2:0.24317507500304622 para_Bleu_3:0.1497047997976745 para_Bleu_4:0.09437727320664267 para_METEOR:0.16651343432042678 para_ROUGE_L:0.33875534436877147 para_CIDEr:0.29220356232363026 overall score of iter 18662: 0.5530942698506998 Save model at iter 18662 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 18753 (epoch 14), loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.115), ('loss_bbox', 0.013), ('loss_giou', 0.195), ('loss_self_iou', 0.004), ('cardinality_error', 8.241), ('loss_ce_0', 0.251), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.207), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.241), ('loss_caption_0', 1.758), ('loss_caption', 1.759), ('total_loss', 9.756)]), time/iter = 0.731, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 18886 (epoch 14), loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.103), ('loss_bbox', 0.015), ('loss_giou', 0.182), ('loss_self_iou', 0.004), ('cardinality_error', 7.436), ('loss_ce_0', 0.245), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.436), ('loss_caption_0', 1.696), ('loss_caption', 1.692), ('total_loss', 9.366)]), time/iter = 0.163, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 19019 (epoch 14), loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.104), ('loss_bbox', 0.013), ('loss_giou', 0.181), ('loss_self_iou', 0.003), ('cardinality_error', 7.692), ('loss_ce_0', 0.242), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.73), ('loss_caption', 1.729), ('total_loss', 9.496)]), time/iter = 0.157, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 19152 (epoch 14), loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.112), ('loss_bbox', 0.014), ('loss_giou', 0.181), ('loss_self_iou', 0.006), ('cardinality_error', 7.82), ('loss_ce_0', 0.251), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.779), ('loss_caption', 1.771), ('total_loss', 9.714)]), time/iter = 0.161, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 19285 (epoch 14), loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.105), ('loss_bbox', 0.014), ('loss_giou', 0.194), ('loss_self_iou', 0.004), ('cardinality_error', 7.669), ('loss_ce_0', 0.25), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.204), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.669), ('loss_caption_0', 1.76), ('loss_caption', 1.772), ('total_loss', 9.759)]), time/iter = 0.154, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 19418 (epoch 14), loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.115), ('loss_bbox', 0.013), ('loss_giou', 0.197), ('loss_self_iou', 0.004), ('cardinality_error', 8.256), ('loss_ce_0', 0.245), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.256), ('loss_caption_0', 1.754), ('loss_caption', 1.758), ('total_loss', 9.747)]), time/iter = 0.155, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 19551 (epoch 14), loss = OrderedDict([('loss_ce', 0.249), ('loss_counter', 0.109), ('loss_bbox', 0.013), ('loss_giou', 0.175), ('loss_self_iou', 0.004), ('cardinality_error', 7.865), ('loss_ce_0', 0.253), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.865), ('loss_caption_0', 1.68), ('loss_caption', 1.689), ('total_loss', 9.3)]), time/iter = 0.153, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 19684 (epoch 14), loss = OrderedDict([('loss_ce', 0.263), ('loss_counter', 0.104), ('loss_bbox', 0.015), ('loss_giou', 0.187), ('loss_self_iou', 0.005), ('cardinality_error', 7.474), ('loss_ce_0', 0.262), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.199), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.474), ('loss_caption_0', 1.81), ('loss_caption', 1.803), ('total_loss', 9.923)]), time/iter = 0.165, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 19817 (epoch 14), loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.106), ('loss_bbox', 0.014), ('loss_giou', 0.183), ('loss_self_iou', 0.005), ('cardinality_error', 7.526), ('loss_ce_0', 0.247), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.526), ('loss_caption_0', 1.769), ('loss_caption', 1.765), ('total_loss', 9.677)]), time/iter = 0.164, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 19950 (epoch 14), loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.108), ('loss_bbox', 0.013), ('loss_giou', 0.19), ('loss_self_iou', 0.005), ('cardinality_error', 7.797), ('loss_ce_0', 0.253), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.797), ('loss_caption_0', 1.736), ('loss_caption', 1.748), ('total_loss', 9.654)]), time/iter = 0.153, bad_vid = 0.000 Validation results of iter 19995: Bleu_1:0.19012877786294885 Bleu_2:0.11743680046097797 Bleu_3:0.06623934110461578 Bleu_4:0.03314975306654321 METEOR:0.08857227272587216 ROUGE_L:0.17208518718096077 CIDEr:0.5689998070546577 Recall:0.3090681299310951 Precision:0.43095498593310433 soda_c:0.08081534748318767 para_Bleu_1:0.3949292262433903 para_Bleu_2:0.24183495416706074 para_Bleu_3:0.1493168425692173 para_Bleu_4:0.0941904023418332 para_METEOR:0.16661877157717606 para_ROUGE_L:0.3391544295873436 para_CIDEr:0.3057631644012313 overall score of iter 19995: 0.5665723383202406 Save model at iter 19995 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 20083 (epoch 15), loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.102), ('loss_bbox', 0.014), ('loss_giou', 0.186), ('loss_self_iou', 0.004), ('cardinality_error', 7.519), ('loss_ce_0', 0.257), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.519), ('loss_caption_0', 1.743), ('loss_caption', 1.756), ('total_loss', 9.655)]), time/iter = 0.703, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 20216 (epoch 15), loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.105), ('loss_bbox', 0.013), ('loss_giou', 0.179), ('loss_self_iou', 0.003), ('cardinality_error', 7.759), ('loss_ce_0', 0.244), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.193), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.759), ('loss_caption_0', 1.79), ('loss_caption', 1.781), ('total_loss', 9.713)]), time/iter = 0.168, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 20349 (epoch 15), loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.11), ('loss_bbox', 0.013), ('loss_giou', 0.19), ('loss_self_iou', 0.004), ('cardinality_error', 7.992), ('loss_ce_0', 0.245), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.749), ('loss_caption', 1.759), ('total_loss', 9.675)]), time/iter = 0.159, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 20482 (epoch 15), loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.193), ('loss_self_iou', 0.005), ('cardinality_error', 7.94), ('loss_ce_0', 0.244), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.207), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.94), ('loss_caption_0', 1.694), ('loss_caption', 1.715), ('total_loss', 9.502)]), time/iter = 0.155, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 20615 (epoch 15), loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.107), ('loss_bbox', 0.014), ('loss_giou', 0.188), ('loss_self_iou', 0.005), ('cardinality_error', 7.368), ('loss_ce_0', 0.257), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.368), ('loss_caption_0', 1.77), ('loss_caption', 1.771), ('total_loss', 9.775)]), time/iter = 0.156, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 20748 (epoch 15), loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.107), ('loss_bbox', 0.013), ('loss_giou', 0.178), ('loss_self_iou', 0.004), ('cardinality_error', 7.857), ('loss_ce_0', 0.247), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.857), ('loss_caption_0', 1.786), ('loss_caption', 1.773), ('total_loss', 9.695)]), time/iter = 0.153, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 20881 (epoch 15), loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.103), ('loss_bbox', 0.013), ('loss_giou', 0.178), ('loss_self_iou', 0.003), ('cardinality_error', 7.594), ('loss_ce_0', 0.242), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.594), ('loss_caption_0', 1.746), ('loss_caption', 1.748), ('total_loss', 9.541)]), time/iter = 0.155, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 21014 (epoch 15), loss = OrderedDict([('loss_ce', 0.249), ('loss_counter', 0.108), ('loss_bbox', 0.015), ('loss_giou', 0.19), ('loss_self_iou', 0.005), ('cardinality_error', 8.09), ('loss_ce_0', 0.249), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 8.09), ('loss_caption_0', 1.709), ('loss_caption', 1.698), ('total_loss', 9.49)]), time/iter = 0.149, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 21147 (epoch 15), loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.115), ('loss_bbox', 0.014), ('loss_giou', 0.186), ('loss_self_iou', 0.004), ('cardinality_error', 7.812), ('loss_ce_0', 0.248), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.198), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.812), ('loss_caption_0', 1.733), ('loss_caption', 1.732), ('total_loss', 9.57)]), time/iter = 0.155, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 21280 (epoch 15), loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.104), ('loss_bbox', 0.014), ('loss_giou', 0.187), ('loss_self_iou', 0.004), ('cardinality_error', 7.632), ('loss_ce_0', 0.245), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.632), ('loss_caption_0', 1.646), ('loss_caption', 1.658), ('total_loss', 9.233)]), time/iter = 0.152, bad_vid = 0.000 Validation results of iter 21328: Bleu_1:0.1927355202990476 Bleu_2:0.11755729236198051 Bleu_3:0.06532950485231373 Bleu_4:0.0318670348131602 METEOR:0.08966953019840175 ROUGE_L:0.17549405824640266 CIDEr:0.5708533801009449 Recall:0.31055728552993345 Precision:0.4412863394810881 soda_c:0.08079399116249976 para_Bleu_1:0.3847850395827542 para_Bleu_2:0.23591168028694995 para_Bleu_3:0.14500000021146267 para_Bleu_4:0.09097906463153684 para_METEOR:0.1633729521776342 para_ROUGE_L:0.33764324525807 para_CIDEr:0.3225522700715415 overall score of iter 21328: 0.5769042868807126 Save model at iter 21328 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 21413 (epoch 16), loss = OrderedDict([('loss_ce', 0.24), ('loss_counter', 0.107), ('loss_bbox', 0.014), ('loss_giou', 0.175), ('loss_self_iou', 0.004), ('cardinality_error', 7.541), ('loss_ce_0', 0.239), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.541), ('loss_caption_0', 1.637), ('loss_caption', 1.633), ('total_loss', 9.069)]), time/iter = 0.698, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 21546 (epoch 16), loss = OrderedDict([('loss_ce', 0.245), ('loss_counter', 0.102), ('loss_bbox', 0.013), ('loss_giou', 0.172), ('loss_self_iou', 0.004), ('cardinality_error', 7.624), ('loss_ce_0', 0.243), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.185), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.624), ('loss_caption_0', 1.773), ('loss_caption', 1.784), ('total_loss', 9.621)]), time/iter = 0.162, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 21679 (epoch 16), loss = OrderedDict([('loss_ce', 0.239), ('loss_counter', 0.107), ('loss_bbox', 0.014), ('loss_giou', 0.181), ('loss_self_iou', 0.004), ('cardinality_error', 7.992), ('loss_ce_0', 0.238), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.809), ('loss_caption', 1.805), ('total_loss', 9.791)]), time/iter = 0.162, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 21812 (epoch 16), loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.107), ('loss_bbox', 0.013), ('loss_giou', 0.179), ('loss_self_iou', 0.003), ('cardinality_error', 7.677), ('loss_ce_0', 0.25), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.013), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.677), ('loss_caption_0', 1.674), ('loss_caption', 1.676), ('total_loss', 9.277)]), time/iter = 0.157, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 21945 (epoch 16), loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.108), ('loss_bbox', 0.014), ('loss_giou', 0.192), ('loss_self_iou', 0.004), ('cardinality_error', 7.865), ('loss_ce_0', 0.244), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.206), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.865), ('loss_caption_0', 1.713), ('loss_caption', 1.714), ('total_loss', 9.531)]), time/iter = 0.155, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 22078 (epoch 16), loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.11), ('loss_bbox', 0.014), ('loss_giou', 0.19), ('loss_self_iou', 0.005), ('cardinality_error', 7.707), ('loss_ce_0', 0.247), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.707), ('loss_caption_0', 1.772), ('loss_caption', 1.758), ('total_loss', 9.738)]), time/iter = 0.159, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 22211 (epoch 16), loss = OrderedDict([('loss_ce', 0.249), ('loss_counter', 0.101), ('loss_bbox', 0.013), ('loss_giou', 0.18), ('loss_self_iou', 0.005), ('cardinality_error', 7.541), ('loss_ce_0', 0.249), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.193), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.541), ('loss_caption_0', 1.665), ('loss_caption', 1.66), ('total_loss', 9.243)]), time/iter = 0.160, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 22344 (epoch 16), loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.113), ('loss_bbox', 0.015), ('loss_giou', 0.187), ('loss_self_iou', 0.004), ('cardinality_error', 8.008), ('loss_ce_0', 0.248), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.008), ('loss_caption_0', 1.799), ('loss_caption', 1.784), ('total_loss', 9.823)]), time/iter = 0.163, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 22477 (epoch 16), loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.102), ('loss_bbox', 0.013), ('loss_giou', 0.184), ('loss_self_iou', 0.004), ('cardinality_error', 7.699), ('loss_ce_0', 0.247), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.699), ('loss_caption_0', 1.722), ('loss_caption', 1.733), ('total_loss', 9.525)]), time/iter = 0.159, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 22610 (epoch 16), loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.106), ('loss_bbox', 0.014), ('loss_giou', 0.188), ('loss_self_iou', 0.004), ('cardinality_error', 7.729), ('loss_ce_0', 0.245), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.2), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.729), ('loss_caption_0', 1.664), ('loss_caption', 1.667), ('total_loss', 9.297)]), time/iter = 0.154, bad_vid = 0.000 Validation results of iter 22661: Bleu_1:0.1905629005997804 Bleu_2:0.11689699082903934 Bleu_3:0.06544029555928756 Bleu_4:0.03330988693345351 METEOR:0.08938496175202132 ROUGE_L:0.17298359351524648 CIDEr:0.5732307929342625 Recall:0.309604513071417 Precision:0.43046524955715343 soda_c:0.08056479007503722 para_Bleu_1:0.3975304274857351 para_Bleu_2:0.24253918136446623 para_Bleu_3:0.14848895422464012 para_Bleu_4:0.09337330751749118 para_METEOR:0.16677196164785574 para_ROUGE_L:0.33750187221117683 para_CIDEr:0.31278894258081524 overall score of iter 22661: 0.5729342117461622 Save model at iter 22661 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 22743 (epoch 17), loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.108), ('loss_bbox', 0.015), ('loss_giou', 0.196), ('loss_self_iou', 0.005), ('cardinality_error', 7.714), ('loss_ce_0', 0.244), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.21), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.714), ('loss_caption_0', 1.773), ('loss_caption', 1.775), ('total_loss', 9.803)]), time/iter = 0.714, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 22876 (epoch 17), loss = OrderedDict([('loss_ce', 0.245), ('loss_counter', 0.11), ('loss_bbox', 0.013), ('loss_giou', 0.181), ('loss_self_iou', 0.004), ('cardinality_error', 7.774), ('loss_ce_0', 0.249), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.774), ('loss_caption_0', 1.76), ('loss_caption', 1.759), ('total_loss', 9.631)]), time/iter = 0.157, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 23009 (epoch 17), loss = OrderedDict([('loss_ce', 0.237), ('loss_counter', 0.105), ('loss_bbox', 0.012), ('loss_giou', 0.171), ('loss_self_iou', 0.003), ('cardinality_error', 7.872), ('loss_ce_0', 0.237), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.003), ('cardinality_error_0', 7.872), ('loss_caption_0', 1.69), ('loss_caption', 1.688), ('total_loss', 9.229)]), time/iter = 0.161, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 23142 (epoch 17), loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.098), ('loss_bbox', 0.013), ('loss_giou', 0.177), ('loss_self_iou', 0.004), ('cardinality_error', 7.744), ('loss_ce_0', 0.239), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.744), ('loss_caption_0', 1.66), ('loss_caption', 1.663), ('total_loss', 9.173)]), time/iter = 0.157, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 23275 (epoch 17), loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.108), ('loss_bbox', 0.014), ('loss_giou', 0.183), ('loss_self_iou', 0.004), ('cardinality_error', 7.82), ('loss_ce_0', 0.242), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.727), ('loss_caption', 1.741), ('total_loss', 9.535)]), time/iter = 0.160, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 23408 (epoch 17), loss = OrderedDict([('loss_ce', 0.235), ('loss_counter', 0.104), ('loss_bbox', 0.014), ('loss_giou', 0.173), ('loss_self_iou', 0.004), ('cardinality_error', 7.083), ('loss_ce_0', 0.235), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.083), ('loss_caption_0', 1.678), ('loss_caption', 1.68), ('total_loss', 9.181)]), time/iter = 0.153, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 23541 (epoch 17), loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.112), ('loss_bbox', 0.013), ('loss_giou', 0.185), ('loss_self_iou', 0.003), ('cardinality_error', 7.782), ('loss_ce_0', 0.253), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.782), ('loss_caption_0', 1.686), ('loss_caption', 1.674), ('total_loss', 9.361)]), time/iter = 0.158, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 23674 (epoch 17), loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.104), ('loss_bbox', 0.013), ('loss_giou', 0.175), ('loss_self_iou', 0.004), ('cardinality_error', 7.699), ('loss_ce_0', 0.242), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.188), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.699), ('loss_caption_0', 1.734), ('loss_caption', 1.755), ('total_loss', 9.502)]), time/iter = 0.169, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 23807 (epoch 17), loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.109), ('loss_bbox', 0.013), ('loss_giou', 0.188), ('loss_self_iou', 0.004), ('cardinality_error', 8.023), ('loss_ce_0', 0.248), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.199), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.023), ('loss_caption_0', 1.838), ('loss_caption', 1.842), ('total_loss', 10.01)]), time/iter = 0.176, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 23940 (epoch 17), loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.107), ('loss_bbox', 0.013), ('loss_giou', 0.178), ('loss_self_iou', 0.004), ('cardinality_error', 7.789), ('loss_ce_0', 0.246), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.789), ('loss_caption_0', 1.661), ('loss_caption', 1.655), ('total_loss', 9.188)]), time/iter = 0.168, bad_vid = 0.000 Validation results of iter 23994: Bleu_1:0.19099469488969467 Bleu_2:0.11646897839764006 Bleu_3:0.06451308365995856 Bleu_4:0.032200079484133 METEOR:0.08912416771202449 ROUGE_L:0.1730757893125124 CIDEr:0.5693051160396969 Recall:0.3097042977992106 Precision:0.43274547601681085 soda_c:0.08084297498321232 para_Bleu_1:0.3924031546442418 para_Bleu_2:0.23911474626028398 para_Bleu_3:0.14600811918196227 para_Bleu_4:0.09107950853175292 para_METEOR:0.16594454181978452 para_ROUGE_L:0.33729101832099057 para_CIDEr:0.30892642009784 overall score of iter 23994: 0.5659504704493774 Save model at iter 23994 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 24073 (epoch 18), loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.11), ('loss_bbox', 0.012), ('loss_giou', 0.178), ('loss_self_iou', 0.003), ('cardinality_error', 7.97), ('loss_ce_0', 0.246), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.013), ('loss_giou_0', 0.191), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.97), ('loss_caption_0', 1.689), ('loss_caption', 1.683), ('total_loss', 9.309)]), time/iter = 0.720, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 24206 (epoch 18), loss = OrderedDict([('loss_ce', 0.237), ('loss_counter', 0.118), ('loss_bbox', 0.013), ('loss_giou', 0.183), ('loss_self_iou', 0.005), ('cardinality_error', 8.286), ('loss_ce_0', 0.236), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.013), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 8.286), ('loss_caption_0', 1.712), ('loss_caption', 1.715), ('total_loss', 9.432)]), time/iter = 0.161, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 24339 (epoch 18), loss = OrderedDict([('loss_ce', 0.245), ('loss_counter', 0.098), ('loss_bbox', 0.012), ('loss_giou', 0.167), ('loss_self_iou', 0.003), ('cardinality_error', 7.316), ('loss_ce_0', 0.247), ('loss_counter_0', 0.099), ('loss_bbox_0', 0.013), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.003), ('cardinality_error_0', 7.316), ('loss_caption_0', 1.695), ('loss_caption', 1.701), ('total_loss', 9.257)]), time/iter = 0.159, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 24472 (epoch 18), loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.108), ('loss_bbox', 0.013), ('loss_giou', 0.176), ('loss_self_iou', 0.003), ('cardinality_error', 7.459), ('loss_ce_0', 0.248), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.459), ('loss_caption_0', 1.699), ('loss_caption', 1.699), ('total_loss', 9.337)]), time/iter = 0.158, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 24605 (epoch 18), loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.103), ('loss_bbox', 0.014), ('loss_giou', 0.18), ('loss_self_iou', 0.004), ('cardinality_error', 7.812), ('loss_ce_0', 0.243), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.812), ('loss_caption_0', 1.775), ('loss_caption', 1.773), ('total_loss', 9.644)]), time/iter = 0.163, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 24738 (epoch 18), loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.101), ('loss_bbox', 0.016), ('loss_giou', 0.187), ('loss_self_iou', 0.004), ('cardinality_error', 7.556), ('loss_ce_0', 0.246), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.556), ('loss_caption_0', 1.727), ('loss_caption', 1.73), ('total_loss', 9.525)]), time/iter = 0.166, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 24871 (epoch 18), loss = OrderedDict([('loss_ce', 0.239), ('loss_counter', 0.104), ('loss_bbox', 0.013), ('loss_giou', 0.181), ('loss_self_iou', 0.004), ('cardinality_error', 7.692), ('loss_ce_0', 0.241), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.77), ('loss_caption', 1.773), ('total_loss', 9.641)]), time/iter = 0.154, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 25004 (epoch 18), loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.109), ('loss_bbox', 0.013), ('loss_giou', 0.186), ('loss_self_iou', 0.004), ('cardinality_error', 8.143), ('loss_ce_0', 0.247), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.143), ('loss_caption_0', 1.692), ('loss_caption', 1.684), ('total_loss', 9.379)]), time/iter = 0.151, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 25137 (epoch 18), loss = OrderedDict([('loss_ce', 0.245), ('loss_counter', 0.111), ('loss_bbox', 0.014), ('loss_giou', 0.179), ('loss_self_iou', 0.004), ('cardinality_error', 7.88), ('loss_ce_0', 0.245), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.88), ('loss_caption_0', 1.691), ('loss_caption', 1.696), ('total_loss', 9.347)]), time/iter = 0.154, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 25270 (epoch 18), loss = OrderedDict([('loss_ce', 0.237), ('loss_counter', 0.103), ('loss_bbox', 0.014), ('loss_giou', 0.185), ('loss_self_iou', 0.004), ('cardinality_error', 7.767), ('loss_ce_0', 0.238), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.767), ('loss_caption_0', 1.687), ('loss_caption', 1.694), ('total_loss', 9.34)]), time/iter = 0.146, bad_vid = 0.000 Validation results of iter 25327: Bleu_1:0.19191750615066444 Bleu_2:0.11783589874301872 Bleu_3:0.06597231596326529 Bleu_4:0.03167603834812624 METEOR:0.08996609888818348 ROUGE_L:0.1746391859525846 CIDEr:0.5689023016363987 Recall:0.31503357525649683 Precision:0.4376628112951966 soda_c:0.08097707611185051 para_Bleu_1:0.3977375551078834 para_Bleu_2:0.24323062675170298 para_Bleu_3:0.1488548587270082 para_Bleu_4:0.09292110149283073 para_METEOR:0.16716298804356167 para_ROUGE_L:0.33781551083855066 para_CIDEr:0.31014493696748857 overall score of iter 25327: 0.570229026503881 Save model at iter 25327 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save info to info.json ID seq2-ft(mix)-gt_percent-1.0 iter 25403 (epoch 19), loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.102), ('loss_bbox', 0.013), ('loss_giou', 0.176), ('loss_self_iou', 0.005), ('cardinality_error', 7.429), ('loss_ce_0', 0.248), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.186), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.429), ('loss_caption_0', 1.705), ('loss_caption', 1.695), ('total_loss', 9.343)]), time/iter = 0.723, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 25536 (epoch 19), loss = OrderedDict([('loss_ce', 0.241), ('loss_counter', 0.107), ('loss_bbox', 0.013), ('loss_giou', 0.189), ('loss_self_iou', 0.003), ('cardinality_error', 7.887), ('loss_ce_0', 0.246), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.887), ('loss_caption_0', 1.717), ('loss_caption', 1.729), ('total_loss', 9.517)]), time/iter = 0.163, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 25669 (epoch 19), loss = OrderedDict([('loss_ce', 0.239), ('loss_counter', 0.111), ('loss_bbox', 0.014), ('loss_giou', 0.177), ('loss_self_iou', 0.004), ('cardinality_error', 7.707), ('loss_ce_0', 0.243), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.186), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.707), ('loss_caption_0', 1.718), ('loss_caption', 1.711), ('total_loss', 9.385)]), time/iter = 0.161, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 25802 (epoch 19), loss = OrderedDict([('loss_ce', 0.24), ('loss_counter', 0.111), ('loss_bbox', 0.013), ('loss_giou', 0.183), ('loss_self_iou', 0.004), ('cardinality_error', 8.173), ('loss_ce_0', 0.242), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.193), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.173), ('loss_caption_0', 1.732), ('loss_caption', 1.735), ('total_loss', 9.515)]), time/iter = 0.162, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 25935 (epoch 19), loss = OrderedDict([('loss_ce', 0.241), ('loss_counter', 0.105), ('loss_bbox', 0.013), ('loss_giou', 0.179), ('loss_self_iou', 0.005), ('cardinality_error', 7.82), ('loss_ce_0', 0.241), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.626), ('loss_caption', 1.628), ('total_loss', 9.063)]), time/iter = 0.153, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 26068 (epoch 19), loss = OrderedDict([('loss_ce', 0.24), ('loss_counter', 0.102), ('loss_bbox', 0.014), ('loss_giou', 0.182), ('loss_self_iou', 0.005), ('cardinality_error', 7.444), ('loss_ce_0', 0.243), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.444), ('loss_caption_0', 1.697), ('loss_caption', 1.701), ('total_loss', 9.35)]), time/iter = 0.162, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 26201 (epoch 19), loss = OrderedDict([('loss_ce', 0.239), ('loss_counter', 0.097), ('loss_bbox', 0.014), ('loss_giou', 0.168), ('loss_self_iou', 0.005), ('cardinality_error', 7.301), ('loss_ce_0', 0.237), ('loss_counter_0', 0.099), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.301), ('loss_caption_0', 1.702), ('loss_caption', 1.703), ('total_loss', 9.254)]), time/iter = 0.161, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 26334 (epoch 19), loss = OrderedDict([('loss_ce', 0.238), ('loss_counter', 0.112), ('loss_bbox', 0.013), ('loss_giou', 0.174), ('loss_self_iou', 0.003), ('cardinality_error', 7.827), ('loss_ce_0', 0.242), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.188), ('loss_self_iou_0', 0.003), ('cardinality_error_0', 7.827), ('loss_caption_0', 1.729), ('loss_caption', 1.725), ('total_loss', 9.424)]), time/iter = 0.164, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 26467 (epoch 19), loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.109), ('loss_bbox', 0.014), ('loss_giou', 0.181), ('loss_self_iou', 0.003), ('cardinality_error', 8.023), ('loss_ce_0', 0.245), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.023), ('loss_caption_0', 1.751), ('loss_caption', 1.746), ('total_loss', 9.586)]), time/iter = 0.162, bad_vid = 0.000 ID seq2-ft(mix)-gt_percent-1.0 iter 26600 (epoch 19), loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.108), ('loss_bbox', 0.014), ('loss_giou', 0.186), ('loss_self_iou', 0.004), ('cardinality_error', 7.902), ('loss_ce_0', 0.242), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.902), ('loss_caption_0', 1.727), ('loss_caption', 1.737), ('total_loss', 9.533)]), time/iter = 0.156, bad_vid = 0.000 Validation results of iter 26660: Bleu_1:0.1908811984292725 Bleu_2:0.11664270449592412 Bleu_3:0.06546844271584715 Bleu_4:0.03266470081303028 METEOR:0.08981101020496235 ROUGE_L:0.17382953846907112 CIDEr:0.5716745559959934 Recall:0.31292035599338697 Precision:0.4345220728699943 soda_c:0.08127095018359767 para_Bleu_1:0.40170065588267356 para_Bleu_2:0.2447870245859959 para_Bleu_3:0.14990588787772124 para_Bleu_4:0.09419227635900729 para_METEOR:0.16780671784283924 para_ROUGE_L:0.33845945539662686 para_CIDEr:0.3198675630646056 overall score of iter 26660: 0.5818665572664521 Save model at iter 26660 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save info to info.json Best epoch: 11 Best Model Performance: Bleu_1:0.1989422607268001 Bleu_2:0.12223038556953512 Bleu_3:0.06835990671747892 Bleu_4:0.03486159828438583 METEOR:0.09408978838449876 ROUGE_L:0.18200142867223945 CIDEr:0.593480700759431 Recall:0.30795469953703025 Precision:0.4513424333993264 soda_c:0.0796861065455984 para_Bleu_1:0.39594509057043764 para_Bleu_2:0.24087109399513515 para_Bleu_3:0.14790262814870953 para_Bleu_4:0.09321042711819619 para_METEOR:0.1655617051143519 para_ROUGE_L:0.3391051008488012 para_CIDEr:0.32807196750555834 avg_proposal_number:-1 Best Overall Score epoch11: 1.5265537286258848