diff --git a/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..61d3e52456e1ea6b0d6726c159018cade8a37d34 --- /dev/null +++ b/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..a07a5868fc11247094f40d8d8350e9088832eb8d --- /dev/null +++ b/backup/cfgs/howto-anet_anet_clip_topk20_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 1 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..8d52bdad23f666dc1d065a692affd57950d91ee5 --- /dev/null +++ b/backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..6ba6d107bf6ea0c714bdcee68f97e22bf3d94f03 --- /dev/null +++ b/backup/cfgs/howto-anet_anet_clip_topk20_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 20 +width_ratio: 2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..b3dd203151d68d04d7367c1bb92602c4c9c44036 --- /dev/null +++ b/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..fcfaef85faa02aabfc3633861e2c3b3f88ec2b66 --- /dev/null +++ b/backup/cfgs/howto-anet_anet_clip_topk30_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..97fbc46c507b17e85a2c8ec633ac1d01645609d3 --- /dev/null +++ b/backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..d01e18020cd386eb7f5db9fd00662eb3992740eb --- /dev/null +++ b/backup/cfgs/howto-anet_anet_clip_topk30_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..1b4b417c3f5f2d549211c84b1587f94bafa5cf9a --- /dev/null +++ b/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 1 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..f144a283f2b705dfac08821fb6b1038d07d4fe7b --- /dev/null +++ b/backup/cfgs/howto-anet_anet_clip_topk40_r1_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 1 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml b/backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..0141ba07a86d4a5fc85962f8185ca6771c0f149d --- /dev/null +++ b/backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml b/backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml new file mode 100644 index 0000000000000000000000000000000000000000..6dde138a7b891c588267ba53f189063824fc4fbb --- /dev/null +++ b/backup/cfgs/howto-anet_anet_clip_topk40_r2_iter3_th2_refine_aug(8,0.02)_top3_2stage_inscap_puyu.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_puyu.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 40 +width_ratio: 2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/backup/cfgs_base/howto/base_howto-anet_anet_mixlm.yml b/backup/cfgs_base/howto/base_howto-anet_anet_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..889d543d5b813b8a574700f9ad209fd237144075 --- /dev/null +++ b/backup/cfgs_base/howto/base_howto-anet_anet_mixlm.yml @@ -0,0 +1,64 @@ +id: anet + +visual_feature_type: c3d +visual_feature_folder: 'data/anet/features/c3d' +feature_dim: 500 +invalid_video_json: [] +train_proposal_file: data/generated_proposals/dbg_trainval_top100.json +eval_proposal_file: data/generated_proposals/dbg_trainval_top100.json +gt_file_for_eval: ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] +gt_file_for_para_eval: ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] +train_caption_file: ['data/howto/captiondata/howto100m_train_mixlm.json', 'data/anet/captiondata/train_modified.json'] +val_caption_file: 'data/anet/captiondata/val_1.json' + +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_mixlm_anet.json +vocab_size: 18884 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 100 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 10 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/backup/cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml b/backup/cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..36d19db653936c2342b12bfc603de32b2295e287 --- /dev/null +++ b/backup/cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml @@ -0,0 +1,61 @@ +id: yc2_tsn_pdvcl + +visual_feature_type: ['resnet', 'bn'] +visual_feature_folder: ['data/yc2/features/resnet_bn/', 'data/yc2/features/resnet_bn/'] +feature_dim: 3072 +invalid_video_json: [] +train_caption_file: ['data/howto/captiondata/howto100m_train_mixlm.json', 'data/yc2/captiondata/yc2_train.json'] +val_caption_file: 'data/yc2/captiondata/yc2_val.json' +gt_file_for_eval: ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval: ['data/yc2/captiondata/para/para_yc2_val.json'] +max_caption_len: 50 + +dict_file: data/howto/vocabulary_howto_rate2_mixlm_yc2.json +vocab_size: 17447 +# dict_file_for_sim: data/howto/vocabulary_howto_rate5.json +# vocab_size: 8531 + + +train_proposal_type: gt +train_proposal_sample_num: 30 +sample_method: nearest + +epoch: 10 +batch_size: 1 +lr: 0.00005 +learning_rate_decay_start: 8 +learning_rate_decay_every: 3 +learning_rate_decay_rate: 0.5 +weight_decay: 0.0001 +save_all_checkpoint: 0 + +num_queries: 100 +dec_layers: 2 +enc_layers: 2 +transformer_ff_dim: 512 +transformer_dropout_prob: 0.1 +frame_embedding_num: 200 +caption_decoder_type: light +att_hid_size: 0 + +with_box_refine: 1 + +fix_xcw: 1 +set_cost_caption: 0 +set_cost_giou: 4 +set_cost_bbox: 0 +set_cost_class: 2 +self_iou_loss_coef: 0 +#cost_alpha: 0.5 +#cost_gamma: 1 +#focal_alpha: 0.5 +#focal_gamma: 1 +caption_loss_coef: 2 +giou_loss_coef: 4 +bbox_loss_coef: 0 +cls_loss_coef: 2 +count_loss_coef: 0.5 +max_eseq_length: 20 +lloss_cross_entropy: 0 +lloss_focal_loss: 0 +lloss_gau_mask: 1 \ No newline at end of file diff --git a/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_mixlm.yml b/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..87fd2de7d0e97619b3774084d4de97485ac0eedd --- /dev/null +++ b/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml b/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..da4400435c82abc6bc70c758bdbb8d52b9d68cc2 --- /dev/null +++ b/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_mixlm.yml b/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..57427fb4f57b5c5b3518de52811ee7d31f799017 --- /dev/null +++ b/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml b/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..93a5cabefb35f8f40517cd74d3cee811008d4659 --- /dev/null +++ b/backup/cfgs_base/howto/howto-anet_anet_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_mixlm.yml b/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..6b3775fd1a72294b76078cdc49baf850ed5056ca --- /dev/null +++ b/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml b/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..48afb4f1afd1ca36d3e0c6689d2e7d562099cbeb --- /dev/null +++ b/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/CLIP_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_mixlm.yml b/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..62f4d92634f0885f13107332cc7e23b824ff8596 --- /dev/null +++ b/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml b/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..64486f4d57b00de9c7b0c403ff49749c7cccee4a --- /dev/null +++ b/backup/cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-anet_anet_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet/UniVL_feature/text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 2 +top_frames: 35 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_mixlm.yml b/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..29489663091e3ca5546b6d55f9937dc01ff97a8e --- /dev/null +++ b/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml b/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..bf6ced9cc86151f3c804201b38a127a8cb2f5381 --- /dev/null +++ b/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_CLIP_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_mixlm.yml b/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..472fdf294e8ba71f16708011d3d8529e5b32800c --- /dev/null +++ b/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml b/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..9505834e0b888345ad6b73283848e9717a58ac98 --- /dev/null +++ b/backup/cfgs_base/howto/howto-yc2_yc2_anc_(sim_op_order_v2)_UniVL_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 1 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_mixlm.yml b/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..753fdd06b9fbb789f5f5215ce9ae73d2f694bf12 --- /dev/null +++ b/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml b/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..09ca13a378e8eacef7f262ae789b9ffa7346c34e --- /dev/null +++ b/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm_v0.yml b/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm_v0.yml new file mode 100644 index 0000000000000000000000000000000000000000..09ca13a378e8eacef7f262ae789b9ffa7346c34e --- /dev/null +++ b/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_CLIP_refine_mixlm_v0.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['CLIP'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder: ['/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/video/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: CLIP +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_mixlm.yml b/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..cae9be1bd91a74c7b263399c84dd4e0ff80849b4 --- /dev/null +++ b/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_mixlm.yml @@ -0,0 +1,42 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 0 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 0 +refine_pseudo_stage_num: 1 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 12 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_puyu.yml b/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_puyu.yml index 456af3710b005fecd7848bf8bfe73e7de8dd58df..798dc7b939e23386a864ee0ac3f53f2628b7138d 100644 --- a/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_puyu.yml +++ b/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_puyu.yml @@ -3,7 +3,7 @@ base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml visual_feature_type: ['UniVL'] visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] -text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +text_feature_folder: ['/mnt/data/pjlab-3090-sport/wuhao/features/howto100m/univl_features/text_puyu', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] feature_dim: 768 diff --git a/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml b/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml new file mode 100644 index 0000000000000000000000000000000000000000..36061ccd68f650aef565bc8ce8a31be53eebf41a --- /dev/null +++ b/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_mixlm.yml @@ -0,0 +1,46 @@ +id: refine_aug(5,0.3)_top3_1stage +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_mixlm.yml + +visual_feature_type: ['UniVL'] +visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] +text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +feature_dim: 768 +hidden_dim: 512 + +use_pseudo_box: 1 +pseudo_box_aug: 1 +pseudo_box_aug_num: 5 +pseudo_box_aug_ratio: 0.3 +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +iteration: 3 +width_th: 2 +statistic_mode: mode +width_ratio: 1 +window_size: 3 +top_frames: 10 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 30 + +use_anchor: 0 +pretrained_language_model: UniVL +disable_contrastive_projection: 1 + +caption_decoder_type: standard +cap_nheads: 1 +cap_dec_n_points: 4 +cap_num_feature_levels: 4 +soft_attention: 1 +att_hid_size: 512 + +num_queries: 100 + +ec_alpha: 1.0 + +self_iou_loss_coef: 0.0 +ref_rank_loss_coef: 0.0 +contrastive_loss_start_coef: 0.0 diff --git a/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml b/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml index c6cdee3f0a43f9eee2898c610868aea88103ad9a..54a0ceb172e743011c5a9183cbc31c39d3084019 100644 --- a/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml +++ b/backup/cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine_puyu.yml @@ -3,7 +3,7 @@ base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2_puyu.yml visual_feature_type: ['UniVL'] visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] -text_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] +text_feature_folder: ['/mnt/data/pjlab-3090-sport/wuhao/features/howto100m/univl_features/text_puyu', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] visual_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_visual/'] text_feature_folder_val: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text/'] feature_dim: 768 diff --git a/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml b/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml index 93346fbdee8e6aaf398c9c429b91cc825377c9aa..13ca9fd266ebd75d23197a8e51ad913227640b06 100644 --- a/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml +++ b/backup/cfgs_base/howto/howto_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml @@ -1,5 +1,5 @@ id: refine_aug(5,0.3)_top3_1stage -base_cfg_path: cfgs_base/howto/base_howto_yc2.yml +base_cfg_path: cfgs_base/howto/base_howto-yc2_yc2.yml visual_feature_type: ['UniVL'] visual_feature_folder: ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/visual'] diff --git a/backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..01358882bfd7a3c849085a12e2b93b42012add45 --- /dev/null +++ b/backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml b/backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml new file mode 100644 index 0000000000000000000000000000000000000000..01358882bfd7a3c849085a12e2b93b42012add45 --- /dev/null +++ b/backup/cfgs_ft_gt/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 30 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk diff --git a/backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml b/backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml new file mode 100644 index 0000000000000000000000000000000000000000..ac09bd7b115aac8b96a46053f07ee52d43c4a165 --- /dev/null +++ b/backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 25 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml b/backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml new file mode 100644 index 0000000000000000000000000000000000000000..ac09bd7b115aac8b96a46053f07ee52d43c4a165 --- /dev/null +++ b/backup/cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap_rand2.yml @@ -0,0 +1,19 @@ +id: '' +base_cfg_path: cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml + + +pseudo_box_aug_num: 8 +pseudo_box_aug_ratio: 0.02 +pseudo_box_aug_mode: random_range +refine_pseudo_box: 1 +refine_pseudo_stage_num: 2 +merge_k_boxes: 3 +pseudo_box_type: similarity_op_order_v2 +top_frames: 25 +width_ratio: 1 +iteration: 3 +width_th: 1 +use_query_box_for_refine: 0 +gt_proposal_sample_num: 20 +mil_loss_coef: 0 +merge_criterion: ins_cap_topk \ No newline at end of file diff --git a/backup/change_config_add.py b/backup/change_config_add.py index 610c71dbf03a1817cda08454698805982df1f985..4b9ecff04cf568dba78df9a67a4a418abc9edf08 100644 --- a/backup/change_config_add.py +++ b/backup/change_config_add.py @@ -12,10 +12,12 @@ args = parser.parse_args() # Define the folder containing YAML files -folder_path = 'cfgs_ref' +# folder_path = 'cfgs_ref' # folder_path = 'cfgs_base/anet' # folder_path = 'cfgs' -file_filter = 'yc2' +folder_path = 'cfgs_yc2_ft_perc' + +file_filter = '' @@ -24,18 +26,18 @@ file_filter = 'yc2' # find_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/clip_features/text_proj' # find_string = 'data/yc2/captiondata/yc2' # find_string = "/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2/UniVL_features/UniVL_text" -find_string = "UniVL_refine" +find_string = "ft_gt_percent: 0.25" # find_string = "pdvc_mode: 0" # replace_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/visual' # replace_string = '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features/clip/text' # replace_string = 'data/tasty/captiondata/tasty' # replace_string = "cfgs_base/tasty/tasty_tsn_pdvcl.yml" -replace_string = "CLIP_refine" +replace_string = "ft_gt_percent: 0.75" # replace_string = "pdvc_mode: 1" -old_name = 'univl' -new_name = 'clip' +old_name = 'perc0.25' +new_name = 'perc0.75' def replace_yaml(yaml_file_path, new_file_path, old_string, new_string): # Read the YAML file as text diff --git a/backup/misc/__pycache__/utils.cpython-38.pyc b/backup/misc/__pycache__/utils.cpython-38.pyc index 9b3a8d5ea6440b3f900ca9cd1815a5cf81f0534c..5a07b1b2f7c2819d5dcfeb1e5e462ac236b6d940 100644 Binary files a/backup/misc/__pycache__/utils.cpython-38.pyc and b/backup/misc/__pycache__/utils.cpython-38.pyc differ diff --git a/backup/misc/utils.py b/backup/misc/utils.py index b3e979477f4d1a97c28daed7f5592ea6a0a59716..301a498189d0568ce14362b3630f2c89c2a26c6e 100644 --- a/backup/misc/utils.py +++ b/backup/misc/utils.py @@ -241,7 +241,7 @@ def build_folder(opt): save_foldername += '_C-layer' if 'puyu' in opt.train_caption_file[0]: save_foldername += '_puyu' - elif 'mix' in opt.train_caption_file[0]: + elif 'mixlm' in opt.train_caption_file[0]: save_foldername += '_mixlm' if opt.id != '': @@ -281,8 +281,13 @@ def build_folder(opt): return save_folder -def backup_envir(save_folder): +def backup_envir(save_folder, opt): + cfg_path = opt.cfg_path + dir_path = os.path.dirname(cfg_path) backup_folders = ['cfgs_base', 'cfgs', 'misc', 'pdvc'] + if dir_path not in backup_folders: + backup_folders.append(dir_path) + backup_files = glob.glob('./*.py') for folder in backup_folders: shutil.copytree(folder, os.path.join(save_folder, 'backup', folder)) diff --git a/backup/opts.py b/backup/opts.py index e2edf8fa4918e9b960cd26d0fa561d3b1155b4ff..8c0abaea05f6aefca9779237b1d3c555f10e45ec 100644 --- a/backup/opts.py +++ b/backup/opts.py @@ -269,6 +269,7 @@ def parse_opts(): # reranking parser.add_argument('--ec_alpha', type=float, default=0.3) + parser.add_argument('--test', action='store_true', default=False) args = parser.parse_args() if args.cfg_path: diff --git a/backup/pdvc/__pycache__/pdvc.cpython-38.pyc b/backup/pdvc/__pycache__/pdvc.cpython-38.pyc index f7bef286e186c9f27de1ea48197eee0fae6a7d6f..b5958f3c996e09fc92224c0dfbc6f1585d0c2b6c 100644 Binary files a/backup/pdvc/__pycache__/pdvc.cpython-38.pyc and b/backup/pdvc/__pycache__/pdvc.cpython-38.pyc differ diff --git a/backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc b/backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc index 3f2bec65e4730af469226f8efdb168b47da926ef..4a9e333fb5a96578c8f8c3017ccf7d80466fff6f 100644 Binary files a/backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc and b/backup/pdvc/__pycache__/video_segmentation.cpython-38.pyc differ diff --git a/backup/pdvc/pdvc.py b/backup/pdvc/pdvc.py index c342477fb906acda08cf40a040eb45b2b9e901b8..4f7ffe3067b2a1382a79c0efc5a8ac828baa9c03 100644 --- a/backup/pdvc/pdvc.py +++ b/backup/pdvc/pdvc.py @@ -316,6 +316,8 @@ class PDVC(nn.Module): video_step_alignment = [align_frame_into_steps_op_v1(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, scale=self.opt.width_ratio, beta=1, order=True, num_iterations=self.opt.iteration) for i in range(N)] elif self.opt.pseudo_box_type == 'similarity_op_order_v2': video_step_alignment = [align_frame_into_steps_op_order_v2(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, threshold=self.opt.width_th, ratio=self.opt.width_ratio, iteration=self.opt.iteration) for i in range(N)] + elif self.opt.pseudo_box_type == 'similarity_op_v2': + video_step_alignment = [align_frame_into_steps_op_v2(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), topk=self.opt.top_frames, threshold=self.opt.width_th, ratio=self.opt.width_ratio, iteration=self.opt.iteration) for i in range(N)] elif self.opt.pseudo_box_type == 'weight_sim': if self.opt.width_ratio < 0: video_step_alignment = [step_retrieval_weight_sim(dt['video_tensor'][i], raw_text_embed[i].to(memory.device), \ diff --git a/backup/pdvc/video_segmentation.py b/backup/pdvc/video_segmentation.py index 93775df585e53204022fceb86c693516386b6023..dfa7f74cf0a858fc7dc8a929638fc294fc9bfc13 100644 --- a/backup/pdvc/video_segmentation.py +++ b/backup/pdvc/video_segmentation.py @@ -632,6 +632,65 @@ def align_frame_into_steps_op_order_v2(frame_features, step_features, topk=15, t return (best_bbox, min_loss) +def align_frame_into_steps_op_v2(frame_features, step_features, topk=15, threshold=0.5, ratio=1, iteration=3): + # breakpoint() + if step_features.shape[0] == 0: + return -np.ones(frame_features.shape[0]) + + sim = compute_sim(step_features, frame_features, config_eval_l2norm).cpu() + sorted_index = torch.argsort(-sim, dim=1) + top_indices_list_global = [sorted_index[i][:topk] for i in range(sim.shape[0])] + top_values_list_global = [sim[i][top_indices_list_global[i]] for i in range(sim.shape[0])] + + + uniform_boxes = uniform_window(frame_features.shape[0], step_features.shape[0]) + + iter_bbox_loss = {} + for iter in range(iteration): + # if iter == 0: + # refined_uniform_boxes = expand_window(uniform_boxes, frame_features.shape[0], step_features.shape[0], ratio) + # else: + # refined_uniform_boxes = expand_window(bbox, frame_features.shape[0], step_features.shape[0], ratio) # last bbox + + + # global: from all frames, local: from refined uniform boxes + + # top_indices_list_local = [sorted_index[i][(sorted_index[i] >= refined_uniform_boxes[i][0]) & (sorted_index[i] <= refined_uniform_boxes[i][1])][:topk] for i in range(sim.shape[0])] + # top_values_list_local = [sim[i][top_indices_list_local[i]] for i in range(sim.shape[0])] + + # size_local = [len(top_indices_list_local[i]) for i in range(sim.shape[0])] + # if sum(size_local) < (topk-2) * len(size_local): + # top_indices_list = top_indices_list_global + # top_values_list = top_values_list_global + # else: + # top_indices_list = top_indices_list_local + # top_values_list = top_values_list_local + + # top_indices_list = [top_indices_list_global[i] if len(top_indices_list_local[i]) < topk else top_indices_list_local[i] for i in range(sim.shape[0])] + + bbox = [] + for i in range(len(top_indices_list_global)): + filtered_indices = compute_filtered_indices(top_indices_list_global[i].tolist(), top_values_list_global[i].tolist(), threshold) + if len(filtered_indices) == 0: + filtered_indices = compute_filtered_indices(top_indices_list_global[i].tolist(), top_indices_list_global[i].tolist(), threshold) + if len(filtered_indices) == 0: + bbox.append(uniform_boxes[i]) + continue + bbox.append([min(filtered_indices), max(filtered_indices)]) + + # compute bbox loss + bbox_loss_list = [compute_bbox_loss(top_indices_list_global[i], bbox[i], top_values_list_global[i]) for i in range(len(top_indices_list_global))] + bbox_loss = sum(bbox_loss_list) + iter_bbox_loss[iter] = {'loss': bbox_loss, 'bbox': bbox} + + # select the minimum bbox loss and bbox as output + min_loss_iter = min(iter_bbox_loss.keys(), key=lambda k: iter_bbox_loss[k]['loss']) + min_loss = iter_bbox_loss[min_loss_iter]['loss'] + best_bbox = iter_bbox_loss[min_loss_iter]['bbox'] + + + return (best_bbox, min_loss) + # pesudo box 4: based on fixed window. the result is bad. give up diff --git a/backup/train.py b/backup/train.py index 8777c91ee32ec28365e2c7579d3d84fab8571135..43c0c73fd63d66eb7055f913723dd086ab80d288 100644 --- a/backup/train.py +++ b/backup/train.py @@ -48,8 +48,8 @@ def construct_save_path(opt, save_folder="/mnt/data/pjlab-3090-sport/wuhao/code/ if len(opt.train_caption_file) == 2: if 'puyu' in opt.train_caption_file[0]: elements.append('howto_puyu') - elif 'mix' in opt.train_caption_file[0]: - elements.append('howto_mix') + elif 'mixlm' in opt.train_caption_file[0]: + elements.append('howto_mixlm') else: elements.append('howto_llama2') elements.append('howto') @@ -65,8 +65,8 @@ def construct_save_path(opt, save_folder="/mnt/data/pjlab-3090-sport/wuhao/code/ elif 'howto' in opt.train_caption_file: if 'puyu' in opt.train_caption_file: elements.append('howto_puyu') - elif 'mix' in opt.train_caption_file: - elements.append('howto_mix') + elif 'mixlm' in opt.train_caption_file: + elements.append('howto_mixlm') else: elements.append('howto_llama2') # elements.append('howto') @@ -114,8 +114,97 @@ def train(opt): logger = create_logger(save_folder, 'train.log') tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + # if use mixlm model + saved_path = construct_save_path(opt) + + if 'mixlm' in saved_path: + # text_feature_folder_mixlm = os.path.join(save_folder, 'text_feature') + mixlm_pbox_path = construct_save_path(opt, save_folder='test').replace('.json', '').replace('test/', '') + text_feature_folder_mixlm = os.path.join('/mnt/data/Gvlab/wuhao/code/tmp', 'mix_text_feature', mixlm_pbox_path) + os.makedirs(text_feature_folder_mixlm, exist_ok=True) + if 'clip' in save_folder or 'CLIP' in save_folder: + text_feature_folder_llama2 = map_path('/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj') + text_feature_folder_puyu = '/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip' + elif 'univl' in save_folder or 'UniVL' in save_folder or 'Uni' in save_folder: + text_feature_folder_llama2 = map_path('/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL/text') + text_feature_folder_puyu = '/mnt/data/pjlab-3090-sport/wuhao/features/howto100m/univl_features/text_puyu' + + if not os.path.exists(saved_path): + llama2_pbox_path = saved_path.replace('mixlm', 'llama2') + puyu_pbox_path = saved_path.replace('mixlm', 'puyu') + with open(llama2_pbox_path, 'r') as f: + llama2_pbox = json.load(f) + with open(puyu_pbox_path, 'r') as f: + puyu_pbox = json.load(f) + + mixlm_pbox = {} + for video_key in llama2_pbox.keys(): + if llama2_pbox.get(video_key) is None and puyu_pbox.get(video_key) is None: + mixlm_pbox[video_key] = None + elif llama2_pbox.get(video_key) is None: + mixlm_pbox[video_key] = {'box': puyu_pbox[video_key]['box'], 'loss': puyu_pbox[video_key]['loss'], 'llm': 'puyu'} + elif puyu_pbox.get(video_key) is None: + mixlm_pbox[video_key] = {'box': llama2_pbox[video_key]['box'], 'loss': llama2_pbox[video_key]['loss'], 'llm': 'llama2'} + else: + if llama2_pbox[video_key]['loss'] < puyu_pbox[video_key]['loss']: + mixlm_pbox[video_key] = {'box': llama2_pbox[video_key]['box'], 'loss': llama2_pbox[video_key]['loss'], 'llm': 'llama2'} + else: + mixlm_pbox[video_key] = {'box': puyu_pbox[video_key]['box'], 'loss': puyu_pbox[video_key]['loss'], 'llm': 'puyu'} + with open(saved_path, 'w') as f: + json.dump(mixlm_pbox, f) + + with open(saved_path, 'r') as f: + mixlm_pbox = json.load(f) + with open('data/howto/captiondata/howto100m_train_puyu.json', 'r') as f: + meta_puyu = json.load(f) + with open('data/howto/captiondata/howto100m_train.json', 'r') as f: + meta_llama2 = json.load(f) + + meta_mixlm = {} + for video_key in mixlm_pbox.keys(): + if mixlm_pbox.get(video_key) is not None and (meta_llama2.get(video_key) is not None or meta_puyu.get(video_key) is not None): + if mixlm_pbox[video_key]['llm'] == 'llama2': + meta_mixlm[video_key] = meta_llama2[video_key] + llama2_feature_path = os.path.join(text_feature_folder_llama2, video_key + '.npy') + if not os.path.exists(llama2_feature_path): + continue + # if os.path.exists(llama2_feature_path): + # os.unlink(llama2_feature_path) + # if not os.path.exists(llama2_feature_path): + # os.symlink(llama2_feature_path, os.path.join(text_feature_folder_mixlm, video_key + '.npy')) + soft_link_path = os.path.join(text_feature_folder_mixlm, video_key + '.npy') + # if os.path.exists(soft_link_path): + # os.unlink(soft_link_path) + if not os.path.exists(soft_link_path): + # print(os.path.exists(soft_link_path), os.path.exists(llama2_feature_path)) + os.symlink(llama2_feature_path, soft_link_path) + # text_feature = np.load(llama2_feature_path) + # if text_feature.shape[0] != len(meta_llama2[video_key]['sentences']): + # print(f"{video_key} has {text_feature.shape[0]} sentences, but {len(meta_llama2[video_key]['sentences'])} sentences found in meta file") + else: + meta_mixlm[video_key] = meta_puyu[video_key] + puyu_feature_path = os.path.join(text_feature_folder_puyu, video_key + '.npy') + if not os.path.exists(puyu_feature_path): + continue + + soft_link_path = os.path.join(text_feature_folder_mixlm, video_key + '.npy') + + # if os.path.exists(soft_link_path): + # os.unlink(soft_link_path) + if not os.path.exists(soft_link_path): + os.symlink(puyu_feature_path, soft_link_path) + # text_feature = np.load(puyu_feature_path) + # if text_feature.shape[0] != len(meta_puyu[video_key]['sentences']): + # print(f"{video_key} has {text_feature.shape[0]} sentences, but {len(meta_puyu[video_key]['sentences'])} sentences found in meta file") + with open(os.path.join(save_folder, 'train_caption_mixlm.json'), 'w') as f: + json.dump(meta_mixlm, f) + opt.train_caption_file[0] = os.path.join(save_folder, 'train_caption_mixlm.json') + opt.text_feature_folder[0] = text_feature_folder_mixlm + # pass + + if not opt.start_from: - backup_envir(save_folder) + backup_envir(save_folder, opt) logger.info('backup evironment completed !') saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} @@ -136,6 +225,8 @@ def train(opt): if prev_opt.get(opt_name) != vars(opt).get(opt_name): logger.info('Change opt {} : {} --> {}'.format(opt_name, prev_opt.get(opt_name), vars(opt).get(opt_name))) + print(opt.text_feature_folder) + print(opt.train_caption_file) if len(opt.visual_feature_folder) == 2: train_dataset_1 = PropSeqDataset(opt.train_caption_file[0], [opt.visual_feature_folder[0]], @@ -195,7 +286,6 @@ def train(opt): model.train() # try to load saved pbox - saved_path = construct_save_path(opt) if os.path.exists(saved_path): try: with open(saved_path, 'r') as f: @@ -322,10 +412,11 @@ def train(opt): # if dt['video_key'][0] != 'LGArj9Do0xc': # continue # # for fast debugging - # if trained_samples > 5: - # break - # else: - # trained_samples += 1 + if opt.test: + if trained_samples > 5: + break + else: + trained_samples += 1 # if trained_samples < 1714: # trained_samples += 1 # continue @@ -486,7 +577,7 @@ def train(opt): epoch += 1 - if epoch == 1 and model.pseudo_boxes is not None and 'hyper' not in opt.train_caption_file[0]: + if epoch == 1 and model.pseudo_boxes is not None and 'mixlm' not in opt.train_caption_file[0]: # save the pseudo boxes pbox_save_path = construct_save_path(opt) if not os.path.exists(pbox_save_path): diff --git a/backup/train_fewshot.py b/backup/train_fewshot.py index db60bfe68fc32d3da5df89f5af1201a7151a3e8a..d35b3feefc80f1a87e4fb30394702c28d04472d6 100644 --- a/backup/train_fewshot.py +++ b/backup/train_fewshot.py @@ -68,7 +68,7 @@ def train(opt): tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) if not opt.start_from: - backup_envir(save_folder) + backup_envir(save_folder, opt) logger.info('backup evironment completed !') saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} diff --git a/backup/train_ft2_gt.py b/backup/train_ft2_gt.py index b007713a2ebbdae00dd0edaef54c41a3260279dd..b767f5c2525ed10b6551ba02a5551bafe0f1737e 100644 --- a/backup/train_ft2_gt.py +++ b/backup/train_ft2_gt.py @@ -147,7 +147,7 @@ def train(opt): tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) if not opt.start_from: - backup_envir(save_folder) + backup_envir(save_folder, opt) logger.info('backup evironment completed !') saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} @@ -190,16 +190,17 @@ def train(opt): # train_dataset.translator = train_dataset_1.translator else: - print('the script only support two dataset for pretrain and target task respectively') - exit(1) + # print('the script only support two dataset for pretrain and target task respectively') + # exit(1) train_dataset_target = PropSeqDataset(opt.train_caption_file, opt.visual_feature_folder, opt.text_feature_folder, opt.dict_file, True, 'gt', opt) - train_loader_target = DataLoader(train_dataset_target, batch_size=opt.batch_size, + subset_data = PercentageSubsetDataset(train_dataset_target, opt.ft_gt_percent) + train_loader_target = DataLoader(subset_data, batch_size=opt.batch_size, shuffle=True, num_workers=opt.nthreads, collate_fn=collate_fn, worker_init_fn=_init_fn) - train_dataloaders = [train_loader_target] + # train_dataloaders = [train_loader_target] # val_dataset = PropSeqDataset(opt.val_caption_file, # opt.visual_feature_folder, diff --git a/backup/train_pre_ft_gt.py b/backup/train_pre_ft_gt.py index 4e6c204b58c0fed4cca87004c6816d7830cee1cc..9440eb8b4b86d2123a997285686e704425519a3f 100644 --- a/backup/train_pre_ft_gt.py +++ b/backup/train_pre_ft_gt.py @@ -45,7 +45,7 @@ import copy a100_folder = ['/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/youcook2', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/Tasty/features', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Tasty/UniVL_feature', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/huabin/dataset/Anet', '/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features'] r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/features/tasty', '/mnt/data/Gvlab/wuhao/features/tasty/univl', '/mnt/data/Gvlab/wuhao/features/anet', '/mnt/data/Gvlab/wuhao/features/howto100m'] -pretrain_data_mode = 'single' # 'mix' or 'seq' or 'single' +pretrain_data_mode = 'mix' # 'mix' or 'seq' or 'single' # /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features -> /mnt/data/Gvlab/wuhao/features/howto100m # /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features @@ -122,7 +122,7 @@ def train(opt): tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) if not opt.start_from: - backup_envir(save_folder) + backup_envir(save_folder, opt) logger.info('backup evironment completed !') saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} diff --git a/backup/train_pre_perc.py b/backup/train_pre_perc.py index 909dcdece82848854abf5f774b1d5f848f0a49eb..15f50480e382fc5704c5a6e019594b9478bcca11 100644 --- a/backup/train_pre_perc.py +++ b/backup/train_pre_perc.py @@ -47,6 +47,48 @@ r3090_folder = ['/mnt/data/Gvlab/wuhao/features/yc2', '/mnt/data/Gvlab/wuhao/fea # /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip -> /mnt/data/Gvlab/wuhao/features/howto100m/clip_features # /cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/UniVL -> /mnt/data/Gvlab/wuhao/features/howto100m/univl_features +def construct_save_path(opt, save_folder="/mnt/data/pjlab-3090-sport/wuhao/code/dibs/pbox"): + elements = [] + # breakpoint() + if len(opt.train_caption_file) == 2: + if 'puyu' in opt.train_caption_file[0]: + elements.append('howto_puyu') + elif 'mixlm' in opt.train_caption_file[0]: + elements.append('howto_mixlm') + else: + elements.append('howto_llama2') + elements.append('howto') + if 'yc2' in opt.train_caption_file[1]: + elements.append('yc2') + elif 'anet' in opt.train_caption_file[1]: + elements.append('anet') + else: + if 'yc2' in opt.train_caption_file: + elements.append('yc2') + elif 'anet' in opt.train_caption_file: + elements.append('anet') + elif 'howto' in opt.train_caption_file: + if 'puyu' in opt.train_caption_file: + elements.append('howto_puyu') + elif 'mixlm' in opt.train_caption_file: + elements.append('howto_mixlm') + else: + elements.append('howto_llama2') + # elements.append('howto') + + if 'clip' in opt.visual_feature_folder[0] or 'CLIP' in opt.visual_feature_folder[0]: + elements.append('clip') + elif 'UniVL' in opt.visual_feature_folder[0] or 'univl' in opt.visual_feature_folder[0]: + elements.append('univl') + # add pbox parameters + pbox_type = "simop_v2" if opt.pseudo_box_type == "similarity_op_order_v2" else "simop" + elements.append(pbox_type) + elements.append(f"top{opt.top_frames}") + elements.append(f"r{opt.width_ratio}") + elements.append(f"iter{opt.iteration}") + elements.append(f"th{opt.width_th}") + return os.path.join(save_folder, '_'.join(elements) + '.json') + def seed_worker(worker_id): worker_seed = torch.initial_seed() % 2**32 np.random.seed(worker_seed) @@ -70,8 +112,53 @@ def train(opt): logger = create_logger(save_folder, 'train.log') tf_writer = SummaryWriter(os.path.join(save_folder, 'tf_summary')) + # if use mixlm model + saved_path = construct_save_path(opt) + + if 'mixlm' in saved_path: + text_feature_folder_mixlm = os.path.join(save_folder, 'text_feature') + os.makedirs(text_feature_folder_mixlm, exist_ok=True) + if 'clip' in save_folder: + text_feature_folder_llama2 = map_path('/cpfs01/shared/Gvlab-A100/Gvlab-A100_hdd/wuhao/howto100m/features/clip/text_proj') + text_feature_folder_puyu = '/mnt/data/Gvlab/wuhao/code/clip_frame_feature_extraction/features/howto100m/clip' + elif 'univl' in save_folder: + text_feature_folder_llama2 = '/mnt/data/Gvlab/wuhao/features/howto100m/univl_features' + text_feature_folder_puyu = '/mnt/data/Gvlab/wuhao/features/howto100m/univl_features' + + if not os.path.exists(saved_path): + llama2_pbox_path = saved_path.replace('mixlm', 'llama2') + puyu_pbox_path = saved_path.replace('mixlm', 'puyu') + with open(llama2_pbox_path, 'r') as f: + llama2_pbox = json.load(f) + with open(puyu_pbox_path, 'r') as f: + puyu_pbox = json.load(f) + + mixlm_pbox = {} + for video_key in llama2_pbox.keys(): + if llama2_pbox[video_key] is None and puyu_pbox[video_key] is None: + mixlm_pbox[video_key] = None + else: + if llama2_pbox[video_key]['loss'] < puyu_pbox[video_key]['loss']: + mixlm_pbox[video_key] = {'pbox': llama2_pbox[video_key]['pbox'], 'loss': llama2_pbox[video_key]['loss'], 'llm': 'llama2'} + else: + mixlm_pbox[video_key] = {'pbox': puyu_pbox[video_key]['pbox'], 'loss': puyu_pbox[video_key]['loss'], 'llm': 'puyu'} + with open(saved_path, 'w') as f: + json.dump(mixlm_pbox, f) + + with open(saved_path, 'r') as f: + mixlm_pbox = json.load(f) + for video_key in mixlm_pbox.keys(): + if mixlm_pbox[video_key] is not None: + if mixlm_pbox[video_key]['llm'] == 'llama2': + llama2_feature_path = os.path.join(text_feature_folder_llama2, video_key + '.npy') + os.symlink(llama2_feature_path, os.path.join(text_feature_folder_mixlm, video_key + '.npy')) + else: + puyu_feature_path = os.path.join(text_feature_folder_puyu, video_key + '.npy') + os.symlink(puyu_feature_path, os.path.join(text_feature_folder_mixlm, video_key + '.npy')) + opt.text_feature_folder[0] = text_feature_folder_mixlm + if not opt.start_from: - backup_envir(save_folder) + backup_envir(save_folder, opt) logger.info('backup evironment completed !') saved_info = {'best': {}, 'last': {}, 'history': {}, 'eval_history': {}} @@ -430,30 +517,52 @@ def train(opt): torch.cuda.empty_cache() # Stop criterion if epoch >= opt.epoch: - # load Best model and conduct evaluation - print('====== Conduct the Final Evaluation to test Best Checkpoint ======') - val_logger = create_logger(save_folder, 'val.log') - loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') - model.load_state_dict(loaded_pth['model'], strict=True) - model.eval() - result_json_path = saved_info['best']['result_json_path'] - eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) - if opt.caption_decoder_type == 'none': - current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) - else: - if opt.criteria_for_best_ckpt == 'dvc': - current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() - else: - current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() + # # load Best model and conduct evaluation + # print('====== Conduct the Final Evaluation to test Best Checkpoint ======') + # val_logger = create_logger(save_folder, 'val.log') + # loaded_pth = torch.load(os.path.join(save_folder, 'model-best.pth'), map_location='cuda') + # model.load_state_dict(loaded_pth['model'], strict=True) + # model.eval() + # result_json_path = saved_info['best']['result_json_path'] + # eval_score, _ = evaluate(model, criterion, postprocessors, val_loader, result_json_path, logger=logger, args=opt, alpha=opt.ec_alpha, device=opt.device, debug=opt.debug) + # if opt.caption_decoder_type == 'none': + # current_score = 2./(1./eval_score['Precision'] + 1./eval_score['Recall']) + # else: + # if opt.criteria_for_best_ckpt == 'dvc': + # current_score = np.array(eval_score['METEOR']).mean() + np.array(eval_score['soda_c']).mean() + # else: + # current_score = np.array(eval_score['para_METEOR']).mean() + np.array(eval_score['para_CIDEr']).mean() + np.array(eval_score['para_Bleu_4']).mean() - _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] - print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) - val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + # _ = [item.append(np.array(item).mean()) for item in eval_score.values() if isinstance(item, list)] + # print_info = '\n'.join([key + ":" + str(eval_score[key]) for key in eval_score.keys()]) + # val_logger.info('Best-model is saved at iter {}.\n'.format(saved_info['best']['iter'])) + # val_logger.info('\nBest Model Performance:\n' + print_info) + # val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + + # tf_writer.close() + # break + + val_logger = create_logger(save_folder, 'val.log') + infos_path = os.path.join(save_folder, 'info.json') + + with open(infos_path, 'r') as f: + data = json.load(f) + val_history = data['history']['val_result_history'] + + metric_sum = {} + metrics = ['METEOR', 'CIDEr', 'soda_c', 'Precision', 'Recall'] + for k, v in val_history.items(): + metric_sum[k] = sum([v['eval_score'][metric] for metric in metrics]) + # print(f"{k}: {metric_sum[k]}") + + best_epoch = max(metric_sum, key=metric_sum.get) + best_val_score = val_history[best_epoch]['eval_score'] + val_logger.info(f"Best epoch: {best_epoch}") + print_info = '\n'.join([key + ":" + str(best_val_score[key]) for key in best_val_score.keys()]) val_logger.info('\nBest Model Performance:\n' + print_info) - val_logger.info('\nBest Overall Score {}: {}\n'.format(iteration, current_score)) + val_logger.info('\nBest Overall Score epoch{}: {}\n'.format(best_epoch, metric_sum[best_epoch])) - tf_writer.close() - break + break return saved_info diff --git a/info.json b/info.json index 882983ce3374f2dc0e07ba72cc8c953647b5ce11..b0ef7913dc5040eadde57551f13d9f8da312bcff 100644 --- a/info.json +++ b/info.json @@ -1 +1 @@ -{"best": {"opt": {"cfg_path": "cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml", "id": "seq2-ft(mix)-gt_percent-1.0", "gpu_id": [], "disable_tqdm": false, "seed": 777, "random_seed": false, "disable_cudnn": 0, "debug": false, "device": "cuda", "map": true, "train_caption_file": ["data/howto/captiondata/howto100m_train.json", "data/anet/captiondata/train_modified.json"], "invalid_video_json": [], "val_caption_file": "data/anet/captiondata/val_1.json", "visual_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/clip/visual", "/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/"], "text_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/clip/text_proj", "/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/"], "gt_file_for_auc": "data/anet/captiondata/val_all.json", "gt_file_for_eval": ["data/anet/captiondata/val_1.json", "data/anet/captiondata/val_2.json"], "gt_file_for_para_eval": ["data/anet/captiondata/para/anet_entities_val_1_para.json", "data/anet/captiondata/para/anet_entities_val_2_para.json"], "dict_file": "data/howto/vocabulary_howto_rate2_anet.json", "criteria_for_best_ckpt": "overall", "visual_feature_type": ["CLIP"], "feature_dim": 768, "start_from": "", "start_from_mode": "last", "pretrain": null, "pretrain_path": "", "nthreads": 4, "data_norm": 0, "data_rescale": 1, "feature_sample_rate": 1, "train_proposal_sample_num": 30, "gt_proposal_sample_num": 20, "ft_gt_percent": 1.0, "pre_percent": 1.0, "vocab_size": 16221, "wordRNN_input_feats_type": "C", "caption_decoder_type": "standard", "rnn_size": 512, "num_layers": 1, "input_encoding_size": 512, "att_hid_size": 512, "drop_prob": 0.5, "max_caption_len": 50, "hidden_dim": 512, "num_queries": 100, "hidden_dropout_prob": 0.5, "layer_norm_eps": 1e-12, "caption_cost_type": "loss", "set_cost_caption": 0, "set_cost_class": 2, "set_cost_bbox": 0, "set_cost_giou": 4, "cost_alpha": 0.25, "cost_gamma": 2, "bbox_loss_coef": 0, "giou_loss_coef": 4, "count_loss_coef": 0.5, "caption_loss_coef": 2, "eos_coef": 0.1, "num_classes": 1, "dec_layers": 2, "enc_layers": 2, "transformer_ff_dim": 512, "transformer_dropout_prob": 0.1, "frame_embedding_num": 100, "sample_method": "nearest", "fix_xcw": 1, "use_anchor": 0, "random_anchor_init": true, "prior_anchor_duration_init": true, "matcher_type": "default", "pretrained_language_model": "CLIP", "text_hidden_dim": 768, "max_text_input_len": 32, "max_pos_num": 500, "huggingface_cache_dir": ".cache", "text_encoder_learning_strategy": "frozen", "use_pseudo_box": false, "pseudo_box_type": "similarity_op_order_v2", "top_frames": 30, "window_size": 2, "statistic_mode": "mode", "width_ratio": 1, "beta": 1, "width_th": 1, "iteration": 3, "pseudo_box_aug": false, "pseudo_box_aug_num": 8, "pseudo_box_aug_ratio": 0.02, "pseudo_box_aug_mode": "random_range", "refine_pseudo_box": false, "use_additional_score_layer": false, "use_additional_cap_layer": false, "merge_k_boxes": 3, "merge_criterion": "ins_cap_topk", "merge_mode": "weighted_sum", "refine_pseudo_stage_num": 2, "use_query_box_for_refine": 0, "norm_ins_score": "sigmoid", "cap_prob_clip": false, "use_neg_pseudo_box": false, "num_neg_box": 10, "weighted_mil_loss": false, "focal_mil": false, "disable_rematch": false, "start_refine_epoch": -1, "align_keep_percentile": 0.1, "align_top_band_size": 0, "align_drop_z": 0, "align_one_to_many": false, "align_many_to_one": false, "align_contiguous": false, "set_cost_sim": 1.0, "enable_contrastive": false, "disable_contrastive_projection": 1, "contrastive_hidden_size": 128, "contrastive_loss_start_coef": 0.0, "contrastive_loss_temperature": 0.1, "enable_cross_video_cl": true, "enable_e2t_cl": true, "enable_bg_for_cl": true, "set_cost_cl": 0.0, "cl_schedule_val": [0, 0.1], "cl_schedule_time": [0, 2], "prior_manner": "all", "training_scheme": "all", "epoch": 20, "batch_size": 1, "batch_size_for_eval": 1, "grad_clip": 100.0, "optimizer_type": "adam", "weight_decay": 0.0001, "lr": 5e-05, "learning_rate_decay_start": 8, "learning_rate_decay_every": 3, "learning_rate_decay_rate": 0.5, "min_epoch_when_save": -1, "save_checkpoint_every": 1, "save_all_checkpoint": 0, "save_dir": "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs", "lr_backbone_names": ["None"], "lr_backbone": 2e-05, "lr_proj": 0, "lr_linear_proj_names": ["reference_points", "sampling_offsets"], "lr_linear_proj_mult": 0.1, "with_box_refine": 1, "transformer_input_type": "queries", "backbone": null, "dilation": false, "position_embedding": "sine", "position_embedding_scale": 6.283185307179586, "num_feature_levels": 4, "nheads": 8, "dec_n_points": 4, "enc_n_points": 4, "share_caption_head": 1, "cap_nheads": 1, "cap_dec_n_points": 4, "cap_num_feature_levels": 4, "disable_mid_caption_heads": false, "aux_loss": true, "cls_loss_coef": 2, "self_iou_loss_coef": 0.0, "ref_rank_loss_coef": 0.0, "mil_loss_coef": 0, "focal_alpha": 0.25, "focal_gamma": 2.0, "max_eseq_length": 10, "lloss_gau_mask": 1, "lloss_beta": 1, "scheduled_sampling_start": -1, "basic_ss_prob": 0, "scheduled_sampling_increase_every": 2, "scheduled_sampling_increase_prob": 0.05, "scheduled_sampling_max_prob": 0.25, "ec_alpha": 1.0, "train_proposal_file": "data/generated_proposals/dbg_trainval_top100.json", "eval_proposal_file": "data/generated_proposals/dbg_trainval_top100.json", "train_proposal_type": "gt", "lloss_cross_entropy": 0, "lloss_focal_loss": 0, "base_cfg_path": "cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml", "visual_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/"], "text_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/"], "soft_attention": 1, "id_ori": "", "dict_file_val": "data/howto/vocabulary_howto_rate2_anet.json", "vocab_size_val": 16221, "current_lr": 3.125e-06, "event_context_dim": null, "clip_context_dim": 512}, "iter": 200180, "epoch": 19, "best_val_score": 0.4938654333071738, "result_json_path": "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/prediction/num4917_epoch19.json", "avg_proposal_num": -1, "Precision": 0.5612365263371945, "Recall": 0.5270524681293403}, "last": {"opt": {"cfg_path": "cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml", "id": "seq2-ft(mix)-gt_percent-1.0", "gpu_id": [], "disable_tqdm": false, "seed": 777, "random_seed": false, "disable_cudnn": 0, "debug": false, "device": "cuda", "map": true, "train_caption_file": ["data/howto/captiondata/howto100m_train.json", "data/anet/captiondata/train_modified.json"], "invalid_video_json": [], "val_caption_file": "data/anet/captiondata/val_1.json", "visual_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/clip/visual", "/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/"], "text_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/clip/text_proj", "/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/"], "gt_file_for_auc": "data/anet/captiondata/val_all.json", "gt_file_for_eval": ["data/anet/captiondata/val_1.json", "data/anet/captiondata/val_2.json"], "gt_file_for_para_eval": ["data/anet/captiondata/para/anet_entities_val_1_para.json", "data/anet/captiondata/para/anet_entities_val_2_para.json"], "dict_file": "data/howto/vocabulary_howto_rate2_anet.json", "criteria_for_best_ckpt": "overall", "visual_feature_type": ["CLIP"], "feature_dim": 768, "start_from": "", "start_from_mode": "last", "pretrain": null, "pretrain_path": "", "nthreads": 4, "data_norm": 0, "data_rescale": 1, "feature_sample_rate": 1, "train_proposal_sample_num": 30, "gt_proposal_sample_num": 20, "ft_gt_percent": 1.0, "pre_percent": 1.0, "vocab_size": 16221, "wordRNN_input_feats_type": "C", "caption_decoder_type": "standard", "rnn_size": 512, "num_layers": 1, "input_encoding_size": 512, "att_hid_size": 512, "drop_prob": 0.5, "max_caption_len": 50, "hidden_dim": 512, "num_queries": 100, "hidden_dropout_prob": 0.5, "layer_norm_eps": 1e-12, "caption_cost_type": "loss", "set_cost_caption": 0, "set_cost_class": 2, "set_cost_bbox": 0, "set_cost_giou": 4, "cost_alpha": 0.25, "cost_gamma": 2, "bbox_loss_coef": 0, "giou_loss_coef": 4, "count_loss_coef": 0.5, "caption_loss_coef": 2, "eos_coef": 0.1, "num_classes": 1, "dec_layers": 2, "enc_layers": 2, "transformer_ff_dim": 512, "transformer_dropout_prob": 0.1, "frame_embedding_num": 100, "sample_method": "nearest", "fix_xcw": 1, "use_anchor": 0, "random_anchor_init": true, "prior_anchor_duration_init": true, "matcher_type": "default", "pretrained_language_model": "CLIP", "text_hidden_dim": 768, "max_text_input_len": 32, "max_pos_num": 500, "huggingface_cache_dir": ".cache", "text_encoder_learning_strategy": "frozen", "use_pseudo_box": false, "pseudo_box_type": "similarity_op_order_v2", "top_frames": 30, "window_size": 2, "statistic_mode": "mode", "width_ratio": 1, "beta": 1, "width_th": 1, "iteration": 3, "pseudo_box_aug": false, "pseudo_box_aug_num": 8, "pseudo_box_aug_ratio": 0.02, "pseudo_box_aug_mode": "random_range", "refine_pseudo_box": false, "use_additional_score_layer": false, "use_additional_cap_layer": false, "merge_k_boxes": 3, "merge_criterion": "ins_cap_topk", "merge_mode": "weighted_sum", "refine_pseudo_stage_num": 2, "use_query_box_for_refine": 0, "norm_ins_score": "sigmoid", "cap_prob_clip": false, "use_neg_pseudo_box": false, "num_neg_box": 10, "weighted_mil_loss": false, "focal_mil": false, "disable_rematch": false, "start_refine_epoch": -1, "align_keep_percentile": 0.1, "align_top_band_size": 0, "align_drop_z": 0, "align_one_to_many": false, "align_many_to_one": false, "align_contiguous": false, "set_cost_sim": 1.0, "enable_contrastive": false, "disable_contrastive_projection": 1, "contrastive_hidden_size": 128, "contrastive_loss_start_coef": 0.0, "contrastive_loss_temperature": 0.1, "enable_cross_video_cl": true, "enable_e2t_cl": true, "enable_bg_for_cl": true, "set_cost_cl": 0.0, "cl_schedule_val": [0, 0.1], "cl_schedule_time": [0, 2], "prior_manner": "all", "training_scheme": "all", "epoch": 20, "batch_size": 1, "batch_size_for_eval": 1, "grad_clip": 100.0, "optimizer_type": "adam", "weight_decay": 0.0001, "lr": 5e-05, "learning_rate_decay_start": 8, "learning_rate_decay_every": 3, "learning_rate_decay_rate": 0.5, "min_epoch_when_save": -1, "save_checkpoint_every": 1, "save_all_checkpoint": 0, "save_dir": "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs", "lr_backbone_names": ["None"], "lr_backbone": 2e-05, "lr_proj": 0, "lr_linear_proj_names": ["reference_points", "sampling_offsets"], "lr_linear_proj_mult": 0.1, "with_box_refine": 1, "transformer_input_type": "queries", "backbone": null, "dilation": false, "position_embedding": "sine", "position_embedding_scale": 6.283185307179586, "num_feature_levels": 4, "nheads": 8, "dec_n_points": 4, "enc_n_points": 4, "share_caption_head": 1, "cap_nheads": 1, "cap_dec_n_points": 4, "cap_num_feature_levels": 4, "disable_mid_caption_heads": false, "aux_loss": true, "cls_loss_coef": 2, "self_iou_loss_coef": 0.0, "ref_rank_loss_coef": 0.0, "mil_loss_coef": 0, "focal_alpha": 0.25, "focal_gamma": 2.0, "max_eseq_length": 10, "lloss_gau_mask": 1, "lloss_beta": 1, "scheduled_sampling_start": -1, "basic_ss_prob": 0, "scheduled_sampling_increase_every": 2, "scheduled_sampling_increase_prob": 0.05, "scheduled_sampling_max_prob": 0.25, "ec_alpha": 1.0, "train_proposal_file": "data/generated_proposals/dbg_trainval_top100.json", "eval_proposal_file": "data/generated_proposals/dbg_trainval_top100.json", "train_proposal_type": "gt", "lloss_cross_entropy": 0, "lloss_focal_loss": 0, "base_cfg_path": "cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml", "visual_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/"], "text_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/"], "soft_attention": 1, "id_ori": "", "dict_file_val": "data/howto/vocabulary_howto_rate2_anet.json", "vocab_size_val": 16221, "current_lr": 3.125e-06, "event_context_dim": null, "clip_context_dim": 512}, "iter": 200180, "epoch": 19, "best_val_score": 0.4938654333071738}, "history": {"val_result_history": {"0": {"eval_score": {"Bleu_1": 0.15656016917085527, "Bleu_2": 0.08210369852679855, "Bleu_3": 0.042491746140277446, "Bleu_4": 0.021149866989626908, "METEOR": 0.08752782819459405, "ROUGE_L": 0.1577032846084498, "CIDEr": 0.2687260839927409, "Recall": 0.4986985069085389, "Precision": 0.548450952477792, "soda_c": 0.045070258467165024, "para_Bleu_1": 0.36987086578065714, "para_Bleu_2": 0.1987998709052068, "para_Bleu_3": 0.11671522868501899, "para_Bleu_4": 0.07164097958462183, "para_METEOR": 0.13901753612789455, "para_ROUGE_L": 0.2826680559963382, "para_CIDEr": 0.0956891322121665, "avg_proposal_number": -1}}, "1": {"eval_score": {"Bleu_1": 0.15965966113561106, "Bleu_2": 0.08785069799970043, "Bleu_3": 0.04739925348589703, "Bleu_4": 0.02377096308421814, "METEOR": 0.09062964515721111, "ROUGE_L": 0.1652647774491388, "CIDEr": 0.27366191469495676, "Recall": 0.45131293652113946, "Precision": 0.5379414954918249, "soda_c": 0.04303682007432423, "para_Bleu_1": 0.3640361416830845, "para_Bleu_2": 0.1986476696673755, "para_Bleu_3": 0.11814800235116821, "para_Bleu_4": 0.07336184523852665, "para_METEOR": 0.13911724177507803, "para_ROUGE_L": 0.28211794880017504, "para_CIDEr": 0.08634617454158834}}, "2": {"eval_score": {"Bleu_1": 0.15440507165989542, "Bleu_2": 0.08178273697953425, "Bleu_3": 0.042600749568780155, "Bleu_4": 0.02119123483046711, "METEOR": 0.08563216148714695, "ROUGE_L": 0.156809182143994, "CIDEr": 0.25960752079137744, "Recall": 0.5075951227720545, "Precision": 0.571834112941489, "soda_c": 0.048597974030683, "para_Bleu_1": 0.3985431504573892, "para_Bleu_2": 0.22415947108296613, "para_Bleu_3": 0.1341003834690626, "para_Bleu_4": 0.08312155143550452, "para_METEOR": 0.1510085678983445, "para_ROUGE_L": 0.2957598062989384, "para_CIDEr": 0.12271570278513648, "avg_proposal_number": -1}}, "3": {"eval_score": {"Bleu_1": 0.16003947012491918, "Bleu_2": 0.08640386650819816, "Bleu_3": 0.045769192920880976, "Bleu_4": 0.023139762266241797, "METEOR": 0.08893476927946467, "ROUGE_L": 0.16285119298911696, "CIDEr": 0.27850058398714506, "Recall": 0.4974410652224822, "Precision": 0.571762083926507, "soda_c": 0.04898353247531122, "para_Bleu_1": 0.4116267700746525, "para_Bleu_2": 0.23315066082372427, "para_Bleu_3": 0.139785630195007, "para_Bleu_4": 0.08689414164874545, "para_METEOR": 0.15321412716959742, "para_ROUGE_L": 0.2993749803089721, "para_CIDEr": 0.12755194391496638, "avg_proposal_number": -1}}, "4": {"eval_score": {"Bleu_1": 0.1612752203314224, "Bleu_2": 0.08712092952271142, "Bleu_3": 0.04643407984417907, "Bleu_4": 0.024237450149938583, "METEOR": 0.0888552980469009, "ROUGE_L": 0.16165678007821221, "CIDEr": 0.28844655875134945, "Recall": 0.5079771255793173, "Precision": 0.5707494407158785, "soda_c": 0.05143467092505771, "para_Bleu_1": 0.425828341023263, "para_Bleu_2": 0.2431293051387748, "para_Bleu_3": 0.14662751878582, "para_Bleu_4": 0.09131956416083617, "para_METEOR": 0.15868276543147294, "para_ROUGE_L": 0.30762031965083425, "para_CIDEr": 0.1438790695271004, "avg_proposal_number": -1}}, "5": {"eval_score": {"Bleu_1": 0.16203040821313286, "Bleu_2": 0.087418866671477, "Bleu_3": 0.04641401855891123, "Bleu_4": 0.023872355329811287, "METEOR": 0.08736154709181514, "ROUGE_L": 0.16095171754962678, "CIDEr": 0.3019460931650574, "Recall": 0.5237442505746305, "Precision": 0.5691986983933232, "soda_c": 0.05366939846142926, "para_Bleu_1": 0.4285515683378188, "para_Bleu_2": 0.24896313523930838, "para_Bleu_3": 0.15083849533584295, "para_Bleu_4": 0.09425440122753082, "para_METEOR": 0.15418242275887206, "para_ROUGE_L": 0.3037081433191389, "para_CIDEr": 0.16822639157343386, "avg_proposal_number": -1}}, "6": {"eval_score": {"Bleu_1": 0.17095715677415013, "Bleu_2": 0.0951967897773989, "Bleu_3": 0.05145074727592996, "Bleu_4": 0.026686223548170303, "METEOR": 0.09033289555302068, "ROUGE_L": 0.16939818741017104, "CIDEr": 0.33299543538258497, "Recall": 0.5001550726802355, "Precision": 0.5629321740898863, "soda_c": 0.05378783144134501, "para_Bleu_1": 0.44719474980697405, "para_Bleu_2": 0.2615784516531111, "para_Bleu_3": 0.15956746990786394, "para_Bleu_4": 0.09983770060804388, "para_METEOR": 0.15549284849496958, "para_ROUGE_L": 0.30852597622578265, "para_CIDEr": 0.18758102150887232, "avg_proposal_number": -1}}, "7": {"eval_score": {"Bleu_1": 0.16525493799366836, "Bleu_2": 0.09017429361474327, "Bleu_3": 0.04843073565357156, "Bleu_4": 0.025752141227780294, "METEOR": 0.09042668571725655, "ROUGE_L": 0.1657835735936403, "CIDEr": 0.30766696683798356, "Recall": 0.5070758476264831, "Precision": 0.5698723815334497, "soda_c": 0.05193286444599829, "para_Bleu_1": 0.4299765573510605, "para_Bleu_2": 0.24998607326423264, "para_Bleu_3": 0.15168978606887273, "para_Bleu_4": 0.09540463753102806, "para_METEOR": 0.15913054274631774, "para_ROUGE_L": 0.30821511076520103, "para_CIDEr": 0.14655297481419807}}, "8": {"eval_score": {"Bleu_1": 0.1659435247550983, "Bleu_2": 0.09010888064116455, "Bleu_3": 0.04740925434645997, "Bleu_4": 0.023810200153797586, "METEOR": 0.0893691583245007, "ROUGE_L": 0.16481267120708817, "CIDEr": 0.3096929324572276, "Recall": 0.5271698247293078, "Precision": 0.5766981899532185, "soda_c": 0.05637593299631936, "para_Bleu_1": 0.4507795558374508, "para_Bleu_2": 0.2668765313566654, "para_Bleu_3": 0.16324000259413463, "para_Bleu_4": 0.10292908422008885, "para_METEOR": 0.163503434468027, "para_ROUGE_L": 0.3141109355407807, "para_CIDEr": 0.1830754815850521, "avg_proposal_number": -1}}, "9": {"eval_score": {"Bleu_1": 0.16664911544364056, "Bleu_2": 0.09023295213839283, "Bleu_3": 0.04763940550902772, "Bleu_4": 0.02409205514859969, "METEOR": 0.0878588871148787, "ROUGE_L": 0.16401896184386325, "CIDEr": 0.31947446694949533, "Recall": 0.5282742157284517, "Precision": 0.5750796556165633, "soda_c": 0.05745241491068406, "para_Bleu_1": 0.46204429574393835, "para_Bleu_2": 0.2749900961045832, "para_Bleu_3": 0.1683879565471281, "para_Bleu_4": 0.10624339593597942, "para_METEOR": 0.16245439213508253, "para_ROUGE_L": 0.3162965936511474, "para_CIDEr": 0.20803178964320856, "avg_proposal_number": -1}}, "10": {"eval_score": {"Bleu_1": 0.1671778590456048, "Bleu_2": 0.09077014613023152, "Bleu_3": 0.0476684747303012, "Bleu_4": 0.02445564298599047, "METEOR": 0.08933235383587503, "ROUGE_L": 0.1654660162888944, "CIDEr": 0.31886265111118334, "Recall": 0.5314017615268335, "Precision": 0.5831469052945512, "soda_c": 0.05853263249839839, "para_Bleu_1": 0.46544090189732323, "para_Bleu_2": 0.2789325258737778, "para_Bleu_3": 0.17172911957785325, "para_Bleu_4": 0.10903514181091935, "para_METEOR": 0.16550159188298816, "para_ROUGE_L": 0.3181118223429575, "para_CIDEr": 0.2056618808195008, "avg_proposal_number": -1}}, "11": {"eval_score": {"Bleu_1": 0.16560019346009094, "Bleu_2": 0.08934946581658681, "Bleu_3": 0.04692472826903507, "Bleu_4": 0.023331060597699706, "METEOR": 0.08861943572471001, "ROUGE_L": 0.16392659155605854, "CIDEr": 0.31177527957257306, "Recall": 0.5248955646301546, "Precision": 0.5713061826316813, "soda_c": 0.056694173808073595, "para_Bleu_1": 0.45551540477127933, "para_Bleu_2": 0.2725270289009415, "para_Bleu_3": 0.16731081427102573, "para_Bleu_4": 0.10555679460767188, "para_METEOR": 0.1665724805603667, "para_ROUGE_L": 0.31619749898051375, "para_CIDEr": 0.19719071969736374}}, "12": {"eval_score": {"Bleu_1": 0.16778675341331784, "Bleu_2": 0.09082555766488616, "Bleu_3": 0.047445681271689716, "Bleu_4": 0.02375280793420285, "METEOR": 0.08883520478698428, "ROUGE_L": 0.16531435721130755, "CIDEr": 0.31778343902267087, "Recall": 0.5273619026669621, "Precision": 0.5698181479221706, "soda_c": 0.05753856798988932, "para_Bleu_1": 0.4610381779339771, "para_Bleu_2": 0.2761144617772928, "para_Bleu_3": 0.16915034097081671, "para_Bleu_4": 0.10654029953240575, "para_METEOR": 0.16638305166981465, "para_ROUGE_L": 0.31710573495570465, "para_CIDEr": 0.19601570682645908}}, "13": {"eval_score": {"Bleu_1": 0.16683698969676453, "Bleu_2": 0.09036855967772307, "Bleu_3": 0.047484441130632896, "Bleu_4": 0.023876859658376735, "METEOR": 0.08814626862844692, "ROUGE_L": 0.16473003568483396, "CIDEr": 0.3189568758512915, "Recall": 0.5281546209817979, "Precision": 0.5704333604501349, "soda_c": 0.057417105431783064, "para_Bleu_1": 0.4580706340663244, "para_Bleu_2": 0.27372623489326064, "para_Bleu_3": 0.16745128920972313, "para_Bleu_4": 0.10550306643408856, "para_METEOR": 0.16656454278617736, "para_ROUGE_L": 0.31631873012989425, "para_CIDEr": 0.19724321819057877}}, "14": {"eval_score": {"Bleu_1": 0.16662144072598145, "Bleu_2": 0.08988753231411394, "Bleu_3": 0.04690847145308288, "Bleu_4": 0.023224274927987735, "METEOR": 0.08725158341768323, "ROUGE_L": 0.16364893754496343, "CIDEr": 0.32028824475030926, "Recall": 0.5260420675803493, "Precision": 0.5630584367161506, "soda_c": 0.057565785652999135, "para_Bleu_1": 0.46764194087144684, "para_Bleu_2": 0.2801629240374498, "para_Bleu_3": 0.1713033186995987, "para_Bleu_4": 0.10750827268624512, "para_METEOR": 0.16742715934059368, "para_ROUGE_L": 0.31858424377772926, "para_CIDEr": 0.2089956210595351, "avg_proposal_number": -1}}, "15": {"eval_score": {"Bleu_1": 0.16754398447821903, "Bleu_2": 0.08978801866243748, "Bleu_3": 0.046077601805781236, "Bleu_4": 0.02215727819941335, "METEOR": 0.08650894641812401, "ROUGE_L": 0.16425299709373153, "CIDEr": 0.3192637628790779, "Recall": 0.5308598805776927, "Precision": 0.5705477594739302, "soda_c": 0.059035206979637336, "para_Bleu_1": 0.4722129873397206, "para_Bleu_2": 0.2843271953295457, "para_Bleu_3": 0.17433620623201318, "para_Bleu_4": 0.10943737200004257, "para_METEOR": 0.16524483023272712, "para_ROUGE_L": 0.3180351825656492, "para_CIDEr": 0.2139382514781602, "avg_proposal_number": -1}}, "16": {"eval_score": {"Bleu_1": 0.16584280243722227, "Bleu_2": 0.08889969905794425, "Bleu_3": 0.04569298286173284, "Bleu_4": 0.021992960199339176, "METEOR": 0.08570833880397384, "ROUGE_L": 0.16234979503724006, "CIDEr": 0.3170462149966731, "Recall": 0.5273397281824633, "Precision": 0.5648989898989865, "soda_c": 0.058539462474976364, "para_Bleu_1": 0.4735378044184376, "para_Bleu_2": 0.2855599966961999, "para_Bleu_3": 0.17485842077678387, "para_Bleu_4": 0.10998333079246524, "para_METEOR": 0.16580782598840993, "para_ROUGE_L": 0.3184105968751349, "para_CIDEr": 0.2144083270960459, "avg_proposal_number": -1}}, "17": {"eval_score": {"Bleu_1": 0.16720622564646215, "Bleu_2": 0.08946643461131876, "Bleu_3": 0.04568137095423273, "Bleu_4": 0.022039722503534608, "METEOR": 0.08588931176535387, "ROUGE_L": 0.16315869782389542, "CIDEr": 0.32099741016990446, "Recall": 0.5265047853249455, "Precision": 0.5647345942647923, "soda_c": 0.05847424883094643, "para_Bleu_1": 0.47508155945278135, "para_Bleu_2": 0.2858233856765029, "para_Bleu_3": 0.17499503512152859, "para_Bleu_4": 0.11002968407978216, "para_METEOR": 0.16541373751181562, "para_ROUGE_L": 0.3190110890037882, "para_CIDEr": 0.21421557986951392}}, "18": {"eval_score": {"Bleu_1": 0.1662475028889873, "Bleu_2": 0.08895418147726737, "Bleu_3": 0.04559170272578064, "Bleu_4": 0.021869443641790748, "METEOR": 0.0853620749347768, "ROUGE_L": 0.16226693807975517, "CIDEr": 0.3203697867996399, "Recall": 0.5243080966273422, "Precision": 0.5592002237136435, "soda_c": 0.058066485957305666, "para_Bleu_1": 0.47302383939773723, "para_Bleu_2": 0.2848420020452884, "para_Bleu_3": 0.17477626094199183, "para_Bleu_4": 0.11005159892431456, "para_METEOR": 0.16474042555391544, "para_ROUGE_L": 0.31754161420686944, "para_CIDEr": 0.2082818020277855}}, "19": {"eval_score": {"Bleu_1": 0.16600244771432068, "Bleu_2": 0.08859363359362551, "Bleu_3": 0.045174799285766926, "Bleu_4": 0.021453706973694267, "METEOR": 0.08469975853590762, "ROUGE_L": 0.1615333099598977, "CIDEr": 0.3178372173219055, "Recall": 0.5270524681293403, "Precision": 0.5612365263371945, "soda_c": 0.05852570981425518, "para_Bleu_1": 0.47641872729084495, "para_Bleu_2": 0.28679556025023933, "para_Bleu_3": 0.1757988669447671, "para_Bleu_4": 0.11061748158923715, "para_METEOR": 0.1647238014039032, "para_ROUGE_L": 0.3182336912910021, "para_CIDEr": 0.21852415031403352, "avg_proposal_number": -1}}}, "loss_history": {"1000": {"loss_ce": 0.284, "loss_counter": 0.126, "loss_bbox": 0.117, "loss_giou": 0.275, "loss_self_iou": 0.126, "cardinality_error": 3.775, "loss_ce_0": 0.284, "loss_counter_0": 0.126, "loss_bbox_0": 0.118, "loss_giou_0": 0.276, "loss_self_iou_0": 0.126, "cardinality_error_0": 3.775, "loss_caption_0": 3.781, "loss_caption": 3.778, "total_loss": 18.585}, "2000": {"loss_ce": 0.287, "loss_counter": 0.119, "loss_bbox": 0.087, "loss_giou": 0.239, "loss_self_iou": 0.12, "cardinality_error": 3.705, "loss_ce_0": 0.289, "loss_counter_0": 0.118, "loss_bbox_0": 0.087, "loss_giou_0": 0.239, "loss_self_iou_0": 0.121, "cardinality_error_0": 3.705, "loss_caption_0": 3.682, "loss_caption": 3.675, "total_loss": 17.896}, "3000": {"loss_ce": 0.291, "loss_counter": 0.122, "loss_bbox": 0.078, "loss_giou": 0.227, "loss_self_iou": 0.098, "cardinality_error": 3.705, "loss_ce_0": 0.292, "loss_counter_0": 0.122, "loss_bbox_0": 0.078, "loss_giou_0": 0.228, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.705, "loss_caption_0": 3.668, "loss_caption": 3.664, "total_loss": 17.771}, "4000": {"loss_ce": 0.289, "loss_counter": 0.126, "loss_bbox": 0.078, "loss_giou": 0.224, "loss_self_iou": 0.1, "cardinality_error": 3.784, "loss_ce_0": 0.291, "loss_counter_0": 0.127, "loss_bbox_0": 0.078, "loss_giou_0": 0.223, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.784, "loss_caption_0": 3.624, "loss_caption": 3.629, "total_loss": 17.579}, "5000": {"loss_ce": 0.285, "loss_counter": 0.121, "loss_bbox": 0.08, "loss_giou": 0.218, "loss_self_iou": 0.114, "cardinality_error": 3.674, "loss_ce_0": 0.287, "loss_counter_0": 0.121, "loss_bbox_0": 0.08, "loss_giou_0": 0.218, "loss_self_iou_0": 0.115, "cardinality_error_0": 3.674, "loss_caption_0": 3.629, "loss_caption": 3.629, "total_loss": 17.526}, "6000": {"loss_ce": 0.292, "loss_counter": 0.13, "loss_bbox": 0.076, "loss_giou": 0.22, "loss_self_iou": 0.098, "cardinality_error": 3.786, "loss_ce_0": 0.293, "loss_counter_0": 0.129, "loss_bbox_0": 0.076, "loss_giou_0": 0.22, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.786, "loss_caption_0": 3.625, "loss_caption": 3.622, "total_loss": 17.555}, "7000": {"loss_ce": 0.292, "loss_counter": 0.12, "loss_bbox": 0.076, "loss_giou": 0.215, "loss_self_iou": 0.097, "cardinality_error": 3.746, "loss_ce_0": 0.293, "loss_counter_0": 0.119, "loss_bbox_0": 0.076, "loss_giou_0": 0.215, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.746, "loss_caption_0": 3.58, "loss_caption": 3.576, "total_loss": 17.319}, "8000": {"loss_ce": 0.288, "loss_counter": 0.129, "loss_bbox": 0.078, "loss_giou": 0.218, "loss_self_iou": 0.108, "cardinality_error": 3.754, "loss_ce_0": 0.288, "loss_counter_0": 0.128, "loss_bbox_0": 0.079, "loss_giou_0": 0.218, "loss_self_iou_0": 0.11, "cardinality_error_0": 3.754, "loss_caption_0": 3.546, "loss_caption": 3.546, "total_loss": 17.209}, "9000": {"loss_ce": 0.29, "loss_counter": 0.12, "loss_bbox": 0.078, "loss_giou": 0.219, "loss_self_iou": 0.1, "cardinality_error": 3.685, "loss_ce_0": 0.291, "loss_counter_0": 0.12, "loss_bbox_0": 0.078, "loss_giou_0": 0.219, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.685, "loss_caption_0": 3.544, "loss_caption": 3.54, "total_loss": 17.2}, "10000": {"loss_ce": 0.293, "loss_counter": 0.125, "loss_bbox": 0.077, "loss_giou": 0.22, "loss_self_iou": 0.101, "cardinality_error": 3.748, "loss_ce_0": 0.293, "loss_counter_0": 0.125, "loss_bbox_0": 0.078, "loss_giou_0": 0.22, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.748, "loss_caption_0": 3.582, "loss_caption": 3.577, "total_loss": 17.376}, "11000": {"loss_ce": 0.29, "loss_counter": 0.124, "loss_bbox": 0.077, "loss_giou": 0.217, "loss_self_iou": 0.101, "cardinality_error": 3.788, "loss_ce_0": 0.292, "loss_counter_0": 0.123, "loss_bbox_0": 0.076, "loss_giou_0": 0.217, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.788, "loss_caption_0": 3.446, "loss_caption": 3.443, "total_loss": 16.802}, "12000": {"loss_ce": 0.29, "loss_counter": 0.12, "loss_bbox": 0.076, "loss_giou": 0.214, "loss_self_iou": 0.103, "cardinality_error": 3.694, "loss_ce_0": 0.291, "loss_counter_0": 0.12, "loss_bbox_0": 0.075, "loss_giou_0": 0.213, "loss_self_iou_0": 0.103, "cardinality_error_0": 3.694, "loss_caption_0": 3.427, "loss_caption": 3.428, "total_loss": 16.701}, "13000": {"loss_ce": 0.291, "loss_counter": 0.12, "loss_bbox": 0.076, "loss_giou": 0.217, "loss_self_iou": 0.107, "cardinality_error": 3.689, "loss_ce_0": 0.291, "loss_counter_0": 0.12, "loss_bbox_0": 0.076, "loss_giou_0": 0.217, "loss_self_iou_0": 0.107, "cardinality_error_0": 3.689, "loss_caption_0": 3.464, "loss_caption": 3.461, "total_loss": 16.871}, "14000": {"loss_ce": 0.292, "loss_counter": 0.118, "loss_bbox": 0.073, "loss_giou": 0.21, "loss_self_iou": 0.1, "cardinality_error": 3.663, "loss_ce_0": 0.292, "loss_counter_0": 0.118, "loss_bbox_0": 0.073, "loss_giou_0": 0.211, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.663, "loss_caption_0": 3.414, "loss_caption": 3.41, "total_loss": 16.616}, "15000": {"loss_ce": 0.295, "loss_counter": 0.127, "loss_bbox": 0.076, "loss_giou": 0.214, "loss_self_iou": 0.103, "cardinality_error": 3.828, "loss_ce_0": 0.296, "loss_counter_0": 0.127, "loss_bbox_0": 0.076, "loss_giou_0": 0.215, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.828, "loss_caption_0": 3.453, "loss_caption": 3.453, "total_loss": 16.836}, "16000": {"loss_ce": 0.296, "loss_counter": 0.121, "loss_bbox": 0.073, "loss_giou": 0.206, "loss_self_iou": 0.105, "cardinality_error": 3.687, "loss_ce_0": 0.297, "loss_counter_0": 0.12, "loss_bbox_0": 0.072, "loss_giou_0": 0.207, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.687, "loss_caption_0": 3.461, "loss_caption": 3.462, "total_loss": 16.803}, "17000": {"loss_ce": 0.3, "loss_counter": 0.127, "loss_bbox": 0.073, "loss_giou": 0.208, "loss_self_iou": 0.102, "cardinality_error": 3.791, "loss_ce_0": 0.3, "loss_counter_0": 0.127, "loss_bbox_0": 0.073, "loss_giou_0": 0.209, "loss_self_iou_0": 0.103, "cardinality_error_0": 3.791, "loss_caption_0": 3.469, "loss_caption": 3.465, "total_loss": 16.864}, "18000": {"loss_ce": 0.298, "loss_counter": 0.119, "loss_bbox": 0.074, "loss_giou": 0.205, "loss_self_iou": 0.107, "cardinality_error": 3.68, "loss_ce_0": 0.298, "loss_counter_0": 0.119, "loss_bbox_0": 0.074, "loss_giou_0": 0.206, "loss_self_iou_0": 0.107, "cardinality_error_0": 3.68, "loss_caption_0": 3.478, "loss_caption": 3.475, "total_loss": 16.859}, "19000": {"loss_ce": 0.305, "loss_counter": 0.126, "loss_bbox": 0.073, "loss_giou": 0.207, "loss_self_iou": 0.099, "cardinality_error": 3.752, "loss_ce_0": 0.304, "loss_counter_0": 0.126, "loss_bbox_0": 0.072, "loss_giou_0": 0.208, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.752, "loss_caption_0": 3.396, "loss_caption": 3.396, "total_loss": 16.585}, "20000": {"loss_ce": 0.303, "loss_counter": 0.128, "loss_bbox": 0.071, "loss_giou": 0.208, "loss_self_iou": 0.101, "cardinality_error": 3.804, "loss_ce_0": 0.304, "loss_counter_0": 0.128, "loss_bbox_0": 0.071, "loss_giou_0": 0.208, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.804, "loss_caption_0": 3.42, "loss_caption": 3.419, "total_loss": 16.684}, "21000": {"loss_ce": 0.298, "loss_counter": 0.122, "loss_bbox": 0.071, "loss_giou": 0.202, "loss_self_iou": 0.101, "cardinality_error": 3.666, "loss_ce_0": 0.299, "loss_counter_0": 0.122, "loss_bbox_0": 0.071, "loss_giou_0": 0.202, "loss_self_iou_0": 0.103, "cardinality_error_0": 3.666, "loss_caption_0": 3.344, "loss_caption": 3.335, "total_loss": 16.294}, "22000": {"loss_ce": 0.293, "loss_counter": 0.119, "loss_bbox": 0.073, "loss_giou": 0.201, "loss_self_iou": 0.109, "cardinality_error": 3.752, "loss_ce_0": 0.292, "loss_counter_0": 0.118, "loss_bbox_0": 0.073, "loss_giou_0": 0.203, "loss_self_iou_0": 0.11, "cardinality_error_0": 3.752, "loss_caption_0": 3.302, "loss_caption": 3.304, "total_loss": 16.116}, "23000": {"loss_ce": 0.299, "loss_counter": 0.128, "loss_bbox": 0.077, "loss_giou": 0.208, "loss_self_iou": 0.113, "cardinality_error": 3.803, "loss_ce_0": 0.299, "loss_counter_0": 0.128, "loss_bbox_0": 0.076, "loss_giou_0": 0.208, "loss_self_iou_0": 0.112, "cardinality_error_0": 3.803, "loss_caption_0": 3.348, "loss_caption": 3.34, "total_loss": 16.363}, "24000": {"loss_ce": 0.293, "loss_counter": 0.122, "loss_bbox": 0.076, "loss_giou": 0.207, "loss_self_iou": 0.093, "cardinality_error": 3.729, "loss_ce_0": 0.294, "loss_counter_0": 0.122, "loss_bbox_0": 0.076, "loss_giou_0": 0.207, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.729, "loss_caption_0": 3.354, "loss_caption": 3.351, "total_loss": 16.364}, "25000": {"loss_ce": 0.294, "loss_counter": 0.122, "loss_bbox": 0.078, "loss_giou": 0.213, "loss_self_iou": 0.091, "cardinality_error": 3.734, "loss_ce_0": 0.295, "loss_counter_0": 0.122, "loss_bbox_0": 0.077, "loss_giou_0": 0.214, "loss_self_iou_0": 0.09, "cardinality_error_0": 3.734, "loss_caption_0": 3.372, "loss_caption": 3.372, "total_loss": 16.494}, "26000": {"loss_ce": 0.298, "loss_counter": 0.125, "loss_bbox": 0.072, "loss_giou": 0.203, "loss_self_iou": 0.096, "cardinality_error": 3.784, "loss_ce_0": 0.299, "loss_counter_0": 0.125, "loss_bbox_0": 0.073, "loss_giou_0": 0.204, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.784, "loss_caption_0": 3.334, "loss_caption": 3.333, "total_loss": 16.279}, "27000": {"loss_ce": 0.289, "loss_counter": 0.118, "loss_bbox": 0.076, "loss_giou": 0.203, "loss_self_iou": 0.102, "cardinality_error": 3.64, "loss_ce_0": 0.291, "loss_counter_0": 0.119, "loss_bbox_0": 0.076, "loss_giou_0": 0.203, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.64, "loss_caption_0": 3.348, "loss_caption": 3.345, "total_loss": 16.287}, "28000": {"loss_ce": 0.292, "loss_counter": 0.125, "loss_bbox": 0.077, "loss_giou": 0.201, "loss_self_iou": 0.095, "cardinality_error": 3.774, "loss_ce_0": 0.293, "loss_counter_0": 0.125, "loss_bbox_0": 0.076, "loss_giou_0": 0.202, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.774, "loss_caption_0": 3.337, "loss_caption": 3.333, "total_loss": 16.249}, "29000": {"loss_ce": 0.298, "loss_counter": 0.12, "loss_bbox": 0.075, "loss_giou": 0.204, "loss_self_iou": 0.1, "cardinality_error": 3.755, "loss_ce_0": 0.299, "loss_counter_0": 0.12, "loss_bbox_0": 0.074, "loss_giou_0": 0.205, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.755, "loss_caption_0": 3.315, "loss_caption": 3.321, "total_loss": 16.223}, "30000": {"loss_ce": 0.302, "loss_counter": 0.119, "loss_bbox": 0.071, "loss_giou": 0.195, "loss_self_iou": 0.103, "cardinality_error": 3.72, "loss_ce_0": 0.302, "loss_counter_0": 0.119, "loss_bbox_0": 0.072, "loss_giou_0": 0.196, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.72, "loss_caption_0": 3.347, "loss_caption": 3.349, "total_loss": 16.283}, "31000": {"loss_ce": 0.296, "loss_counter": 0.123, "loss_bbox": 0.073, "loss_giou": 0.202, "loss_self_iou": 0.114, "cardinality_error": 3.772, "loss_ce_0": 0.296, "loss_counter_0": 0.123, "loss_bbox_0": 0.074, "loss_giou_0": 0.203, "loss_self_iou_0": 0.115, "cardinality_error_0": 3.772, "loss_caption_0": 3.24, "loss_caption": 3.242, "total_loss": 15.889}, "32000": {"loss_ce": 0.3, "loss_counter": 0.117, "loss_bbox": 0.069, "loss_giou": 0.193, "loss_self_iou": 0.093, "cardinality_error": 3.66, "loss_ce_0": 0.3, "loss_counter_0": 0.117, "loss_bbox_0": 0.07, "loss_giou_0": 0.195, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.66, "loss_caption_0": 3.251, "loss_caption": 3.248, "total_loss": 15.869}, "33000": {"loss_ce": 0.302, "loss_counter": 0.126, "loss_bbox": 0.07, "loss_giou": 0.197, "loss_self_iou": 0.102, "cardinality_error": 3.787, "loss_ce_0": 0.301, "loss_counter_0": 0.126, "loss_bbox_0": 0.071, "loss_giou_0": 0.199, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.787, "loss_caption_0": 3.223, "loss_caption": 3.225, "total_loss": 15.81}, "34000": {"loss_ce": 0.297, "loss_counter": 0.121, "loss_bbox": 0.076, "loss_giou": 0.201, "loss_self_iou": 0.107, "cardinality_error": 3.719, "loss_ce_0": 0.296, "loss_counter_0": 0.121, "loss_bbox_0": 0.077, "loss_giou_0": 0.202, "loss_self_iou_0": 0.108, "cardinality_error_0": 3.719, "loss_caption_0": 3.21, "loss_caption": 3.206, "total_loss": 15.752}, "35000": {"loss_ce": 0.303, "loss_counter": 0.122, "loss_bbox": 0.074, "loss_giou": 0.201, "loss_self_iou": 0.1, "cardinality_error": 3.761, "loss_ce_0": 0.304, "loss_counter_0": 0.121, "loss_bbox_0": 0.073, "loss_giou_0": 0.202, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.761, "loss_caption_0": 3.261, "loss_caption": 3.267, "total_loss": 16.006}, "36000": {"loss_ce": 0.302, "loss_counter": 0.12, "loss_bbox": 0.074, "loss_giou": 0.202, "loss_self_iou": 0.096, "cardinality_error": 3.731, "loss_ce_0": 0.302, "loss_counter_0": 0.12, "loss_bbox_0": 0.075, "loss_giou_0": 0.203, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.731, "loss_caption_0": 3.322, "loss_caption": 3.322, "total_loss": 16.237}, "37000": {"loss_ce": 0.306, "loss_counter": 0.12, "loss_bbox": 0.069, "loss_giou": 0.193, "loss_self_iou": 0.088, "cardinality_error": 3.747, "loss_ce_0": 0.306, "loss_counter_0": 0.12, "loss_bbox_0": 0.069, "loss_giou_0": 0.195, "loss_self_iou_0": 0.089, "cardinality_error_0": 3.747, "loss_caption_0": 3.276, "loss_caption": 3.278, "total_loss": 16.005}, "38000": {"loss_ce": 0.295, "loss_counter": 0.122, "loss_bbox": 0.073, "loss_giou": 0.198, "loss_self_iou": 0.096, "cardinality_error": 3.747, "loss_ce_0": 0.295, "loss_counter_0": 0.122, "loss_bbox_0": 0.074, "loss_giou_0": 0.199, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.747, "loss_caption_0": 3.26, "loss_caption": 3.267, "total_loss": 15.944}, "39000": {"loss_ce": 0.301, "loss_counter": 0.12, "loss_bbox": 0.073, "loss_giou": 0.194, "loss_self_iou": 0.096, "cardinality_error": 3.714, "loss_ce_0": 0.3, "loss_counter_0": 0.12, "loss_bbox_0": 0.074, "loss_giou_0": 0.196, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.714, "loss_caption_0": 3.29, "loss_caption": 3.284, "total_loss": 16.029}, "40000": {"loss_ce": 0.302, "loss_counter": 0.124, "loss_bbox": 0.068, "loss_giou": 0.187, "loss_self_iou": 0.098, "cardinality_error": 3.742, "loss_ce_0": 0.302, "loss_counter_0": 0.124, "loss_bbox_0": 0.069, "loss_giou_0": 0.189, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.742, "loss_caption_0": 3.255, "loss_caption": 3.258, "total_loss": 15.861}, "41000": {"loss_ce": 0.304, "loss_counter": 0.122, "loss_bbox": 0.071, "loss_giou": 0.196, "loss_self_iou": 0.094, "cardinality_error": 3.73, "loss_ce_0": 0.303, "loss_counter_0": 0.121, "loss_bbox_0": 0.071, "loss_giou_0": 0.197, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.73, "loss_caption_0": 3.159, "loss_caption": 3.162, "total_loss": 15.549}, "42000": {"loss_ce": 0.297, "loss_counter": 0.117, "loss_bbox": 0.072, "loss_giou": 0.188, "loss_self_iou": 0.097, "cardinality_error": 3.698, "loss_ce_0": 0.298, "loss_counter_0": 0.116, "loss_bbox_0": 0.071, "loss_giou_0": 0.189, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.698, "loss_caption_0": 3.191, "loss_caption": 3.187, "total_loss": 15.571}, "43000": {"loss_ce": 0.306, "loss_counter": 0.12, "loss_bbox": 0.07, "loss_giou": 0.198, "loss_self_iou": 0.089, "cardinality_error": 3.785, "loss_ce_0": 0.306, "loss_counter_0": 0.119, "loss_bbox_0": 0.069, "loss_giou_0": 0.2, "loss_self_iou_0": 0.087, "cardinality_error_0": 3.785, "loss_caption_0": 3.247, "loss_caption": 3.249, "total_loss": 15.93}, "44000": {"loss_ce": 0.301, "loss_counter": 0.12, "loss_bbox": 0.072, "loss_giou": 0.194, "loss_self_iou": 0.104, "cardinality_error": 3.727, "loss_ce_0": 0.302, "loss_counter_0": 0.12, "loss_bbox_0": 0.072, "loss_giou_0": 0.195, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.727, "loss_caption_0": 3.228, "loss_caption": 3.227, "total_loss": 15.794}, "45000": {"loss_ce": 0.303, "loss_counter": 0.12, "loss_bbox": 0.07, "loss_giou": 0.194, "loss_self_iou": 0.094, "cardinality_error": 3.684, "loss_ce_0": 0.304, "loss_counter_0": 0.12, "loss_bbox_0": 0.07, "loss_giou_0": 0.196, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.684, "loss_caption_0": 3.138, "loss_caption": 3.143, "total_loss": 15.458}, "46000": {"loss_ce": 0.302, "loss_counter": 0.123, "loss_bbox": 0.071, "loss_giou": 0.194, "loss_self_iou": 0.107, "cardinality_error": 3.8, "loss_ce_0": 0.301, "loss_counter_0": 0.122, "loss_bbox_0": 0.071, "loss_giou_0": 0.196, "loss_self_iou_0": 0.107, "cardinality_error_0": 3.8, "loss_caption_0": 3.198, "loss_caption": 3.202, "total_loss": 15.69}, "47000": {"loss_ce": 0.302, "loss_counter": 0.124, "loss_bbox": 0.071, "loss_giou": 0.193, "loss_self_iou": 0.1, "cardinality_error": 3.724, "loss_ce_0": 0.302, "loss_counter_0": 0.123, "loss_bbox_0": 0.072, "loss_giou_0": 0.194, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.724, "loss_caption_0": 3.166, "loss_caption": 3.167, "total_loss": 15.544}, "48000": {"loss_ce": 0.302, "loss_counter": 0.126, "loss_bbox": 0.074, "loss_giou": 0.194, "loss_self_iou": 0.1, "cardinality_error": 3.779, "loss_ce_0": 0.303, "loss_counter_0": 0.126, "loss_bbox_0": 0.073, "loss_giou_0": 0.195, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.779, "loss_caption_0": 3.197, "loss_caption": 3.204, "total_loss": 15.693}, "49000": {"loss_ce": 0.3, "loss_counter": 0.117, "loss_bbox": 0.072, "loss_giou": 0.186, "loss_self_iou": 0.103, "cardinality_error": 3.67, "loss_ce_0": 0.299, "loss_counter_0": 0.117, "loss_bbox_0": 0.073, "loss_giou_0": 0.189, "loss_self_iou_0": 0.103, "cardinality_error_0": 3.67, "loss_caption_0": 3.197, "loss_caption": 3.193, "total_loss": 15.597}, "50000": {"loss_ce": 0.303, "loss_counter": 0.122, "loss_bbox": 0.071, "loss_giou": 0.191, "loss_self_iou": 0.1, "cardinality_error": 3.769, "loss_ce_0": 0.303, "loss_counter_0": 0.121, "loss_bbox_0": 0.07, "loss_giou_0": 0.192, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.769, "loss_caption_0": 3.195, "loss_caption": 3.196, "total_loss": 15.646}, "51000": {"loss_ce": 0.304, "loss_counter": 0.119, "loss_bbox": 0.072, "loss_giou": 0.19, "loss_self_iou": 0.1, "cardinality_error": 3.708, "loss_ce_0": 0.304, "loss_counter_0": 0.119, "loss_bbox_0": 0.07, "loss_giou_0": 0.19, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.708, "loss_caption_0": 3.123, "loss_caption": 3.122, "total_loss": 15.345}, "52000": {"loss_ce": 0.302, "loss_counter": 0.122, "loss_bbox": 0.07, "loss_giou": 0.195, "loss_self_iou": 0.091, "cardinality_error": 3.787, "loss_ce_0": 0.302, "loss_counter_0": 0.121, "loss_bbox_0": 0.07, "loss_giou_0": 0.198, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.787, "loss_caption_0": 3.08, "loss_caption": 3.08, "total_loss": 15.224}, "53000": {"loss_ce": 0.303, "loss_counter": 0.12, "loss_bbox": 0.07, "loss_giou": 0.192, "loss_self_iou": 0.101, "cardinality_error": 3.688, "loss_ce_0": 0.302, "loss_counter_0": 0.12, "loss_bbox_0": 0.071, "loss_giou_0": 0.194, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.688, "loss_caption_0": 3.121, "loss_caption": 3.125, "total_loss": 15.366}, "54000": {"loss_ce": 0.304, "loss_counter": 0.12, "loss_bbox": 0.069, "loss_giou": 0.184, "loss_self_iou": 0.096, "cardinality_error": 3.66, "loss_ce_0": 0.303, "loss_counter_0": 0.12, "loss_bbox_0": 0.07, "loss_giou_0": 0.187, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.66, "loss_caption_0": 3.151, "loss_caption": 3.158, "total_loss": 15.44}, "55000": {"loss_ce": 0.314, "loss_counter": 0.123, "loss_bbox": 0.069, "loss_giou": 0.186, "loss_self_iou": 0.102, "cardinality_error": 3.759, "loss_ce_0": 0.314, "loss_counter_0": 0.124, "loss_bbox_0": 0.069, "loss_giou_0": 0.188, "loss_self_iou_0": 0.103, "cardinality_error_0": 3.759, "loss_caption_0": 3.137, "loss_caption": 3.138, "total_loss": 15.427}, "56000": {"loss_ce": 0.304, "loss_counter": 0.12, "loss_bbox": 0.069, "loss_giou": 0.186, "loss_self_iou": 0.102, "cardinality_error": 3.7, "loss_ce_0": 0.303, "loss_counter_0": 0.119, "loss_bbox_0": 0.07, "loss_giou_0": 0.189, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.7, "loss_caption_0": 3.128, "loss_caption": 3.132, "total_loss": 15.353}, "57000": {"loss_ce": 0.308, "loss_counter": 0.125, "loss_bbox": 0.069, "loss_giou": 0.192, "loss_self_iou": 0.094, "cardinality_error": 3.833, "loss_ce_0": 0.308, "loss_counter_0": 0.125, "loss_bbox_0": 0.069, "loss_giou_0": 0.194, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.833, "loss_caption_0": 3.157, "loss_caption": 3.154, "total_loss": 15.516}, "58000": {"loss_ce": 0.3, "loss_counter": 0.116, "loss_bbox": 0.072, "loss_giou": 0.192, "loss_self_iou": 0.099, "cardinality_error": 3.724, "loss_ce_0": 0.3, "loss_counter_0": 0.116, "loss_bbox_0": 0.073, "loss_giou_0": 0.192, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.724, "loss_caption_0": 3.092, "loss_caption": 3.088, "total_loss": 15.209}, "59000": {"loss_ce": 0.305, "loss_counter": 0.126, "loss_bbox": 0.07, "loss_giou": 0.187, "loss_self_iou": 0.092, "cardinality_error": 3.806, "loss_ce_0": 0.304, "loss_counter_0": 0.126, "loss_bbox_0": 0.07, "loss_giou_0": 0.19, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.806, "loss_caption_0": 3.204, "loss_caption": 3.204, "total_loss": 15.668}, "60000": {"loss_ce": 0.298, "loss_counter": 0.119, "loss_bbox": 0.073, "loss_giou": 0.197, "loss_self_iou": 0.102, "cardinality_error": 3.73, "loss_ce_0": 0.298, "loss_counter_0": 0.118, "loss_bbox_0": 0.074, "loss_giou_0": 0.198, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.73, "loss_caption_0": 3.185, "loss_caption": 3.179, "total_loss": 15.62}, "61000": {"loss_ce": 0.302, "loss_counter": 0.117, "loss_bbox": 0.068, "loss_giou": 0.183, "loss_self_iou": 0.099, "cardinality_error": 3.687, "loss_ce_0": 0.303, "loss_counter_0": 0.117, "loss_bbox_0": 0.067, "loss_giou_0": 0.185, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.687, "loss_caption_0": 3.025, "loss_caption": 3.031, "total_loss": 14.914}, "62000": {"loss_ce": 0.305, "loss_counter": 0.125, "loss_bbox": 0.068, "loss_giou": 0.192, "loss_self_iou": 0.088, "cardinality_error": 3.809, "loss_ce_0": 0.304, "loss_counter_0": 0.125, "loss_bbox_0": 0.069, "loss_giou_0": 0.194, "loss_self_iou_0": 0.089, "cardinality_error_0": 3.809, "loss_caption_0": 3.067, "loss_caption": 3.064, "total_loss": 15.147}, "63000": {"loss_ce": 0.301, "loss_counter": 0.113, "loss_bbox": 0.072, "loss_giou": 0.189, "loss_self_iou": 0.102, "cardinality_error": 3.636, "loss_ce_0": 0.301, "loss_counter_0": 0.113, "loss_bbox_0": 0.073, "loss_giou_0": 0.193, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.636, "loss_caption_0": 3.09, "loss_caption": 3.083, "total_loss": 15.188}, "64000": {"loss_ce": 0.308, "loss_counter": 0.12, "loss_bbox": 0.067, "loss_giou": 0.185, "loss_self_iou": 0.105, "cardinality_error": 3.738, "loss_ce_0": 0.309, "loss_counter_0": 0.12, "loss_bbox_0": 0.067, "loss_giou_0": 0.186, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.738, "loss_caption_0": 3.09, "loss_caption": 3.088, "total_loss": 15.193}, "65000": {"loss_ce": 0.302, "loss_counter": 0.123, "loss_bbox": 0.069, "loss_giou": 0.191, "loss_self_iou": 0.094, "cardinality_error": 3.735, "loss_ce_0": 0.304, "loss_counter_0": 0.123, "loss_bbox_0": 0.069, "loss_giou_0": 0.191, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.735, "loss_caption_0": 3.087, "loss_caption": 3.083, "total_loss": 15.203}, "66000": {"loss_ce": 0.307, "loss_counter": 0.121, "loss_bbox": 0.069, "loss_giou": 0.188, "loss_self_iou": 0.095, "cardinality_error": 3.753, "loss_ce_0": 0.307, "loss_counter_0": 0.121, "loss_bbox_0": 0.07, "loss_giou_0": 0.19, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.753, "loss_caption_0": 3.093, "loss_caption": 3.093, "total_loss": 15.235}, "67000": {"loss_ce": 0.299, "loss_counter": 0.123, "loss_bbox": 0.071, "loss_giou": 0.189, "loss_self_iou": 0.099, "cardinality_error": 3.781, "loss_ce_0": 0.299, "loss_counter_0": 0.123, "loss_bbox_0": 0.072, "loss_giou_0": 0.192, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.781, "loss_caption_0": 3.104, "loss_caption": 3.095, "total_loss": 15.24}, "68000": {"loss_ce": 0.3, "loss_counter": 0.118, "loss_bbox": 0.073, "loss_giou": 0.186, "loss_self_iou": 0.102, "cardinality_error": 3.702, "loss_ce_0": 0.3, "loss_counter_0": 0.118, "loss_bbox_0": 0.073, "loss_giou_0": 0.187, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.702, "loss_caption_0": 3.092, "loss_caption": 3.087, "total_loss": 15.171}, "69000": {"loss_ce": 0.304, "loss_counter": 0.116, "loss_bbox": 0.068, "loss_giou": 0.184, "loss_self_iou": 0.087, "cardinality_error": 3.705, "loss_ce_0": 0.303, "loss_counter_0": 0.116, "loss_bbox_0": 0.069, "loss_giou_0": 0.187, "loss_self_iou_0": 0.088, "cardinality_error_0": 3.705, "loss_caption_0": 3.087, "loss_caption": 3.084, "total_loss": 15.154}, "70000": {"loss_ce": 0.308, "loss_counter": 0.119, "loss_bbox": 0.07, "loss_giou": 0.188, "loss_self_iou": 0.104, "cardinality_error": 3.763, "loss_ce_0": 0.309, "loss_counter_0": 0.12, "loss_bbox_0": 0.069, "loss_giou_0": 0.19, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.763, "loss_caption_0": 3.137, "loss_caption": 3.142, "total_loss": 15.421}, "71000": {"loss_ce": 0.304, "loss_counter": 0.115, "loss_bbox": 0.067, "loss_giou": 0.187, "loss_self_iou": 0.091, "cardinality_error": 3.724, "loss_ce_0": 0.304, "loss_counter_0": 0.115, "loss_bbox_0": 0.068, "loss_giou_0": 0.189, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.724, "loss_caption_0": 2.994, "loss_caption": 2.994, "total_loss": 14.812}, "72000": {"loss_ce": 0.297, "loss_counter": 0.118, "loss_bbox": 0.07, "loss_giou": 0.187, "loss_self_iou": 0.099, "cardinality_error": 3.665, "loss_ce_0": 0.296, "loss_counter_0": 0.118, "loss_bbox_0": 0.072, "loss_giou_0": 0.19, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.665, "loss_caption_0": 2.995, "loss_caption": 3.0, "total_loss": 14.803}, "73000": {"loss_ce": 0.301, "loss_counter": 0.122, "loss_bbox": 0.067, "loss_giou": 0.183, "loss_self_iou": 0.099, "cardinality_error": 3.762, "loss_ce_0": 0.302, "loss_counter_0": 0.122, "loss_bbox_0": 0.067, "loss_giou_0": 0.184, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.762, "loss_caption_0": 3.03, "loss_caption": 3.034, "total_loss": 14.924}, "74000": {"loss_ce": 0.303, "loss_counter": 0.12, "loss_bbox": 0.067, "loss_giou": 0.181, "loss_self_iou": 0.093, "cardinality_error": 3.722, "loss_ce_0": 0.304, "loss_counter_0": 0.12, "loss_bbox_0": 0.068, "loss_giou_0": 0.183, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.722, "loss_caption_0": 3.061, "loss_caption": 3.062, "total_loss": 15.037}, "75000": {"loss_ce": 0.3, "loss_counter": 0.124, "loss_bbox": 0.069, "loss_giou": 0.188, "loss_self_iou": 0.097, "cardinality_error": 3.835, "loss_ce_0": 0.302, "loss_counter_0": 0.124, "loss_bbox_0": 0.069, "loss_giou_0": 0.19, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.835, "loss_caption_0": 3.102, "loss_caption": 3.108, "total_loss": 15.261}, "76000": {"loss_ce": 0.304, "loss_counter": 0.118, "loss_bbox": 0.069, "loss_giou": 0.19, "loss_self_iou": 0.096, "cardinality_error": 3.787, "loss_ce_0": 0.305, "loss_counter_0": 0.118, "loss_bbox_0": 0.069, "loss_giou_0": 0.192, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.787, "loss_caption_0": 3.055, "loss_caption": 3.056, "total_loss": 15.081}, "77000": {"loss_ce": 0.3, "loss_counter": 0.122, "loss_bbox": 0.07, "loss_giou": 0.191, "loss_self_iou": 0.101, "cardinality_error": 3.753, "loss_ce_0": 0.3, "loss_counter_0": 0.122, "loss_bbox_0": 0.071, "loss_giou_0": 0.192, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.753, "loss_caption_0": 3.064, "loss_caption": 3.063, "total_loss": 15.105}, "78000": {"loss_ce": 0.303, "loss_counter": 0.118, "loss_bbox": 0.069, "loss_giou": 0.192, "loss_self_iou": 0.094, "cardinality_error": 3.812, "loss_ce_0": 0.302, "loss_counter_0": 0.118, "loss_bbox_0": 0.071, "loss_giou_0": 0.194, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.812, "loss_caption_0": 3.075, "loss_caption": 3.081, "total_loss": 15.186}, "79000": {"loss_ce": 0.303, "loss_counter": 0.119, "loss_bbox": 0.068, "loss_giou": 0.184, "loss_self_iou": 0.099, "cardinality_error": 3.712, "loss_ce_0": 0.304, "loss_counter_0": 0.119, "loss_bbox_0": 0.068, "loss_giou_0": 0.187, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.712, "loss_caption_0": 3.004, "loss_caption": 3.004, "total_loss": 14.833}, "80000": {"loss_ce": 0.297, "loss_counter": 0.117, "loss_bbox": 0.068, "loss_giou": 0.184, "loss_self_iou": 0.099, "cardinality_error": 3.639, "loss_ce_0": 0.298, "loss_counter_0": 0.117, "loss_bbox_0": 0.069, "loss_giou_0": 0.185, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.639, "loss_caption_0": 3.011, "loss_caption": 3.021, "total_loss": 14.846}, "81000": {"loss_ce": 0.3, "loss_counter": 0.116, "loss_bbox": 0.064, "loss_giou": 0.177, "loss_self_iou": 0.098, "cardinality_error": 3.664, "loss_ce_0": 0.3, "loss_counter_0": 0.116, "loss_bbox_0": 0.065, "loss_giou_0": 0.178, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.664, "loss_caption_0": 2.972, "loss_caption": 2.974, "total_loss": 14.63}, "82000": {"loss_ce": 0.301, "loss_counter": 0.113, "loss_bbox": 0.067, "loss_giou": 0.179, "loss_self_iou": 0.098, "cardinality_error": 3.692, "loss_ce_0": 0.301, "loss_counter_0": 0.113, "loss_bbox_0": 0.068, "loss_giou_0": 0.181, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.692, "loss_caption_0": 2.914, "loss_caption": 2.912, "total_loss": 14.413}, "83000": {"loss_ce": 0.297, "loss_counter": 0.117, "loss_bbox": 0.067, "loss_giou": 0.188, "loss_self_iou": 0.097, "cardinality_error": 3.764, "loss_ce_0": 0.298, "loss_counter_0": 0.117, "loss_bbox_0": 0.068, "loss_giou_0": 0.19, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.764, "loss_caption_0": 2.939, "loss_caption": 2.933, "total_loss": 14.562}, "84000": {"loss_ce": 0.299, "loss_counter": 0.119, "loss_bbox": 0.066, "loss_giou": 0.18, "loss_self_iou": 0.086, "cardinality_error": 3.724, "loss_ce_0": 0.3, "loss_counter_0": 0.119, "loss_bbox_0": 0.066, "loss_giou_0": 0.181, "loss_self_iou_0": 0.086, "cardinality_error_0": 3.724, "loss_caption_0": 2.964, "loss_caption": 2.963, "total_loss": 14.614}, "85000": {"loss_ce": 0.301, "loss_counter": 0.114, "loss_bbox": 0.066, "loss_giou": 0.187, "loss_self_iou": 0.094, "cardinality_error": 3.73, "loss_ce_0": 0.301, "loss_counter_0": 0.114, "loss_bbox_0": 0.066, "loss_giou_0": 0.189, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.73, "loss_caption_0": 2.942, "loss_caption": 2.945, "total_loss": 14.596}, "86000": {"loss_ce": 0.297, "loss_counter": 0.118, "loss_bbox": 0.067, "loss_giou": 0.184, "loss_self_iou": 0.096, "cardinality_error": 3.764, "loss_ce_0": 0.298, "loss_counter_0": 0.118, "loss_bbox_0": 0.068, "loss_giou_0": 0.187, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.764, "loss_caption_0": 2.989, "loss_caption": 2.988, "total_loss": 14.745}, "87000": {"loss_ce": 0.295, "loss_counter": 0.119, "loss_bbox": 0.067, "loss_giou": 0.178, "loss_self_iou": 0.096, "cardinality_error": 3.692, "loss_ce_0": 0.298, "loss_counter_0": 0.119, "loss_bbox_0": 0.068, "loss_giou_0": 0.182, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.692, "loss_caption_0": 2.93, "loss_caption": 2.931, "total_loss": 14.465}, "88000": {"loss_ce": 0.299, "loss_counter": 0.117, "loss_bbox": 0.068, "loss_giou": 0.181, "loss_self_iou": 0.102, "cardinality_error": 3.74, "loss_ce_0": 0.298, "loss_counter_0": 0.117, "loss_bbox_0": 0.07, "loss_giou_0": 0.184, "loss_self_iou_0": 0.105, "cardinality_error_0": 3.74, "loss_caption_0": 2.945, "loss_caption": 2.939, "total_loss": 14.538}, "89000": {"loss_ce": 0.302, "loss_counter": 0.124, "loss_bbox": 0.069, "loss_giou": 0.186, "loss_self_iou": 0.096, "cardinality_error": 3.911, "loss_ce_0": 0.303, "loss_counter_0": 0.124, "loss_bbox_0": 0.069, "loss_giou_0": 0.188, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.911, "loss_caption_0": 2.981, "loss_caption": 2.985, "total_loss": 14.762}, "90000": {"loss_ce": 0.298, "loss_counter": 0.113, "loss_bbox": 0.066, "loss_giou": 0.174, "loss_self_iou": 0.099, "cardinality_error": 3.667, "loss_ce_0": 0.3, "loss_counter_0": 0.112, "loss_bbox_0": 0.067, "loss_giou_0": 0.177, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.667, "loss_caption_0": 2.946, "loss_caption": 2.945, "total_loss": 14.493}, "91000": {"loss_ce": 0.296, "loss_counter": 0.121, "loss_bbox": 0.066, "loss_giou": 0.179, "loss_self_iou": 0.097, "cardinality_error": 3.807, "loss_ce_0": 0.298, "loss_counter_0": 0.12, "loss_bbox_0": 0.065, "loss_giou_0": 0.182, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.807, "loss_caption_0": 2.916, "loss_caption": 2.914, "total_loss": 14.411}, "92000": {"loss_ce": 0.298, "loss_counter": 0.121, "loss_bbox": 0.067, "loss_giou": 0.179, "loss_self_iou": 0.093, "cardinality_error": 3.784, "loss_ce_0": 0.298, "loss_counter_0": 0.121, "loss_bbox_0": 0.068, "loss_giou_0": 0.182, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.784, "loss_caption_0": 2.916, "loss_caption": 2.915, "total_loss": 14.422}, "93000": {"loss_ce": 0.298, "loss_counter": 0.117, "loss_bbox": 0.065, "loss_giou": 0.18, "loss_self_iou": 0.091, "cardinality_error": 3.806, "loss_ce_0": 0.3, "loss_counter_0": 0.117, "loss_bbox_0": 0.065, "loss_giou_0": 0.183, "loss_self_iou_0": 0.091, "cardinality_error_0": 3.806, "loss_caption_0": 2.9, "loss_caption": 2.905, "total_loss": 14.377}, "94000": {"loss_ce": 0.293, "loss_counter": 0.109, "loss_bbox": 0.068, "loss_giou": 0.174, "loss_self_iou": 0.105, "cardinality_error": 3.616, "loss_ce_0": 0.293, "loss_counter_0": 0.109, "loss_bbox_0": 0.069, "loss_giou_0": 0.178, "loss_self_iou_0": 0.106, "cardinality_error_0": 3.616, "loss_caption_0": 2.912, "loss_caption": 2.914, "total_loss": 14.339}, "95000": {"loss_ce": 0.295, "loss_counter": 0.12, "loss_bbox": 0.066, "loss_giou": 0.185, "loss_self_iou": 0.093, "cardinality_error": 3.805, "loss_ce_0": 0.296, "loss_counter_0": 0.12, "loss_bbox_0": 0.068, "loss_giou_0": 0.187, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.805, "loss_caption_0": 2.938, "loss_caption": 2.941, "total_loss": 14.546}, "96000": {"loss_ce": 0.292, "loss_counter": 0.114, "loss_bbox": 0.069, "loss_giou": 0.177, "loss_self_iou": 0.103, "cardinality_error": 3.684, "loss_ce_0": 0.293, "loss_counter_0": 0.114, "loss_bbox_0": 0.07, "loss_giou_0": 0.181, "loss_self_iou_0": 0.105, "cardinality_error_0": 3.684, "loss_caption_0": 2.928, "loss_caption": 2.931, "total_loss": 14.434}, "97000": {"loss_ce": 0.297, "loss_counter": 0.111, "loss_bbox": 0.066, "loss_giou": 0.184, "loss_self_iou": 0.095, "cardinality_error": 3.693, "loss_ce_0": 0.298, "loss_counter_0": 0.111, "loss_bbox_0": 0.068, "loss_giou_0": 0.187, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.693, "loss_caption_0": 2.902, "loss_caption": 2.903, "total_loss": 14.392}, "98000": {"loss_ce": 0.296, "loss_counter": 0.115, "loss_bbox": 0.068, "loss_giou": 0.181, "loss_self_iou": 0.089, "cardinality_error": 3.738, "loss_ce_0": 0.298, "loss_counter_0": 0.115, "loss_bbox_0": 0.068, "loss_giou_0": 0.184, "loss_self_iou_0": 0.09, "cardinality_error_0": 3.738, "loss_caption_0": 2.896, "loss_caption": 2.902, "total_loss": 14.361}, "99000": {"loss_ce": 0.295, "loss_counter": 0.115, "loss_bbox": 0.064, "loss_giou": 0.174, "loss_self_iou": 0.095, "cardinality_error": 3.702, "loss_ce_0": 0.296, "loss_counter_0": 0.115, "loss_bbox_0": 0.065, "loss_giou_0": 0.177, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.702, "loss_caption_0": 2.956, "loss_caption": 2.956, "total_loss": 14.525}, "100000": {"loss_ce": 0.296, "loss_counter": 0.114, "loss_bbox": 0.066, "loss_giou": 0.177, "loss_self_iou": 0.092, "cardinality_error": 3.751, "loss_ce_0": 0.298, "loss_counter_0": 0.113, "loss_bbox_0": 0.066, "loss_giou_0": 0.179, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.751, "loss_caption_0": 2.932, "loss_caption": 2.932, "total_loss": 14.453}, "101000": {"loss_ce": 0.29, "loss_counter": 0.111, "loss_bbox": 0.065, "loss_giou": 0.173, "loss_self_iou": 0.093, "cardinality_error": 3.699, "loss_ce_0": 0.292, "loss_counter_0": 0.111, "loss_bbox_0": 0.066, "loss_giou_0": 0.176, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.699, "loss_caption_0": 2.849, "loss_caption": 2.847, "total_loss": 14.064}, "102000": {"loss_ce": 0.292, "loss_counter": 0.116, "loss_bbox": 0.065, "loss_giou": 0.174, "loss_self_iou": 0.093, "cardinality_error": 3.695, "loss_ce_0": 0.293, "loss_counter_0": 0.117, "loss_bbox_0": 0.066, "loss_giou_0": 0.177, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.695, "loss_caption_0": 2.85, "loss_caption": 2.848, "total_loss": 14.087}, "103000": {"loss_ce": 0.293, "loss_counter": 0.115, "loss_bbox": 0.066, "loss_giou": 0.173, "loss_self_iou": 0.093, "cardinality_error": 3.724, "loss_ce_0": 0.293, "loss_counter_0": 0.116, "loss_bbox_0": 0.067, "loss_giou_0": 0.178, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.724, "loss_caption_0": 2.846, "loss_caption": 2.854, "total_loss": 14.092}, "104000": {"loss_ce": 0.289, "loss_counter": 0.113, "loss_bbox": 0.064, "loss_giou": 0.178, "loss_self_iou": 0.097, "cardinality_error": 3.736, "loss_ce_0": 0.29, "loss_counter_0": 0.112, "loss_bbox_0": 0.065, "loss_giou_0": 0.181, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.736, "loss_caption_0": 2.916, "loss_caption": 2.913, "total_loss": 14.362}, "105000": {"loss_ce": 0.288, "loss_counter": 0.117, "loss_bbox": 0.067, "loss_giou": 0.18, "loss_self_iou": 0.091, "cardinality_error": 3.736, "loss_ce_0": 0.29, "loss_counter_0": 0.116, "loss_bbox_0": 0.068, "loss_giou_0": 0.183, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.736, "loss_caption_0": 2.907, "loss_caption": 2.902, "total_loss": 14.342}, "106000": {"loss_ce": 0.292, "loss_counter": 0.113, "loss_bbox": 0.068, "loss_giou": 0.184, "loss_self_iou": 0.11, "cardinality_error": 3.775, "loss_ce_0": 0.293, "loss_counter_0": 0.112, "loss_bbox_0": 0.069, "loss_giou_0": 0.187, "loss_self_iou_0": 0.11, "cardinality_error_0": 3.775, "loss_caption_0": 2.876, "loss_caption": 2.875, "total_loss": 14.264}, "107000": {"loss_ce": 0.291, "loss_counter": 0.114, "loss_bbox": 0.069, "loss_giou": 0.178, "loss_self_iou": 0.099, "cardinality_error": 3.743, "loss_ce_0": 0.291, "loss_counter_0": 0.114, "loss_bbox_0": 0.07, "loss_giou_0": 0.183, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.743, "loss_caption_0": 2.91, "loss_caption": 2.909, "total_loss": 14.358}, "108000": {"loss_ce": 0.295, "loss_counter": 0.118, "loss_bbox": 0.066, "loss_giou": 0.177, "loss_self_iou": 0.1, "cardinality_error": 3.81, "loss_ce_0": 0.296, "loss_counter_0": 0.117, "loss_bbox_0": 0.067, "loss_giou_0": 0.181, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.81, "loss_caption_0": 2.928, "loss_caption": 2.93, "total_loss": 14.446}, "109000": {"loss_ce": 0.294, "loss_counter": 0.118, "loss_bbox": 0.063, "loss_giou": 0.178, "loss_self_iou": 0.091, "cardinality_error": 3.78, "loss_ce_0": 0.296, "loss_counter_0": 0.117, "loss_bbox_0": 0.065, "loss_giou_0": 0.182, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.78, "loss_caption_0": 2.916, "loss_caption": 2.912, "total_loss": 14.396}, "110000": {"loss_ce": 0.297, "loss_counter": 0.113, "loss_bbox": 0.064, "loss_giou": 0.178, "loss_self_iou": 0.087, "cardinality_error": 3.72, "loss_ce_0": 0.297, "loss_counter_0": 0.113, "loss_bbox_0": 0.065, "loss_giou_0": 0.184, "loss_self_iou_0": 0.088, "cardinality_error_0": 3.72, "loss_caption_0": 2.948, "loss_caption": 2.948, "total_loss": 14.539}, "111000": {"loss_ce": 0.286, "loss_counter": 0.114, "loss_bbox": 0.066, "loss_giou": 0.173, "loss_self_iou": 0.095, "cardinality_error": 3.718, "loss_ce_0": 0.287, "loss_counter_0": 0.113, "loss_bbox_0": 0.068, "loss_giou_0": 0.179, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.718, "loss_caption_0": 2.867, "loss_caption": 2.869, "total_loss": 14.14}, "112000": {"loss_ce": 0.287, "loss_counter": 0.111, "loss_bbox": 0.064, "loss_giou": 0.169, "loss_self_iou": 0.098, "cardinality_error": 3.725, "loss_ce_0": 0.289, "loss_counter_0": 0.111, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.725, "loss_caption_0": 2.844, "loss_caption": 2.842, "total_loss": 14.015}, "113000": {"loss_ce": 0.284, "loss_counter": 0.111, "loss_bbox": 0.064, "loss_giou": 0.172, "loss_self_iou": 0.097, "cardinality_error": 3.734, "loss_ce_0": 0.286, "loss_counter_0": 0.111, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.734, "loss_caption_0": 2.837, "loss_caption": 2.834, "total_loss": 13.981}, "114000": {"loss_ce": 0.283, "loss_counter": 0.112, "loss_bbox": 0.064, "loss_giou": 0.174, "loss_self_iou": 0.096, "cardinality_error": 3.739, "loss_ce_0": 0.285, "loss_counter_0": 0.111, "loss_bbox_0": 0.065, "loss_giou_0": 0.18, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.739, "loss_caption_0": 2.855, "loss_caption": 2.857, "total_loss": 14.084}, "115000": {"loss_ce": 0.284, "loss_counter": 0.111, "loss_bbox": 0.064, "loss_giou": 0.175, "loss_self_iou": 0.092, "cardinality_error": 3.74, "loss_ce_0": 0.284, "loss_counter_0": 0.111, "loss_bbox_0": 0.066, "loss_giou_0": 0.18, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.74, "loss_caption_0": 2.823, "loss_caption": 2.824, "total_loss": 13.959}, "116000": {"loss_ce": 0.286, "loss_counter": 0.113, "loss_bbox": 0.065, "loss_giou": 0.177, "loss_self_iou": 0.088, "cardinality_error": 3.753, "loss_ce_0": 0.288, "loss_counter_0": 0.113, "loss_bbox_0": 0.066, "loss_giou_0": 0.181, "loss_self_iou_0": 0.088, "cardinality_error_0": 3.753, "loss_caption_0": 2.846, "loss_caption": 2.843, "total_loss": 14.073}, "117000": {"loss_ce": 0.285, "loss_counter": 0.113, "loss_bbox": 0.064, "loss_giou": 0.174, "loss_self_iou": 0.096, "cardinality_error": 3.755, "loss_ce_0": 0.287, "loss_counter_0": 0.113, "loss_bbox_0": 0.064, "loss_giou_0": 0.179, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.755, "loss_caption_0": 2.804, "loss_caption": 2.81, "total_loss": 13.896}, "118000": {"loss_ce": 0.284, "loss_counter": 0.109, "loss_bbox": 0.066, "loss_giou": 0.175, "loss_self_iou": 0.093, "cardinality_error": 3.715, "loss_ce_0": 0.285, "loss_counter_0": 0.108, "loss_bbox_0": 0.068, "loss_giou_0": 0.181, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.715, "loss_caption_0": 2.863, "loss_caption": 2.866, "total_loss": 14.129}, "119000": {"loss_ce": 0.286, "loss_counter": 0.114, "loss_bbox": 0.064, "loss_giou": 0.176, "loss_self_iou": 0.098, "cardinality_error": 3.735, "loss_ce_0": 0.287, "loss_counter_0": 0.114, "loss_bbox_0": 0.066, "loss_giou_0": 0.181, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.735, "loss_caption_0": 2.844, "loss_caption": 2.843, "total_loss": 14.061}, "120000": {"loss_ce": 0.284, "loss_counter": 0.113, "loss_bbox": 0.065, "loss_giou": 0.175, "loss_self_iou": 0.101, "cardinality_error": 3.755, "loss_ce_0": 0.285, "loss_counter_0": 0.113, "loss_bbox_0": 0.068, "loss_giou_0": 0.181, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.755, "loss_caption_0": 2.868, "loss_caption": 2.878, "total_loss": 14.168}, "121000": {"loss_ce": 0.283, "loss_counter": 0.108, "loss_bbox": 0.063, "loss_giou": 0.166, "loss_self_iou": 0.095, "cardinality_error": 3.691, "loss_ce_0": 0.284, "loss_counter_0": 0.108, "loss_bbox_0": 0.066, "loss_giou_0": 0.174, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.691, "loss_caption_0": 2.809, "loss_caption": 2.808, "total_loss": 13.835}, "122000": {"loss_ce": 0.28, "loss_counter": 0.109, "loss_bbox": 0.064, "loss_giou": 0.17, "loss_self_iou": 0.093, "cardinality_error": 3.706, "loss_ce_0": 0.281, "loss_counter_0": 0.108, "loss_bbox_0": 0.066, "loss_giou_0": 0.177, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.706, "loss_caption_0": 2.811, "loss_caption": 2.814, "total_loss": 13.867}, "123000": {"loss_ce": 0.28, "loss_counter": 0.109, "loss_bbox": 0.066, "loss_giou": 0.172, "loss_self_iou": 0.097, "cardinality_error": 3.691, "loss_ce_0": 0.281, "loss_counter_0": 0.11, "loss_bbox_0": 0.067, "loss_giou_0": 0.179, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.691, "loss_caption_0": 2.789, "loss_caption": 2.797, "total_loss": 13.808}, "124000": {"loss_ce": 0.282, "loss_counter": 0.112, "loss_bbox": 0.063, "loss_giou": 0.17, "loss_self_iou": 0.092, "cardinality_error": 3.76, "loss_ce_0": 0.281, "loss_counter_0": 0.112, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.76, "loss_caption_0": 2.839, "loss_caption": 2.842, "total_loss": 13.984}, "125000": {"loss_ce": 0.281, "loss_counter": 0.112, "loss_bbox": 0.064, "loss_giou": 0.174, "loss_self_iou": 0.097, "cardinality_error": 3.763, "loss_ce_0": 0.282, "loss_counter_0": 0.112, "loss_bbox_0": 0.066, "loss_giou_0": 0.179, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.763, "loss_caption_0": 2.81, "loss_caption": 2.815, "total_loss": 13.898}, "126000": {"loss_ce": 0.282, "loss_counter": 0.112, "loss_bbox": 0.064, "loss_giou": 0.177, "loss_self_iou": 0.095, "cardinality_error": 3.717, "loss_ce_0": 0.283, "loss_counter_0": 0.112, "loss_bbox_0": 0.066, "loss_giou_0": 0.183, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.717, "loss_caption_0": 2.789, "loss_caption": 2.787, "total_loss": 13.835}, "127000": {"loss_ce": 0.277, "loss_counter": 0.112, "loss_bbox": 0.064, "loss_giou": 0.172, "loss_self_iou": 0.097, "cardinality_error": 3.764, "loss_ce_0": 0.277, "loss_counter_0": 0.112, "loss_bbox_0": 0.065, "loss_giou_0": 0.178, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.764, "loss_caption_0": 2.867, "loss_caption": 2.871, "total_loss": 14.097}, "128000": {"loss_ce": 0.281, "loss_counter": 0.113, "loss_bbox": 0.063, "loss_giou": 0.173, "loss_self_iou": 0.092, "cardinality_error": 3.793, "loss_ce_0": 0.283, "loss_counter_0": 0.112, "loss_bbox_0": 0.064, "loss_giou_0": 0.179, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.793, "loss_caption_0": 2.868, "loss_caption": 2.863, "total_loss": 14.111}, "129000": {"loss_ce": 0.279, "loss_counter": 0.106, "loss_bbox": 0.066, "loss_giou": 0.175, "loss_self_iou": 0.1, "cardinality_error": 3.686, "loss_ce_0": 0.283, "loss_counter_0": 0.105, "loss_bbox_0": 0.068, "loss_giou_0": 0.181, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.686, "loss_caption_0": 2.812, "loss_caption": 2.813, "total_loss": 13.903}, "130000": {"loss_ce": 0.283, "loss_counter": 0.111, "loss_bbox": 0.065, "loss_giou": 0.174, "loss_self_iou": 0.097, "cardinality_error": 3.772, "loss_ce_0": 0.286, "loss_counter_0": 0.111, "loss_bbox_0": 0.066, "loss_giou_0": 0.179, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.772, "loss_caption_0": 2.86, "loss_caption": 2.861, "total_loss": 14.105}, "131000": {"loss_ce": 0.277, "loss_counter": 0.107, "loss_bbox": 0.062, "loss_giou": 0.17, "loss_self_iou": 0.092, "cardinality_error": 3.75, "loss_ce_0": 0.279, "loss_counter_0": 0.107, "loss_bbox_0": 0.064, "loss_giou_0": 0.178, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.75, "loss_caption_0": 2.817, "loss_caption": 2.826, "total_loss": 13.897}, "132000": {"loss_ce": 0.271, "loss_counter": 0.109, "loss_bbox": 0.065, "loss_giou": 0.174, "loss_self_iou": 0.089, "cardinality_error": 3.814, "loss_ce_0": 0.274, "loss_counter_0": 0.109, "loss_bbox_0": 0.066, "loss_giou_0": 0.181, "loss_self_iou_0": 0.09, "cardinality_error_0": 3.814, "loss_caption_0": 2.778, "loss_caption": 2.776, "total_loss": 13.726}, "133000": {"loss_ce": 0.277, "loss_counter": 0.113, "loss_bbox": 0.064, "loss_giou": 0.172, "loss_self_iou": 0.095, "cardinality_error": 3.773, "loss_ce_0": 0.277, "loss_counter_0": 0.112, "loss_bbox_0": 0.066, "loss_giou_0": 0.179, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.773, "loss_caption_0": 2.843, "loss_caption": 2.843, "total_loss": 13.999}, "134000": {"loss_ce": 0.273, "loss_counter": 0.108, "loss_bbox": 0.065, "loss_giou": 0.171, "loss_self_iou": 0.101, "cardinality_error": 3.743, "loss_ce_0": 0.276, "loss_counter_0": 0.107, "loss_bbox_0": 0.067, "loss_giou_0": 0.179, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.743, "loss_caption_0": 2.786, "loss_caption": 2.787, "total_loss": 13.756}, "135000": {"loss_ce": 0.28, "loss_counter": 0.115, "loss_bbox": 0.061, "loss_giou": 0.168, "loss_self_iou": 0.096, "cardinality_error": 3.794, "loss_ce_0": 0.281, "loss_counter_0": 0.115, "loss_bbox_0": 0.064, "loss_giou_0": 0.177, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.794, "loss_caption_0": 2.785, "loss_caption": 2.784, "total_loss": 13.759}, "136000": {"loss_ce": 0.279, "loss_counter": 0.106, "loss_bbox": 0.065, "loss_giou": 0.168, "loss_self_iou": 0.092, "cardinality_error": 3.653, "loss_ce_0": 0.279, "loss_counter_0": 0.105, "loss_bbox_0": 0.067, "loss_giou_0": 0.175, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.653, "loss_caption_0": 2.828, "loss_caption": 2.834, "total_loss": 13.919}, "137000": {"loss_ce": 0.279, "loss_counter": 0.105, "loss_bbox": 0.065, "loss_giou": 0.173, "loss_self_iou": 0.099, "cardinality_error": 3.654, "loss_ce_0": 0.281, "loss_counter_0": 0.105, "loss_bbox_0": 0.067, "loss_giou_0": 0.179, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.654, "loss_caption_0": 2.79, "loss_caption": 2.799, "total_loss": 13.806}, "138000": {"loss_ce": 0.278, "loss_counter": 0.109, "loss_bbox": 0.064, "loss_giou": 0.171, "loss_self_iou": 0.095, "cardinality_error": 3.714, "loss_ce_0": 0.28, "loss_counter_0": 0.108, "loss_bbox_0": 0.065, "loss_giou_0": 0.178, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.714, "loss_caption_0": 2.835, "loss_caption": 2.828, "total_loss": 13.945}, "139000": {"loss_ce": 0.281, "loss_counter": 0.115, "loss_bbox": 0.062, "loss_giou": 0.167, "loss_self_iou": 0.098, "cardinality_error": 3.813, "loss_ce_0": 0.283, "loss_counter_0": 0.114, "loss_bbox_0": 0.064, "loss_giou_0": 0.175, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.813, "loss_caption_0": 2.83, "loss_caption": 2.828, "total_loss": 13.924}, "140000": {"loss_ce": 0.277, "loss_counter": 0.107, "loss_bbox": 0.063, "loss_giou": 0.171, "loss_self_iou": 0.09, "cardinality_error": 3.664, "loss_ce_0": 0.28, "loss_counter_0": 0.107, "loss_bbox_0": 0.064, "loss_giou_0": 0.178, "loss_self_iou_0": 0.091, "cardinality_error_0": 3.664, "loss_caption_0": 2.821, "loss_caption": 2.823, "total_loss": 13.905}, "141000": {"loss_ce": 0.268, "loss_counter": 0.108, "loss_bbox": 0.066, "loss_giou": 0.171, "loss_self_iou": 0.106, "cardinality_error": 3.774, "loss_ce_0": 0.27, "loss_counter_0": 0.108, "loss_bbox_0": 0.067, "loss_giou_0": 0.177, "loss_self_iou_0": 0.108, "cardinality_error_0": 3.774, "loss_caption_0": 2.75, "loss_caption": 2.748, "total_loss": 13.572}, "142000": {"loss_ce": 0.27, "loss_counter": 0.109, "loss_bbox": 0.062, "loss_giou": 0.173, "loss_self_iou": 0.091, "cardinality_error": 3.797, "loss_ce_0": 0.272, "loss_counter_0": 0.108, "loss_bbox_0": 0.065, "loss_giou_0": 0.181, "loss_self_iou_0": 0.091, "cardinality_error_0": 3.797, "loss_caption_0": 2.72, "loss_caption": 2.722, "total_loss": 13.492}, "143000": {"loss_ce": 0.265, "loss_counter": 0.1, "loss_bbox": 0.063, "loss_giou": 0.162, "loss_self_iou": 0.095, "cardinality_error": 3.637, "loss_ce_0": 0.268, "loss_counter_0": 0.1, "loss_bbox_0": 0.066, "loss_giou_0": 0.171, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.637, "loss_caption_0": 2.782, "loss_caption": 2.782, "total_loss": 13.626}, "144000": {"loss_ce": 0.27, "loss_counter": 0.112, "loss_bbox": 0.062, "loss_giou": 0.172, "loss_self_iou": 0.094, "cardinality_error": 3.831, "loss_ce_0": 0.273, "loss_counter_0": 0.112, "loss_bbox_0": 0.064, "loss_giou_0": 0.18, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.831, "loss_caption_0": 2.793, "loss_caption": 2.79, "total_loss": 13.773}, "145000": {"loss_ce": 0.269, "loss_counter": 0.101, "loss_bbox": 0.061, "loss_giou": 0.16, "loss_self_iou": 0.093, "cardinality_error": 3.665, "loss_ce_0": 0.273, "loss_counter_0": 0.101, "loss_bbox_0": 0.063, "loss_giou_0": 0.168, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.665, "loss_caption_0": 2.762, "loss_caption": 2.767, "total_loss": 13.554}, "146000": {"loss_ce": 0.275, "loss_counter": 0.109, "loss_bbox": 0.061, "loss_giou": 0.164, "loss_self_iou": 0.091, "cardinality_error": 3.725, "loss_ce_0": 0.276, "loss_counter_0": 0.109, "loss_bbox_0": 0.064, "loss_giou_0": 0.172, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.725, "loss_caption_0": 2.813, "loss_caption": 2.813, "total_loss": 13.811}, "147000": {"loss_ce": 0.272, "loss_counter": 0.104, "loss_bbox": 0.063, "loss_giou": 0.171, "loss_self_iou": 0.097, "cardinality_error": 3.714, "loss_ce_0": 0.273, "loss_counter_0": 0.103, "loss_bbox_0": 0.065, "loss_giou_0": 0.179, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.714, "loss_caption_0": 2.747, "loss_caption": 2.745, "total_loss": 13.578}, "148000": {"loss_ce": 0.271, "loss_counter": 0.108, "loss_bbox": 0.063, "loss_giou": 0.168, "loss_self_iou": 0.096, "cardinality_error": 3.728, "loss_ce_0": 0.274, "loss_counter_0": 0.107, "loss_bbox_0": 0.066, "loss_giou_0": 0.177, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.728, "loss_caption_0": 2.843, "loss_caption": 2.84, "total_loss": 13.944}, "149000": {"loss_ce": 0.269, "loss_counter": 0.108, "loss_bbox": 0.066, "loss_giou": 0.169, "loss_self_iou": 0.098, "cardinality_error": 3.799, "loss_ce_0": 0.273, "loss_counter_0": 0.109, "loss_bbox_0": 0.068, "loss_giou_0": 0.178, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.799, "loss_caption_0": 2.836, "loss_caption": 2.836, "total_loss": 13.926}, "150000": {"loss_ce": 0.27, "loss_counter": 0.107, "loss_bbox": 0.063, "loss_giou": 0.169, "loss_self_iou": 0.087, "cardinality_error": 3.703, "loss_ce_0": 0.272, "loss_counter_0": 0.107, "loss_bbox_0": 0.066, "loss_giou_0": 0.176, "loss_self_iou_0": 0.088, "cardinality_error_0": 3.703, "loss_caption_0": 2.806, "loss_caption": 2.806, "total_loss": 13.795}, "151000": {"loss_ce": 0.264, "loss_counter": 0.101, "loss_bbox": 0.063, "loss_giou": 0.163, "loss_self_iou": 0.097, "cardinality_error": 3.645, "loss_ce_0": 0.266, "loss_counter_0": 0.101, "loss_bbox_0": 0.065, "loss_giou_0": 0.171, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.645, "loss_caption_0": 2.762, "loss_caption": 2.759, "total_loss": 13.537}, "152000": {"loss_ce": 0.265, "loss_counter": 0.103, "loss_bbox": 0.06, "loss_giou": 0.166, "loss_self_iou": 0.087, "cardinality_error": 3.722, "loss_ce_0": 0.269, "loss_counter_0": 0.103, "loss_bbox_0": 0.063, "loss_giou_0": 0.175, "loss_self_iou_0": 0.087, "cardinality_error_0": 3.722, "loss_caption_0": 2.762, "loss_caption": 2.766, "total_loss": 13.59}, "153000": {"loss_ce": 0.264, "loss_counter": 0.111, "loss_bbox": 0.062, "loss_giou": 0.168, "loss_self_iou": 0.083, "cardinality_error": 3.813, "loss_ce_0": 0.267, "loss_counter_0": 0.111, "loss_bbox_0": 0.064, "loss_giou_0": 0.177, "loss_self_iou_0": 0.085, "cardinality_error_0": 3.813, "loss_caption_0": 2.777, "loss_caption": 2.778, "total_loss": 13.663}, "154000": {"loss_ce": 0.268, "loss_counter": 0.106, "loss_bbox": 0.061, "loss_giou": 0.168, "loss_self_iou": 0.092, "cardinality_error": 3.769, "loss_ce_0": 0.272, "loss_counter_0": 0.105, "loss_bbox_0": 0.064, "loss_giou_0": 0.178, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.769, "loss_caption_0": 2.787, "loss_caption": 2.787, "total_loss": 13.717}, "155000": {"loss_ce": 0.264, "loss_counter": 0.104, "loss_bbox": 0.063, "loss_giou": 0.169, "loss_self_iou": 0.09, "cardinality_error": 3.714, "loss_ce_0": 0.267, "loss_counter_0": 0.104, "loss_bbox_0": 0.065, "loss_giou_0": 0.179, "loss_self_iou_0": 0.091, "cardinality_error_0": 3.714, "loss_caption_0": 2.758, "loss_caption": 2.76, "total_loss": 13.593}, "156000": {"loss_ce": 0.265, "loss_counter": 0.106, "loss_bbox": 0.064, "loss_giou": 0.167, "loss_self_iou": 0.102, "cardinality_error": 3.675, "loss_ce_0": 0.269, "loss_counter_0": 0.106, "loss_bbox_0": 0.066, "loss_giou_0": 0.174, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.675, "loss_caption_0": 2.741, "loss_caption": 2.742, "total_loss": 13.504}, "157000": {"loss_ce": 0.267, "loss_counter": 0.104, "loss_bbox": 0.065, "loss_giou": 0.167, "loss_self_iou": 0.103, "cardinality_error": 3.722, "loss_ce_0": 0.268, "loss_counter_0": 0.104, "loss_bbox_0": 0.068, "loss_giou_0": 0.176, "loss_self_iou_0": 0.105, "cardinality_error_0": 3.722, "loss_caption_0": 2.777, "loss_caption": 2.783, "total_loss": 13.668}, "158000": {"loss_ce": 0.266, "loss_counter": 0.106, "loss_bbox": 0.062, "loss_giou": 0.164, "loss_self_iou": 0.099, "cardinality_error": 3.758, "loss_ce_0": 0.27, "loss_counter_0": 0.106, "loss_bbox_0": 0.065, "loss_giou_0": 0.173, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.758, "loss_caption_0": 2.815, "loss_caption": 2.817, "total_loss": 13.789}, "159000": {"loss_ce": 0.272, "loss_counter": 0.108, "loss_bbox": 0.062, "loss_giou": 0.169, "loss_self_iou": 0.098, "cardinality_error": 3.729, "loss_ce_0": 0.275, "loss_counter_0": 0.108, "loss_bbox_0": 0.065, "loss_giou_0": 0.177, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.729, "loss_caption_0": 2.783, "loss_caption": 2.785, "total_loss": 13.721}, "160000": {"loss_ce": 0.269, "loss_counter": 0.109, "loss_bbox": 0.063, "loss_giou": 0.166, "loss_self_iou": 0.098, "cardinality_error": 3.816, "loss_ce_0": 0.271, "loss_counter_0": 0.109, "loss_bbox_0": 0.066, "loss_giou_0": 0.176, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.816, "loss_caption_0": 2.78, "loss_caption": 2.784, "total_loss": 13.686}, "161000": {"loss_ce": 0.26, "loss_counter": 0.103, "loss_bbox": 0.061, "loss_giou": 0.163, "loss_self_iou": 0.097, "cardinality_error": 3.695, "loss_ce_0": 0.263, "loss_counter_0": 0.103, "loss_bbox_0": 0.063, "loss_giou_0": 0.171, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.695, "loss_caption_0": 2.766, "loss_caption": 2.768, "total_loss": 13.553}, "162000": {"loss_ce": 0.262, "loss_counter": 0.103, "loss_bbox": 0.063, "loss_giou": 0.164, "loss_self_iou": 0.091, "cardinality_error": 3.694, "loss_ce_0": 0.266, "loss_counter_0": 0.103, "loss_bbox_0": 0.065, "loss_giou_0": 0.174, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.694, "loss_caption_0": 2.768, "loss_caption": 2.764, "total_loss": 13.573}, "163000": {"loss_ce": 0.262, "loss_counter": 0.105, "loss_bbox": 0.064, "loss_giou": 0.173, "loss_self_iou": 0.097, "cardinality_error": 3.769, "loss_ce_0": 0.266, "loss_counter_0": 0.104, "loss_bbox_0": 0.065, "loss_giou_0": 0.179, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.769, "loss_caption_0": 2.765, "loss_caption": 2.766, "total_loss": 13.63}, "164000": {"loss_ce": 0.265, "loss_counter": 0.11, "loss_bbox": 0.061, "loss_giou": 0.164, "loss_self_iou": 0.092, "cardinality_error": 3.774, "loss_ce_0": 0.269, "loss_counter_0": 0.11, "loss_bbox_0": 0.063, "loss_giou_0": 0.173, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.774, "loss_caption_0": 2.772, "loss_caption": 2.776, "total_loss": 13.625}, "165000": {"loss_ce": 0.264, "loss_counter": 0.102, "loss_bbox": 0.063, "loss_giou": 0.164, "loss_self_iou": 0.092, "cardinality_error": 3.699, "loss_ce_0": 0.267, "loss_counter_0": 0.102, "loss_bbox_0": 0.065, "loss_giou_0": 0.173, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.699, "loss_caption_0": 2.711, "loss_caption": 2.716, "total_loss": 13.368}, "166000": {"loss_ce": 0.264, "loss_counter": 0.105, "loss_bbox": 0.061, "loss_giou": 0.163, "loss_self_iou": 0.094, "cardinality_error": 3.72, "loss_ce_0": 0.268, "loss_counter_0": 0.105, "loss_bbox_0": 0.063, "loss_giou_0": 0.174, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.72, "loss_caption_0": 2.754, "loss_caption": 2.755, "total_loss": 13.534}, "167000": {"loss_ce": 0.261, "loss_counter": 0.101, "loss_bbox": 0.062, "loss_giou": 0.168, "loss_self_iou": 0.095, "cardinality_error": 3.712, "loss_ce_0": 0.266, "loss_counter_0": 0.1, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.712, "loss_caption_0": 2.771, "loss_caption": 2.772, "total_loss": 13.617}, "168000": {"loss_ce": 0.265, "loss_counter": 0.108, "loss_bbox": 0.062, "loss_giou": 0.168, "loss_self_iou": 0.09, "cardinality_error": 3.816, "loss_ce_0": 0.269, "loss_counter_0": 0.107, "loss_bbox_0": 0.064, "loss_giou_0": 0.177, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.816, "loss_caption_0": 2.814, "loss_caption": 2.82, "total_loss": 13.826}, "169000": {"loss_ce": 0.258, "loss_counter": 0.106, "loss_bbox": 0.064, "loss_giou": 0.166, "loss_self_iou": 0.106, "cardinality_error": 3.697, "loss_ce_0": 0.261, "loss_counter_0": 0.106, "loss_bbox_0": 0.067, "loss_giou_0": 0.176, "loss_self_iou_0": 0.107, "cardinality_error_0": 3.697, "loss_caption_0": 2.769, "loss_caption": 2.775, "total_loss": 13.598}, "170000": {"loss_ce": 0.268, "loss_counter": 0.105, "loss_bbox": 0.062, "loss_giou": 0.165, "loss_self_iou": 0.093, "cardinality_error": 3.799, "loss_ce_0": 0.272, "loss_counter_0": 0.106, "loss_bbox_0": 0.064, "loss_giou_0": 0.174, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.799, "loss_caption_0": 2.794, "loss_caption": 2.798, "total_loss": 13.727}, "171000": {"loss_ce": 0.256, "loss_counter": 0.101, "loss_bbox": 0.062, "loss_giou": 0.161, "loss_self_iou": 0.094, "cardinality_error": 3.694, "loss_ce_0": 0.261, "loss_counter_0": 0.101, "loss_bbox_0": 0.064, "loss_giou_0": 0.169, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.694, "loss_caption_0": 2.772, "loss_caption": 2.77, "total_loss": 13.544}, "172000": {"loss_ce": 0.258, "loss_counter": 0.1, "loss_bbox": 0.063, "loss_giou": 0.165, "loss_self_iou": 0.096, "cardinality_error": 3.667, "loss_ce_0": 0.262, "loss_counter_0": 0.1, "loss_bbox_0": 0.066, "loss_giou_0": 0.175, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.667, "loss_caption_0": 2.741, "loss_caption": 2.743, "total_loss": 13.47}, "173000": {"loss_ce": 0.258, "loss_counter": 0.104, "loss_bbox": 0.062, "loss_giou": 0.165, "loss_self_iou": 0.09, "cardinality_error": 3.753, "loss_ce_0": 0.261, "loss_counter_0": 0.104, "loss_bbox_0": 0.064, "loss_giou_0": 0.175, "loss_self_iou_0": 0.091, "cardinality_error_0": 3.753, "loss_caption_0": 2.786, "loss_caption": 2.785, "total_loss": 13.646}, "174000": {"loss_ce": 0.259, "loss_counter": 0.107, "loss_bbox": 0.06, "loss_giou": 0.166, "loss_self_iou": 0.094, "cardinality_error": 3.832, "loss_ce_0": 0.261, "loss_counter_0": 0.107, "loss_bbox_0": 0.064, "loss_giou_0": 0.177, "loss_self_iou_0": 0.096, "cardinality_error_0": 3.832, "loss_caption_0": 2.733, "loss_caption": 2.738, "total_loss": 13.457}, "175000": {"loss_ce": 0.255, "loss_counter": 0.103, "loss_bbox": 0.06, "loss_giou": 0.163, "loss_self_iou": 0.098, "cardinality_error": 3.731, "loss_ce_0": 0.259, "loss_counter_0": 0.103, "loss_bbox_0": 0.062, "loss_giou_0": 0.173, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.731, "loss_caption_0": 2.745, "loss_caption": 2.744, "total_loss": 13.454}, "176000": {"loss_ce": 0.261, "loss_counter": 0.103, "loss_bbox": 0.06, "loss_giou": 0.164, "loss_self_iou": 0.095, "cardinality_error": 3.795, "loss_ce_0": 0.264, "loss_counter_0": 0.103, "loss_bbox_0": 0.064, "loss_giou_0": 0.176, "loss_self_iou_0": 0.095, "cardinality_error_0": 3.795, "loss_caption_0": 2.761, "loss_caption": 2.77, "total_loss": 13.575}, "177000": {"loss_ce": 0.255, "loss_counter": 0.1, "loss_bbox": 0.063, "loss_giou": 0.161, "loss_self_iou": 0.096, "cardinality_error": 3.652, "loss_ce_0": 0.261, "loss_counter_0": 0.1, "loss_bbox_0": 0.065, "loss_giou_0": 0.169, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.652, "loss_caption_0": 2.743, "loss_caption": 2.745, "total_loss": 13.43}, "178000": {"loss_ce": 0.255, "loss_counter": 0.103, "loss_bbox": 0.063, "loss_giou": 0.164, "loss_self_iou": 0.103, "cardinality_error": 3.664, "loss_ce_0": 0.26, "loss_counter_0": 0.102, "loss_bbox_0": 0.066, "loss_giou_0": 0.175, "loss_self_iou_0": 0.104, "cardinality_error_0": 3.664, "loss_caption_0": 2.682, "loss_caption": 2.68, "total_loss": 13.211}, "179000": {"loss_ce": 0.261, "loss_counter": 0.105, "loss_bbox": 0.06, "loss_giou": 0.164, "loss_self_iou": 0.09, "cardinality_error": 3.825, "loss_ce_0": 0.266, "loss_counter_0": 0.104, "loss_bbox_0": 0.063, "loss_giou_0": 0.173, "loss_self_iou_0": 0.091, "cardinality_error_0": 3.825, "loss_caption_0": 2.788, "loss_caption": 2.796, "total_loss": 13.671}, "180000": {"loss_ce": 0.255, "loss_counter": 0.102, "loss_bbox": 0.064, "loss_giou": 0.166, "loss_self_iou": 0.093, "cardinality_error": 3.729, "loss_ce_0": 0.261, "loss_counter_0": 0.102, "loss_bbox_0": 0.066, "loss_giou_0": 0.175, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.729, "loss_caption_0": 2.781, "loss_caption": 2.775, "total_loss": 13.608}, "181000": {"loss_ce": 0.256, "loss_counter": 0.102, "loss_bbox": 0.061, "loss_giou": 0.163, "loss_self_iou": 0.094, "cardinality_error": 3.781, "loss_ce_0": 0.261, "loss_counter_0": 0.102, "loss_bbox_0": 0.063, "loss_giou_0": 0.172, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.781, "loss_caption_0": 2.743, "loss_caption": 2.746, "total_loss": 13.452}, "182000": {"loss_ce": 0.255, "loss_counter": 0.101, "loss_bbox": 0.062, "loss_giou": 0.164, "loss_self_iou": 0.1, "cardinality_error": 3.726, "loss_ce_0": 0.26, "loss_counter_0": 0.101, "loss_bbox_0": 0.065, "loss_giou_0": 0.174, "loss_self_iou_0": 0.1, "cardinality_error_0": 3.726, "loss_caption_0": 2.748, "loss_caption": 2.746, "total_loss": 13.472}, "183000": {"loss_ce": 0.256, "loss_counter": 0.102, "loss_bbox": 0.061, "loss_giou": 0.163, "loss_self_iou": 0.097, "cardinality_error": 3.722, "loss_ce_0": 0.26, "loss_counter_0": 0.102, "loss_bbox_0": 0.065, "loss_giou_0": 0.174, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.722, "loss_caption_0": 2.729, "loss_caption": 2.734, "total_loss": 13.405}, "184000": {"loss_ce": 0.253, "loss_counter": 0.104, "loss_bbox": 0.061, "loss_giou": 0.161, "loss_self_iou": 0.098, "cardinality_error": 3.726, "loss_ce_0": 0.257, "loss_counter_0": 0.104, "loss_bbox_0": 0.064, "loss_giou_0": 0.17, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.726, "loss_caption_0": 2.783, "loss_caption": 2.787, "total_loss": 13.591}, "185000": {"loss_ce": 0.255, "loss_counter": 0.098, "loss_bbox": 0.063, "loss_giou": 0.165, "loss_self_iou": 0.087, "cardinality_error": 3.667, "loss_ce_0": 0.26, "loss_counter_0": 0.098, "loss_bbox_0": 0.065, "loss_giou_0": 0.175, "loss_self_iou_0": 0.088, "cardinality_error_0": 3.667, "loss_caption_0": 2.718, "loss_caption": 2.716, "total_loss": 13.354}, "186000": {"loss_ce": 0.254, "loss_counter": 0.099, "loss_bbox": 0.062, "loss_giou": 0.166, "loss_self_iou": 0.093, "cardinality_error": 3.776, "loss_ce_0": 0.259, "loss_counter_0": 0.099, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.094, "cardinality_error_0": 3.776, "loss_caption_0": 2.75, "loss_caption": 2.75, "total_loss": 13.494}, "187000": {"loss_ce": 0.258, "loss_counter": 0.109, "loss_bbox": 0.062, "loss_giou": 0.165, "loss_self_iou": 0.089, "cardinality_error": 3.803, "loss_ce_0": 0.264, "loss_counter_0": 0.109, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.09, "cardinality_error_0": 3.803, "loss_caption_0": 2.788, "loss_caption": 2.791, "total_loss": 13.678}, "188000": {"loss_ce": 0.253, "loss_counter": 0.1, "loss_bbox": 0.062, "loss_giou": 0.163, "loss_self_iou": 0.091, "cardinality_error": 3.71, "loss_ce_0": 0.259, "loss_counter_0": 0.1, "loss_bbox_0": 0.065, "loss_giou_0": 0.173, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.71, "loss_caption_0": 2.745, "loss_caption": 2.743, "total_loss": 13.444}, "189000": {"loss_ce": 0.25, "loss_counter": 0.105, "loss_bbox": 0.064, "loss_giou": 0.165, "loss_self_iou": 0.1, "cardinality_error": 3.748, "loss_ce_0": 0.256, "loss_counter_0": 0.105, "loss_bbox_0": 0.067, "loss_giou_0": 0.175, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.748, "loss_caption_0": 2.751, "loss_caption": 2.753, "total_loss": 13.484}, "190000": {"loss_ce": 0.257, "loss_counter": 0.104, "loss_bbox": 0.06, "loss_giou": 0.161, "loss_self_iou": 0.098, "cardinality_error": 3.742, "loss_ce_0": 0.264, "loss_counter_0": 0.104, "loss_bbox_0": 0.063, "loss_giou_0": 0.172, "loss_self_iou_0": 0.099, "cardinality_error_0": 3.742, "loss_caption_0": 2.729, "loss_caption": 2.73, "total_loss": 13.395}, "191000": {"loss_ce": 0.251, "loss_counter": 0.099, "loss_bbox": 0.062, "loss_giou": 0.167, "loss_self_iou": 0.086, "cardinality_error": 3.653, "loss_ce_0": 0.257, "loss_counter_0": 0.099, "loss_bbox_0": 0.065, "loss_giou_0": 0.176, "loss_self_iou_0": 0.087, "cardinality_error_0": 3.653, "loss_caption_0": 2.754, "loss_caption": 2.752, "total_loss": 13.501}, "192000": {"loss_ce": 0.252, "loss_counter": 0.1, "loss_bbox": 0.061, "loss_giou": 0.164, "loss_self_iou": 0.094, "cardinality_error": 3.767, "loss_ce_0": 0.258, "loss_counter_0": 0.1, "loss_bbox_0": 0.065, "loss_giou_0": 0.173, "loss_self_iou_0": 0.097, "cardinality_error_0": 3.767, "loss_caption_0": 2.717, "loss_caption": 2.72, "total_loss": 13.343}, "193000": {"loss_ce": 0.25, "loss_counter": 0.106, "loss_bbox": 0.06, "loss_giou": 0.164, "loss_self_iou": 0.093, "cardinality_error": 3.847, "loss_ce_0": 0.256, "loss_counter_0": 0.105, "loss_bbox_0": 0.063, "loss_giou_0": 0.174, "loss_self_iou_0": 0.093, "cardinality_error_0": 3.847, "loss_caption_0": 2.754, "loss_caption": 2.759, "total_loss": 13.499}, "194000": {"loss_ce": 0.256, "loss_counter": 0.102, "loss_bbox": 0.061, "loss_giou": 0.165, "loss_self_iou": 0.097, "cardinality_error": 3.775, "loss_ce_0": 0.262, "loss_counter_0": 0.102, "loss_bbox_0": 0.063, "loss_giou_0": 0.176, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.775, "loss_caption_0": 2.769, "loss_caption": 2.772, "total_loss": 13.587}, "195000": {"loss_ce": 0.257, "loss_counter": 0.106, "loss_bbox": 0.062, "loss_giou": 0.165, "loss_self_iou": 0.089, "cardinality_error": 3.794, "loss_ce_0": 0.261, "loss_counter_0": 0.105, "loss_bbox_0": 0.064, "loss_giou_0": 0.175, "loss_self_iou_0": 0.089, "cardinality_error_0": 3.794, "loss_caption_0": 2.751, "loss_caption": 2.751, "total_loss": 13.506}, "196000": {"loss_ce": 0.251, "loss_counter": 0.095, "loss_bbox": 0.061, "loss_giou": 0.162, "loss_self_iou": 0.1, "cardinality_error": 3.652, "loss_ce_0": 0.258, "loss_counter_0": 0.095, "loss_bbox_0": 0.064, "loss_giou_0": 0.172, "loss_self_iou_0": 0.101, "cardinality_error_0": 3.652, "loss_caption_0": 2.743, "loss_caption": 2.735, "total_loss": 13.403}, "197000": {"loss_ce": 0.251, "loss_counter": 0.104, "loss_bbox": 0.061, "loss_giou": 0.162, "loss_self_iou": 0.091, "cardinality_error": 3.759, "loss_ce_0": 0.258, "loss_counter_0": 0.104, "loss_bbox_0": 0.064, "loss_giou_0": 0.171, "loss_self_iou_0": 0.091, "cardinality_error_0": 3.759, "loss_caption_0": 2.74, "loss_caption": 2.743, "total_loss": 13.418}, "198000": {"loss_ce": 0.249, "loss_counter": 0.098, "loss_bbox": 0.062, "loss_giou": 0.162, "loss_self_iou": 0.092, "cardinality_error": 3.664, "loss_ce_0": 0.255, "loss_counter_0": 0.098, "loss_bbox_0": 0.064, "loss_giou_0": 0.171, "loss_self_iou_0": 0.092, "cardinality_error_0": 3.664, "loss_caption_0": 2.718, "loss_caption": 2.72, "total_loss": 13.31}, "199000": {"loss_ce": 0.252, "loss_counter": 0.101, "loss_bbox": 0.062, "loss_giou": 0.162, "loss_self_iou": 0.101, "cardinality_error": 3.736, "loss_ce_0": 0.257, "loss_counter_0": 0.101, "loss_bbox_0": 0.065, "loss_giou_0": 0.173, "loss_self_iou_0": 0.102, "cardinality_error_0": 3.736, "loss_caption_0": 2.759, "loss_caption": 2.76, "total_loss": 13.502}, "200000": {"loss_ce": 0.253, "loss_counter": 0.102, "loss_bbox": 0.061, "loss_giou": 0.159, "loss_self_iou": 0.098, "cardinality_error": 3.701, "loss_ce_0": 0.259, "loss_counter_0": 0.102, "loss_bbox_0": 0.065, "loss_giou_0": 0.17, "loss_self_iou_0": 0.098, "cardinality_error_0": 3.701, "loss_caption_0": 2.766, "loss_caption": 2.771, "total_loss": 13.518}}, "lr_history": {"1000": 5e-05, "2000": 5e-05, "3000": 5e-05, "4000": 5e-05, "5000": 5e-05, "6000": 5e-05, "7000": 5e-05, "8000": 5e-05, "9000": 5e-05, "10000": 5e-05, "11000": 5e-05, "12000": 5e-05, "13000": 5e-05, "14000": 5e-05, "15000": 5e-05, "16000": 5e-05, "17000": 5e-05, "18000": 5e-05, "19000": 5e-05, "20000": 5e-05, "21000": 5e-05, "22000": 5e-05, "23000": 5e-05, "24000": 5e-05, "25000": 5e-05, "26000": 5e-05, "27000": 5e-05, "28000": 5e-05, "29000": 5e-05, "30000": 5e-05, "31000": 5e-05, "32000": 5e-05, "33000": 5e-05, "34000": 5e-05, "35000": 5e-05, "36000": 5e-05, "37000": 5e-05, "38000": 5e-05, "39000": 5e-05, "40000": 5e-05, "41000": 5e-05, "42000": 5e-05, "43000": 5e-05, "44000": 5e-05, "45000": 5e-05, "46000": 5e-05, "47000": 5e-05, "48000": 5e-05, "49000": 5e-05, "50000": 5e-05, "51000": 5e-05, "52000": 5e-05, "53000": 5e-05, "54000": 5e-05, "55000": 5e-05, "56000": 5e-05, "57000": 5e-05, "58000": 5e-05, "59000": 5e-05, "60000": 5e-05, "61000": 5e-05, "62000": 5e-05, "63000": 5e-05, "64000": 5e-05, "65000": 5e-05, "66000": 5e-05, "67000": 5e-05, "68000": 5e-05, "69000": 5e-05, "70000": 5e-05, "71000": 5e-05, "72000": 5e-05, "73000": 5e-05, "74000": 5e-05, "75000": 5e-05, "76000": 5e-05, "77000": 5e-05, "78000": 5e-05, "79000": 5e-05, "80000": 5e-05, "81000": 2.5e-05, "82000": 2.5e-05, "83000": 2.5e-05, "84000": 2.5e-05, "85000": 2.5e-05, "86000": 2.5e-05, "87000": 2.5e-05, "88000": 2.5e-05, "89000": 2.5e-05, "90000": 2.5e-05, "91000": 2.5e-05, "92000": 2.5e-05, "93000": 2.5e-05, "94000": 2.5e-05, "95000": 2.5e-05, "96000": 2.5e-05, "97000": 2.5e-05, "98000": 2.5e-05, "99000": 2.5e-05, "100000": 2.5e-05, "101000": 2.5e-05, "102000": 2.5e-05, "103000": 2.5e-05, "104000": 2.5e-05, "105000": 2.5e-05, "106000": 2.5e-05, "107000": 2.5e-05, "108000": 2.5e-05, "109000": 2.5e-05, "110000": 2.5e-05, "111000": 1.25e-05, "112000": 1.25e-05, "113000": 1.25e-05, "114000": 1.25e-05, "115000": 1.25e-05, "116000": 1.25e-05, "117000": 1.25e-05, "118000": 1.25e-05, "119000": 1.25e-05, "120000": 1.25e-05, "121000": 1.25e-05, "122000": 1.25e-05, "123000": 1.25e-05, "124000": 1.25e-05, "125000": 1.25e-05, "126000": 1.25e-05, "127000": 1.25e-05, "128000": 1.25e-05, "129000": 1.25e-05, "130000": 1.25e-05, "131000": 1.25e-05, "132000": 1.25e-05, "133000": 1.25e-05, "134000": 1.25e-05, "135000": 1.25e-05, "136000": 1.25e-05, "137000": 1.25e-05, "138000": 1.25e-05, "139000": 1.25e-05, "140000": 1.25e-05, "141000": 6.25e-06, "142000": 6.25e-06, "143000": 6.25e-06, "144000": 6.25e-06, "145000": 6.25e-06, "146000": 6.25e-06, "147000": 6.25e-06, "148000": 6.25e-06, "149000": 6.25e-06, "150000": 6.25e-06, "151000": 6.25e-06, "152000": 6.25e-06, "153000": 6.25e-06, "154000": 6.25e-06, "155000": 6.25e-06, "156000": 6.25e-06, "157000": 6.25e-06, "158000": 6.25e-06, "159000": 6.25e-06, "160000": 6.25e-06, "161000": 6.25e-06, "162000": 6.25e-06, "163000": 6.25e-06, "164000": 6.25e-06, "165000": 6.25e-06, "166000": 6.25e-06, "167000": 6.25e-06, "168000": 6.25e-06, "169000": 6.25e-06, "170000": 6.25e-06, "171000": 3.125e-06, "172000": 3.125e-06, "173000": 3.125e-06, "174000": 3.125e-06, "175000": 3.125e-06, "176000": 3.125e-06, "177000": 3.125e-06, "178000": 3.125e-06, "179000": 3.125e-06, "180000": 3.125e-06, "181000": 3.125e-06, "182000": 3.125e-06, "183000": 3.125e-06, "184000": 3.125e-06, "185000": 3.125e-06, "186000": 3.125e-06, "187000": 3.125e-06, "188000": 3.125e-06, "189000": 3.125e-06, "190000": 3.125e-06, "191000": 3.125e-06, "192000": 3.125e-06, "193000": 3.125e-06, "194000": 3.125e-06, "195000": 3.125e-06, "196000": 3.125e-06, "197000": 3.125e-06, "198000": 3.125e-06, "199000": 3.125e-06, "200000": 3.125e-06}}, "eval_history": {}} \ No newline at end of file +{"best": {"opt": {"cfg_path": "cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml", "id": "seq2-ft(mix)-gt_percent-1.0", "gpu_id": [], "disable_tqdm": false, "seed": 777, "random_seed": false, "disable_cudnn": 0, "debug": false, "device": "cuda", "map": true, "train_caption_file": ["data/howto/captiondata/howto100m_train.json", "data/yc2/captiondata/yc2_train.json"], "invalid_video_json": [], "val_caption_file": "data/yc2/captiondata/yc2_val.json", "visual_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/visual", "/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/"], "text_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/text", "/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/"], "gt_file_for_auc": "data/anet/captiondata/val_all.json", "gt_file_for_eval": ["data/yc2/captiondata/yc2_val.json"], "gt_file_for_para_eval": ["data/yc2/captiondata/para/para_yc2_val.json"], "dict_file": "data/howto/vocabulary_howto_rate2_yc2.json", "criteria_for_best_ckpt": "overall", "visual_feature_type": ["UniVL"], "feature_dim": 768, "start_from": "", "start_from_mode": "last", "pretrain": null, "pretrain_path": "", "nthreads": 4, "data_norm": 0, "data_rescale": 1, "feature_sample_rate": 1, "train_proposal_sample_num": 30, "gt_proposal_sample_num": 20, "ft_gt_percent": 1.0, "pre_percent": 1.0, "vocab_size": 14538, "wordRNN_input_feats_type": "C", "caption_decoder_type": "standard", "rnn_size": 512, "num_layers": 1, "input_encoding_size": 512, "att_hid_size": 512, "drop_prob": 0.5, "max_caption_len": 50, "hidden_dim": 512, "num_queries": 100, "hidden_dropout_prob": 0.5, "layer_norm_eps": 1e-12, "caption_cost_type": "loss", "set_cost_caption": 0, "set_cost_class": 2, "set_cost_bbox": 0, "set_cost_giou": 4, "cost_alpha": 0.25, "cost_gamma": 2, "bbox_loss_coef": 0, "giou_loss_coef": 4, "count_loss_coef": 0.5, "caption_loss_coef": 2, "eos_coef": 0.1, "num_classes": 1, "dec_layers": 2, "enc_layers": 2, "transformer_ff_dim": 512, "transformer_dropout_prob": 0.1, "frame_embedding_num": 200, "sample_method": "nearest", "fix_xcw": 1, "use_anchor": 0, "random_anchor_init": true, "prior_anchor_duration_init": true, "matcher_type": "default", "pretrained_language_model": "UniVL", "text_hidden_dim": 768, "max_text_input_len": 32, "max_pos_num": 500, "huggingface_cache_dir": ".cache", "text_encoder_learning_strategy": "frozen", "use_pseudo_box": false, "pseudo_box_type": "similarity_op_order_v2", "top_frames": 25, "window_size": 3, "statistic_mode": "mode", "width_ratio": 1, "beta": 1, "width_th": 1, "iteration": 3, "pseudo_box_aug": false, "pseudo_box_aug_num": 8, "pseudo_box_aug_ratio": 0.02, "pseudo_box_aug_mode": "random_range", "refine_pseudo_box": false, "use_additional_score_layer": false, "use_additional_cap_layer": false, "merge_k_boxes": 3, "merge_criterion": "ins_cap_topk", "merge_mode": "weighted_sum", "refine_pseudo_stage_num": 2, "use_query_box_for_refine": 0, "norm_ins_score": "sigmoid", "cap_prob_clip": false, "use_neg_pseudo_box": false, "num_neg_box": 10, "weighted_mil_loss": false, "focal_mil": false, "disable_rematch": false, "start_refine_epoch": -1, "align_keep_percentile": 0.1, "align_top_band_size": 0, "align_drop_z": 0, "align_one_to_many": false, "align_many_to_one": false, "align_contiguous": false, "set_cost_sim": 1.0, "enable_contrastive": false, "disable_contrastive_projection": 1, "contrastive_hidden_size": 128, "contrastive_loss_start_coef": 0.0, "contrastive_loss_temperature": 0.1, "enable_cross_video_cl": true, "enable_e2t_cl": true, "enable_bg_for_cl": true, "set_cost_cl": 0.0, "cl_schedule_val": [0, 0.1], "cl_schedule_time": [0, 2], "prior_manner": "all", "training_scheme": "all", "epoch": 20, "batch_size": 1, "batch_size_for_eval": 1, "grad_clip": 100.0, "optimizer_type": "adam", "weight_decay": 0.0001, "lr": 5e-05, "learning_rate_decay_start": 8, "learning_rate_decay_every": 3, "learning_rate_decay_rate": 0.5, "min_epoch_when_save": -1, "save_checkpoint_every": 1, "save_all_checkpoint": 0, "save_dir": "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs", "lr_backbone_names": ["None"], "lr_backbone": 2e-05, "lr_proj": 0, "lr_linear_proj_names": ["reference_points", "sampling_offsets"], "lr_linear_proj_mult": 0.1, "with_box_refine": 1, "transformer_input_type": "queries", "backbone": null, "dilation": false, "position_embedding": "sine", "position_embedding_scale": 6.283185307179586, "num_feature_levels": 4, "nheads": 8, "dec_n_points": 4, "enc_n_points": 4, "share_caption_head": 1, "cap_nheads": 1, "cap_dec_n_points": 4, "cap_num_feature_levels": 4, "disable_mid_caption_heads": false, "aux_loss": true, "cls_loss_coef": 2, "self_iou_loss_coef": 0.0, "ref_rank_loss_coef": 0.0, "mil_loss_coef": 0, "focal_alpha": 0.25, "focal_gamma": 2.0, "max_eseq_length": 20, "lloss_gau_mask": 1, "lloss_beta": 1, "scheduled_sampling_start": -1, "basic_ss_prob": 0, "scheduled_sampling_increase_every": 2, "scheduled_sampling_increase_prob": 0.05, "scheduled_sampling_max_prob": 0.25, "ec_alpha": 1.0, "test": false, "train_proposal_type": "gt", "lloss_cross_entropy": 0, "lloss_focal_loss": 0, "base_cfg_path": "cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml", "visual_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/"], "text_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/"], "soft_attention": 1, "id_ori": "", "dict_file_val": "data/howto/vocabulary_howto_rate2_yc2.json", "vocab_size_val": 14538, "current_lr": 3.125e-06, "event_context_dim": null, "clip_context_dim": 512}, "iter": 15996, "epoch": 11, "best_val_score": 0.5868440997381064, "result_json_path": "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/prediction/num457_epoch11.json", "avg_proposal_num": -1, "Precision": 0.4513424333993264, "Recall": 0.30795469953703025}, "last": {"opt": {"cfg_path": "cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml", "id": "seq2-ft(mix)-gt_percent-1.0", "gpu_id": [], "disable_tqdm": false, "seed": 777, "random_seed": false, "disable_cudnn": 0, "debug": false, "device": "cuda", "map": true, "train_caption_file": ["data/howto/captiondata/howto100m_train.json", "data/yc2/captiondata/yc2_train.json"], "invalid_video_json": [], "val_caption_file": "data/yc2/captiondata/yc2_val.json", "visual_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/visual", "/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/"], "text_feature_folder": ["/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/text", "/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/"], "gt_file_for_auc": "data/anet/captiondata/val_all.json", "gt_file_for_eval": ["data/yc2/captiondata/yc2_val.json"], "gt_file_for_para_eval": ["data/yc2/captiondata/para/para_yc2_val.json"], "dict_file": "data/howto/vocabulary_howto_rate2_yc2.json", "criteria_for_best_ckpt": "overall", "visual_feature_type": ["UniVL"], "feature_dim": 768, "start_from": "", "start_from_mode": "last", "pretrain": null, "pretrain_path": "", "nthreads": 4, "data_norm": 0, "data_rescale": 1, "feature_sample_rate": 1, "train_proposal_sample_num": 30, "gt_proposal_sample_num": 20, "ft_gt_percent": 1.0, "pre_percent": 1.0, "vocab_size": 14538, "wordRNN_input_feats_type": "C", "caption_decoder_type": "standard", "rnn_size": 512, "num_layers": 1, "input_encoding_size": 512, "att_hid_size": 512, "drop_prob": 0.5, "max_caption_len": 50, "hidden_dim": 512, "num_queries": 100, "hidden_dropout_prob": 0.5, "layer_norm_eps": 1e-12, "caption_cost_type": "loss", "set_cost_caption": 0, "set_cost_class": 2, "set_cost_bbox": 0, "set_cost_giou": 4, "cost_alpha": 0.25, "cost_gamma": 2, "bbox_loss_coef": 0, "giou_loss_coef": 4, "count_loss_coef": 0.5, "caption_loss_coef": 2, "eos_coef": 0.1, "num_classes": 1, "dec_layers": 2, "enc_layers": 2, "transformer_ff_dim": 512, "transformer_dropout_prob": 0.1, "frame_embedding_num": 200, "sample_method": "nearest", "fix_xcw": 1, "use_anchor": 0, "random_anchor_init": true, "prior_anchor_duration_init": true, "matcher_type": "default", "pretrained_language_model": "UniVL", "text_hidden_dim": 768, "max_text_input_len": 32, "max_pos_num": 500, "huggingface_cache_dir": ".cache", "text_encoder_learning_strategy": "frozen", "use_pseudo_box": false, "pseudo_box_type": "similarity_op_order_v2", "top_frames": 25, "window_size": 3, "statistic_mode": "mode", "width_ratio": 1, "beta": 1, "width_th": 1, "iteration": 3, "pseudo_box_aug": false, "pseudo_box_aug_num": 8, "pseudo_box_aug_ratio": 0.02, "pseudo_box_aug_mode": "random_range", "refine_pseudo_box": false, "use_additional_score_layer": false, "use_additional_cap_layer": false, "merge_k_boxes": 3, "merge_criterion": "ins_cap_topk", "merge_mode": "weighted_sum", "refine_pseudo_stage_num": 2, "use_query_box_for_refine": 0, "norm_ins_score": "sigmoid", "cap_prob_clip": false, "use_neg_pseudo_box": false, "num_neg_box": 10, "weighted_mil_loss": false, "focal_mil": false, "disable_rematch": false, "start_refine_epoch": -1, "align_keep_percentile": 0.1, "align_top_band_size": 0, "align_drop_z": 0, "align_one_to_many": false, "align_many_to_one": false, "align_contiguous": false, "set_cost_sim": 1.0, "enable_contrastive": false, "disable_contrastive_projection": 1, "contrastive_hidden_size": 128, "contrastive_loss_start_coef": 0.0, "contrastive_loss_temperature": 0.1, "enable_cross_video_cl": true, "enable_e2t_cl": true, "enable_bg_for_cl": true, "set_cost_cl": 0.0, "cl_schedule_val": [0, 0.1], "cl_schedule_time": [0, 2], "prior_manner": "all", "training_scheme": "all", "epoch": 20, "batch_size": 1, "batch_size_for_eval": 1, "grad_clip": 100.0, "optimizer_type": "adam", "weight_decay": 0.0001, "lr": 5e-05, "learning_rate_decay_start": 8, "learning_rate_decay_every": 3, "learning_rate_decay_rate": 0.5, "min_epoch_when_save": -1, "save_checkpoint_every": 1, "save_all_checkpoint": 0, "save_dir": "/mnt/data/pjlab-3090-sport/wuhao/logs/dibs", "lr_backbone_names": ["None"], "lr_backbone": 2e-05, "lr_proj": 0, "lr_linear_proj_names": ["reference_points", "sampling_offsets"], "lr_linear_proj_mult": 0.1, "with_box_refine": 1, "transformer_input_type": "queries", "backbone": null, "dilation": false, "position_embedding": "sine", "position_embedding_scale": 6.283185307179586, "num_feature_levels": 4, "nheads": 8, "dec_n_points": 4, "enc_n_points": 4, "share_caption_head": 1, "cap_nheads": 1, "cap_dec_n_points": 4, "cap_num_feature_levels": 4, "disable_mid_caption_heads": false, "aux_loss": true, "cls_loss_coef": 2, "self_iou_loss_coef": 0.0, "ref_rank_loss_coef": 0.0, "mil_loss_coef": 0, "focal_alpha": 0.25, "focal_gamma": 2.0, "max_eseq_length": 20, "lloss_gau_mask": 1, "lloss_beta": 1, "scheduled_sampling_start": -1, "basic_ss_prob": 0, "scheduled_sampling_increase_every": 2, "scheduled_sampling_increase_prob": 0.05, "scheduled_sampling_max_prob": 0.25, "ec_alpha": 1.0, "test": false, "train_proposal_type": "gt", "lloss_cross_entropy": 0, "lloss_focal_loss": 0, "base_cfg_path": "cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml", "visual_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/"], "text_feature_folder_val": ["/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/"], "soft_attention": 1, "id_ori": "", "dict_file_val": "data/howto/vocabulary_howto_rate2_yc2.json", "vocab_size_val": 14538, "current_lr": 3.125e-06, "event_context_dim": null, "clip_context_dim": 512}, "iter": 26660, "epoch": 19, "best_val_score": 0.5868440997381064}, "history": {"val_result_history": {"0": {"eval_score": {"Bleu_1": 0.16894357888730638, "Bleu_2": 0.09902176620134434, "Bleu_3": 0.05312286436412136, "Bleu_4": 0.026212861867102137, "METEOR": 0.0791142699299577, "ROUGE_L": 0.15563765109454591, "CIDEr": 0.4087091055845523, "Recall": 0.1991554685892762, "Precision": 0.40083793546594454, "soda_c": 0.05642652494419026, "para_Bleu_1": 0.28013834967939705, "para_Bleu_2": 0.16393959632782257, "para_Bleu_3": 0.09809744775628881, "para_Bleu_4": 0.060378126412557326, "para_METEOR": 0.1286956339033507, "para_ROUGE_L": 0.29903071052996405, "para_CIDEr": 0.14675303603221324, "avg_proposal_number": -1}}, "1": {"eval_score": {"Bleu_1": 0.18247710374533507, "Bleu_2": 0.10433126216854799, "Bleu_3": 0.05471515540980739, "Bleu_4": 0.025315544998990337, "METEOR": 0.08392673175891194, "ROUGE_L": 0.16810710582244187, "CIDEr": 0.48711946137609907, "Recall": 0.23104975652842194, "Precision": 0.4442690424090867, "soda_c": 0.06454827356060923, "para_Bleu_1": 0.27953804293947354, "para_Bleu_2": 0.1635778619591909, "para_Bleu_3": 0.09761782578266559, "para_Bleu_4": 0.060085255296605154, "para_METEOR": 0.13134445752685775, "para_ROUGE_L": 0.3040652157082556, "para_CIDEr": 0.15701615141849948, "avg_proposal_number": -1}}, "2": {"eval_score": {"Bleu_1": 0.18812761655735627, "Bleu_2": 0.11394688266117041, "Bleu_3": 0.06350983100569632, "Bleu_4": 0.03295035253718016, "METEOR": 0.08673497362280043, "ROUGE_L": 0.17099683701262633, "CIDEr": 0.534654554166069, "Recall": 0.2545535313519452, "Precision": 0.4357073390990242, "soda_c": 0.06940030844072555, "para_Bleu_1": 0.31911536052560924, "para_Bleu_2": 0.19074275606485158, "para_Bleu_3": 0.11503629156908896, "para_Bleu_4": 0.07096292455051724, "para_METEOR": 0.14141970569772275, "para_ROUGE_L": 0.3133292457236414, "para_CIDEr": 0.18756071216976763, "avg_proposal_number": -1}}, "3": {"eval_score": {"Bleu_1": 0.19536023703614988, "Bleu_2": 0.11676341716851109, "Bleu_3": 0.06337153157323498, "Bleu_4": 0.031788948303475714, "METEOR": 0.09287502887069582, "ROUGE_L": 0.18168372139225142, "CIDEr": 0.5345089450528974, "Recall": 0.26186565000159123, "Precision": 0.4578470702650138, "soda_c": 0.06891495599002981, "para_Bleu_1": 0.3645537642333956, "para_Bleu_2": 0.21504928179111618, "para_Bleu_3": 0.1297486406737134, "para_Bleu_4": 0.08010111193897063, "para_METEOR": 0.1518569517959942, "para_ROUGE_L": 0.3241825281759821, "para_CIDEr": 0.22211083978975357, "avg_proposal_number": -1}}, "4": {"eval_score": {"Bleu_1": 0.19366491706119263, "Bleu_2": 0.1161802397372496, "Bleu_3": 0.06381908710297783, "Bleu_4": 0.0310996008751752, "METEOR": 0.0900086447067842, "ROUGE_L": 0.1772625018945245, "CIDEr": 0.5329339889166991, "Recall": 0.27822837264850414, "Precision": 0.4414053002674447, "soda_c": 0.0725148309247326, "para_Bleu_1": 0.36779729697992286, "para_Bleu_2": 0.2189609464261768, "para_Bleu_3": 0.13170237886801614, "para_Bleu_4": 0.08102932652379062, "para_METEOR": 0.15287168689015676, "para_ROUGE_L": 0.32609559286330886, "para_CIDEr": 0.24981796796266917, "avg_proposal_number": -1}}, "5": {"eval_score": {"Bleu_1": 0.19874944106127662, "Bleu_2": 0.12266046915797622, "Bleu_3": 0.07150852984916518, "Bleu_4": 0.036185181004552064, "METEOR": 0.09274687098087099, "ROUGE_L": 0.18413336093424784, "CIDEr": 0.5727051685734265, "Recall": 0.259037909270404, "Precision": 0.451289465457956, "soda_c": 0.07263494732248185, "para_Bleu_1": 0.32307562783294125, "para_Bleu_2": 0.1944214796418441, "para_Bleu_3": 0.11901149393254483, "para_Bleu_4": 0.07454555120453704, "para_METEOR": 0.14324209261218024, "para_ROUGE_L": 0.31918573126228, "para_CIDEr": 0.23096832321460165}}, "6": {"eval_score": {"Bleu_1": 0.2003309018825777, "Bleu_2": 0.1225756065112458, "Bleu_3": 0.06724461390362559, "Bleu_4": 0.033684328156599955, "METEOR": 0.0938288297360794, "ROUGE_L": 0.1832565856913202, "CIDEr": 0.5805494889367487, "Recall": 0.28578288505804933, "Precision": 0.4570872842207636, "soda_c": 0.07457933387713374, "para_Bleu_1": 0.3713316702717572, "para_Bleu_2": 0.22391267992808692, "para_Bleu_3": 0.1360620228892395, "para_Bleu_4": 0.08475146307949002, "para_METEOR": 0.15553928732702577, "para_ROUGE_L": 0.3279787647771023, "para_CIDEr": 0.24807495620487915, "avg_proposal_number": -1}}, "7": {"eval_score": {"Bleu_1": 0.19584871429233122, "Bleu_2": 0.1203954133477019, "Bleu_3": 0.06765236989260215, "Bleu_4": 0.03515047236439923, "METEOR": 0.09347581038898298, "ROUGE_L": 0.18336361365161372, "CIDEr": 0.5642570328531701, "Recall": 0.287053410514844, "Precision": 0.4506790316418327, "soda_c": 0.07315525040409161, "para_Bleu_1": 0.39595219023577966, "para_Bleu_2": 0.23717913606151478, "para_Bleu_3": 0.14480681642134902, "para_Bleu_4": 0.0901695364250172, "para_METEOR": 0.16127903027678414, "para_ROUGE_L": 0.3324403291093838, "para_CIDEr": 0.23804687234043756, "avg_proposal_number": -1}}, "8": {"eval_score": {"Bleu_1": 0.19696025394358163, "Bleu_2": 0.12042554867022627, "Bleu_3": 0.06805715701089529, "Bleu_4": 0.034063345644385214, "METEOR": 0.09208296372249718, "ROUGE_L": 0.1803782633150628, "CIDEr": 0.5812603125344058, "Recall": 0.29169024735901117, "Precision": 0.44299129936438486, "soda_c": 0.07606608300691252, "para_Bleu_1": 0.383549187276652, "para_Bleu_2": 0.23192713278728125, "para_Bleu_3": 0.14217181061136971, "para_Bleu_4": 0.0892715976218228, "para_METEOR": 0.16074434603101373, "para_ROUGE_L": 0.3336567463040183, "para_CIDEr": 0.2859809872200661, "avg_proposal_number": -1}}, "9": {"eval_score": {"Bleu_1": 0.20446290018298774, "Bleu_2": 0.12418412895577716, "Bleu_3": 0.06899010124646034, "Bleu_4": 0.03428116460131532, "METEOR": 0.09595521703655657, "ROUGE_L": 0.1876517650928566, "CIDEr": 0.5887832993219201, "Recall": 0.3017153873964599, "Precision": 0.4588439095550697, "soda_c": 0.07875391677883807, "para_Bleu_1": 0.3953706124668704, "para_Bleu_2": 0.24043007714841402, "para_Bleu_3": 0.14833197751929023, "para_Bleu_4": 0.09386644902900565, "para_METEOR": 0.16476396966168239, "para_ROUGE_L": 0.33760319454244797, "para_CIDEr": 0.31194480042956774, "avg_proposal_number": -1}}, "10": {"eval_score": {"Bleu_1": 0.19267153393038786, "Bleu_2": 0.11732781330402656, "Bleu_3": 0.06746115616325608, "Bleu_4": 0.03425583839334337, "METEOR": 0.08963300348041837, "ROUGE_L": 0.17480207136309905, "CIDEr": 0.575137603362526, "Recall": 0.30432682743951917, "Precision": 0.4353044354138446, "soda_c": 0.07762847290423684, "para_Bleu_1": 0.393384019586376, "para_Bleu_2": 0.23835405770332685, "para_Bleu_3": 0.14545808678454117, "para_Bleu_4": 0.09085202435904723, "para_METEOR": 0.16354570345255123, "para_ROUGE_L": 0.3343729651839732, "para_CIDEr": 0.27098453497923136}}, "11": {"eval_score": {"Bleu_1": 0.1989422607268001, "Bleu_2": 0.12223038556953512, "Bleu_3": 0.06835990671747892, "Bleu_4": 0.03486159828438583, "METEOR": 0.09408978838449876, "ROUGE_L": 0.18200142867223945, "CIDEr": 0.593480700759431, "Recall": 0.30795469953703025, "Precision": 0.4513424333993264, "soda_c": 0.0796861065455984, "para_Bleu_1": 0.39594509057043764, "para_Bleu_2": 0.24087109399513515, "para_Bleu_3": 0.14790262814870953, "para_Bleu_4": 0.09321042711819619, "para_METEOR": 0.1655617051143519, "para_ROUGE_L": 0.3391051008488012, "para_CIDEr": 0.32807196750555834, "avg_proposal_number": -1}}, "12": {"eval_score": {"Bleu_1": 0.19294534256446427, "Bleu_2": 0.11789730285267924, "Bleu_3": 0.06601509377472357, "Bleu_4": 0.03274421971508606, "METEOR": 0.0906445074413136, "ROUGE_L": 0.17678145420382357, "CIDEr": 0.5750907875125135, "Recall": 0.3073352674556176, "Precision": 0.4434536834427428, "soda_c": 0.07896521325127955, "para_Bleu_1": 0.39483511792471604, "para_Bleu_2": 0.23988438429479647, "para_Bleu_3": 0.1464330354033768, "para_Bleu_4": 0.09122283851671699, "para_METEOR": 0.16480200992253577, "para_ROUGE_L": 0.33317486176302236, "para_CIDEr": 0.29080350784714515}}, "13": {"eval_score": {"Bleu_1": 0.1916652028982354, "Bleu_2": 0.11864819375256218, "Bleu_3": 0.06801290454817709, "Bleu_4": 0.03421778123301331, "METEOR": 0.08890100804282676, "ROUGE_L": 0.17229926562968575, "CIDEr": 0.5719694906113042, "Recall": 0.3115151404333572, "Precision": 0.42734448265082836, "soda_c": 0.07979305036983636, "para_Bleu_1": 0.3972508455506424, "para_Bleu_2": 0.24317507500304622, "para_Bleu_3": 0.1497047997976745, "para_Bleu_4": 0.09437727320664267, "para_METEOR": 0.16651343432042678, "para_ROUGE_L": 0.33875534436877147, "para_CIDEr": 0.29220356232363026}}, "14": {"eval_score": {"Bleu_1": 0.19012877786294885, "Bleu_2": 0.11743680046097797, "Bleu_3": 0.06623934110461578, "Bleu_4": 0.03314975306654321, "METEOR": 0.08857227272587216, "ROUGE_L": 0.17208518718096077, "CIDEr": 0.5689998070546577, "Recall": 0.3090681299310951, "Precision": 0.43095498593310433, "soda_c": 0.08081534748318767, "para_Bleu_1": 0.3949292262433903, "para_Bleu_2": 0.24183495416706074, "para_Bleu_3": 0.1493168425692173, "para_Bleu_4": 0.0941904023418332, "para_METEOR": 0.16661877157717606, "para_ROUGE_L": 0.3391544295873436, "para_CIDEr": 0.3057631644012313}}, "15": {"eval_score": {"Bleu_1": 0.1927355202990476, "Bleu_2": 0.11755729236198051, "Bleu_3": 0.06532950485231373, "Bleu_4": 0.0318670348131602, "METEOR": 0.08966953019840175, "ROUGE_L": 0.17549405824640266, "CIDEr": 0.5708533801009449, "Recall": 0.31055728552993345, "Precision": 0.4412863394810881, "soda_c": 0.08079399116249976, "para_Bleu_1": 0.3847850395827542, "para_Bleu_2": 0.23591168028694995, "para_Bleu_3": 0.14500000021146267, "para_Bleu_4": 0.09097906463153684, "para_METEOR": 0.1633729521776342, "para_ROUGE_L": 0.33764324525807, "para_CIDEr": 0.3225522700715415}}, "16": {"eval_score": {"Bleu_1": 0.1905629005997804, "Bleu_2": 0.11689699082903934, "Bleu_3": 0.06544029555928756, "Bleu_4": 0.03330988693345351, "METEOR": 0.08938496175202132, "ROUGE_L": 0.17298359351524648, "CIDEr": 0.5732307929342625, "Recall": 0.309604513071417, "Precision": 0.43046524955715343, "soda_c": 0.08056479007503722, "para_Bleu_1": 0.3975304274857351, "para_Bleu_2": 0.24253918136446623, "para_Bleu_3": 0.14848895422464012, "para_Bleu_4": 0.09337330751749118, "para_METEOR": 0.16677196164785574, "para_ROUGE_L": 0.33750187221117683, "para_CIDEr": 0.31278894258081524}}, "17": {"eval_score": {"Bleu_1": 0.19099469488969467, "Bleu_2": 0.11646897839764006, "Bleu_3": 0.06451308365995856, "Bleu_4": 0.032200079484133, "METEOR": 0.08912416771202449, "ROUGE_L": 0.1730757893125124, "CIDEr": 0.5693051160396969, "Recall": 0.3097042977992106, "Precision": 0.43274547601681085, "soda_c": 0.08084297498321232, "para_Bleu_1": 0.3924031546442418, "para_Bleu_2": 0.23911474626028398, "para_Bleu_3": 0.14600811918196227, "para_Bleu_4": 0.09107950853175292, "para_METEOR": 0.16594454181978452, "para_ROUGE_L": 0.33729101832099057, "para_CIDEr": 0.30892642009784}}, "18": {"eval_score": {"Bleu_1": 0.19191750615066444, "Bleu_2": 0.11783589874301872, "Bleu_3": 0.06597231596326529, "Bleu_4": 0.03167603834812624, "METEOR": 0.08996609888818348, "ROUGE_L": 0.1746391859525846, "CIDEr": 0.5689023016363987, "Recall": 0.31503357525649683, "Precision": 0.4376628112951966, "soda_c": 0.08097707611185051, "para_Bleu_1": 0.3977375551078834, "para_Bleu_2": 0.24323062675170298, "para_Bleu_3": 0.1488548587270082, "para_Bleu_4": 0.09292110149283073, "para_METEOR": 0.16716298804356167, "para_ROUGE_L": 0.33781551083855066, "para_CIDEr": 0.31014493696748857}}, "19": {"eval_score": {"Bleu_1": 0.1908811984292725, "Bleu_2": 0.11664270449592412, "Bleu_3": 0.06546844271584715, "Bleu_4": 0.03266470081303028, "METEOR": 0.08981101020496235, "ROUGE_L": 0.17382953846907112, "CIDEr": 0.5716745559959934, "Recall": 0.31292035599338697, "Precision": 0.4345220728699943, "soda_c": 0.08127095018359767, "para_Bleu_1": 0.40170065588267356, "para_Bleu_2": 0.2447870245859959, "para_Bleu_3": 0.14990588787772124, "para_Bleu_4": 0.09419227635900729, "para_METEOR": 0.16780671784283924, "para_ROUGE_L": 0.33845945539662686, "para_CIDEr": 0.3198675630646056}}}, "loss_history": {"133": {"loss_ce": 0.336, "loss_counter": 0.129, "loss_bbox": 0.039, "loss_giou": 0.368, "loss_self_iou": 0.028, "cardinality_error": 7.797, "loss_ce_0": 0.337, "loss_counter_0": 0.13, "loss_bbox_0": 0.041, "loss_giou_0": 0.381, "loss_self_iou_0": 0.03, "cardinality_error_0": 7.797, "loss_caption_0": 2.755, "loss_caption": 2.681, "total_loss": 15.341}, "266": {"loss_ce": 0.324, "loss_counter": 0.129, "loss_bbox": 0.036, "loss_giou": 0.369, "loss_self_iou": 0.018, "cardinality_error": 7.812, "loss_ce_0": 0.341, "loss_counter_0": 0.132, "loss_bbox_0": 0.039, "loss_giou_0": 0.38, "loss_self_iou_0": 0.019, "cardinality_error_0": 7.812, "loss_caption_0": 2.803, "loss_caption": 2.638, "total_loss": 15.341}, "399": {"loss_ce": 0.312, "loss_counter": 0.13, "loss_bbox": 0.039, "loss_giou": 0.375, "loss_self_iou": 0.02, "cardinality_error": 7.835, "loss_ce_0": 0.324, "loss_counter_0": 0.132, "loss_bbox_0": 0.043, "loss_giou_0": 0.395, "loss_self_iou_0": 0.021, "cardinality_error_0": 7.835, "loss_caption_0": 2.81, "loss_caption": 2.676, "total_loss": 15.459}, "532": {"loss_ce": 0.307, "loss_counter": 0.133, "loss_bbox": 0.044, "loss_giou": 0.394, "loss_self_iou": 0.02, "cardinality_error": 7.902, "loss_ce_0": 0.319, "loss_counter_0": 0.133, "loss_bbox_0": 0.05, "loss_giou_0": 0.421, "loss_self_iou_0": 0.026, "cardinality_error_0": 7.902, "loss_caption_0": 2.817, "loss_caption": 2.654, "total_loss": 15.588}, "665": {"loss_ce": 0.312, "loss_counter": 0.135, "loss_bbox": 0.034, "loss_giou": 0.345, "loss_self_iou": 0.017, "cardinality_error": 7.805, "loss_ce_0": 0.319, "loss_counter_0": 0.131, "loss_bbox_0": 0.038, "loss_giou_0": 0.372, "loss_self_iou_0": 0.019, "cardinality_error_0": 7.805, "loss_caption_0": 2.758, "loss_caption": 2.635, "total_loss": 15.049}, "798": {"loss_ce": 0.321, "loss_counter": 0.125, "loss_bbox": 0.03, "loss_giou": 0.319, "loss_self_iou": 0.015, "cardinality_error": 7.774, "loss_ce_0": 0.331, "loss_counter_0": 0.124, "loss_bbox_0": 0.032, "loss_giou_0": 0.344, "loss_self_iou_0": 0.015, "cardinality_error_0": 7.774, "loss_caption_0": 2.66, "loss_caption": 2.559, "total_loss": 14.519}, "931": {"loss_ce": 0.327, "loss_counter": 0.122, "loss_bbox": 0.027, "loss_giou": 0.306, "loss_self_iou": 0.011, "cardinality_error": 7.865, "loss_ce_0": 0.346, "loss_counter_0": 0.123, "loss_bbox_0": 0.029, "loss_giou_0": 0.327, "loss_self_iou_0": 0.012, "cardinality_error_0": 7.865, "loss_caption_0": 2.54, "loss_caption": 2.468, "total_loss": 14.017}, "1064": {"loss_ce": 0.331, "loss_counter": 0.121, "loss_bbox": 0.027, "loss_giou": 0.292, "loss_self_iou": 0.01, "cardinality_error": 7.579, "loss_ce_0": 0.345, "loss_counter_0": 0.127, "loss_bbox_0": 0.028, "loss_giou_0": 0.311, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.579, "loss_caption_0": 2.639, "loss_caption": 2.626, "total_loss": 14.419}, "1197": {"loss_ce": 0.325, "loss_counter": 0.118, "loss_bbox": 0.026, "loss_giou": 0.296, "loss_self_iou": 0.011, "cardinality_error": 7.241, "loss_ce_0": 0.339, "loss_counter_0": 0.121, "loss_bbox_0": 0.028, "loss_giou_0": 0.317, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.241, "loss_caption_0": 2.501, "loss_caption": 2.496, "total_loss": 13.892}, "1330": {"loss_ce": 0.327, "loss_counter": 0.126, "loss_bbox": 0.026, "loss_giou": 0.304, "loss_self_iou": 0.011, "cardinality_error": 7.94, "loss_ce_0": 0.334, "loss_counter_0": 0.127, "loss_bbox_0": 0.029, "loss_giou_0": 0.332, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.94, "loss_caption_0": 2.635, "loss_caption": 2.619, "total_loss": 14.504}, "1463": {"loss_ce": 0.322, "loss_counter": 0.128, "loss_bbox": 0.026, "loss_giou": 0.301, "loss_self_iou": 0.011, "cardinality_error": 7.699, "loss_ce_0": 0.335, "loss_counter_0": 0.129, "loss_bbox_0": 0.026, "loss_giou_0": 0.316, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.699, "loss_caption_0": 2.448, "loss_caption": 2.462, "total_loss": 13.729}, "1596": {"loss_ce": 0.311, "loss_counter": 0.126, "loss_bbox": 0.022, "loss_giou": 0.284, "loss_self_iou": 0.01, "cardinality_error": 8.233, "loss_ce_0": 0.322, "loss_counter_0": 0.123, "loss_bbox_0": 0.024, "loss_giou_0": 0.31, "loss_self_iou_0": 0.01, "cardinality_error_0": 8.233, "loss_caption_0": 2.348, "loss_caption": 2.348, "total_loss": 13.16}, "1729": {"loss_ce": 0.311, "loss_counter": 0.124, "loss_bbox": 0.023, "loss_giou": 0.273, "loss_self_iou": 0.01, "cardinality_error": 7.632, "loss_ce_0": 0.32, "loss_counter_0": 0.124, "loss_bbox_0": 0.026, "loss_giou_0": 0.307, "loss_self_iou_0": 0.012, "cardinality_error_0": 7.632, "loss_caption_0": 2.363, "loss_caption": 2.353, "total_loss": 13.14}, "1862": {"loss_ce": 0.316, "loss_counter": 0.12, "loss_bbox": 0.023, "loss_giou": 0.268, "loss_self_iou": 0.01, "cardinality_error": 7.609, "loss_ce_0": 0.32, "loss_counter_0": 0.119, "loss_bbox_0": 0.025, "loss_giou_0": 0.29, "loss_self_iou_0": 0.01, "cardinality_error_0": 7.609, "loss_caption_0": 2.439, "loss_caption": 2.419, "total_loss": 13.343}, "1995": {"loss_ce": 0.314, "loss_counter": 0.122, "loss_bbox": 0.022, "loss_giou": 0.281, "loss_self_iou": 0.009, "cardinality_error": 7.541, "loss_ce_0": 0.322, "loss_counter_0": 0.122, "loss_bbox_0": 0.025, "loss_giou_0": 0.309, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.541, "loss_caption_0": 2.503, "loss_caption": 2.503, "total_loss": 13.766}, "2128": {"loss_ce": 0.316, "loss_counter": 0.126, "loss_bbox": 0.024, "loss_giou": 0.284, "loss_self_iou": 0.009, "cardinality_error": 7.789, "loss_ce_0": 0.324, "loss_counter_0": 0.125, "loss_bbox_0": 0.026, "loss_giou_0": 0.301, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.789, "loss_caption_0": 2.5, "loss_caption": 2.493, "total_loss": 13.73}, "2261": {"loss_ce": 0.31, "loss_counter": 0.122, "loss_bbox": 0.023, "loss_giou": 0.285, "loss_self_iou": 0.012, "cardinality_error": 7.902, "loss_ce_0": 0.316, "loss_counter_0": 0.12, "loss_bbox_0": 0.025, "loss_giou_0": 0.304, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.902, "loss_caption_0": 2.425, "loss_caption": 2.424, "total_loss": 13.426}, "2394": {"loss_ce": 0.315, "loss_counter": 0.126, "loss_bbox": 0.025, "loss_giou": 0.29, "loss_self_iou": 0.011, "cardinality_error": 7.534, "loss_ce_0": 0.323, "loss_counter_0": 0.125, "loss_bbox_0": 0.026, "loss_giou_0": 0.308, "loss_self_iou_0": 0.01, "cardinality_error_0": 7.534, "loss_caption_0": 2.439, "loss_caption": 2.435, "total_loss": 13.54}, "2527": {"loss_ce": 0.313, "loss_counter": 0.125, "loss_bbox": 0.023, "loss_giou": 0.276, "loss_self_iou": 0.009, "cardinality_error": 7.647, "loss_ce_0": 0.319, "loss_counter_0": 0.123, "loss_bbox_0": 0.025, "loss_giou_0": 0.296, "loss_self_iou_0": 0.01, "cardinality_error_0": 7.647, "loss_caption_0": 2.454, "loss_caption": 2.455, "total_loss": 13.492}, "2660": {"loss_ce": 0.313, "loss_counter": 0.131, "loss_bbox": 0.023, "loss_giou": 0.273, "loss_self_iou": 0.01, "cardinality_error": 8.0, "loss_ce_0": 0.317, "loss_counter_0": 0.128, "loss_bbox_0": 0.026, "loss_giou_0": 0.294, "loss_self_iou_0": 0.01, "cardinality_error_0": 8.0, "loss_caption_0": 2.464, "loss_caption": 2.451, "total_loss": 13.487}, "2793": {"loss_ce": 0.309, "loss_counter": 0.119, "loss_bbox": 0.021, "loss_giou": 0.26, "loss_self_iou": 0.01, "cardinality_error": 7.556, "loss_ce_0": 0.312, "loss_counter_0": 0.118, "loss_bbox_0": 0.024, "loss_giou_0": 0.285, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.556, "loss_caption_0": 2.27, "loss_caption": 2.276, "total_loss": 12.632}, "2926": {"loss_ce": 0.313, "loss_counter": 0.121, "loss_bbox": 0.023, "loss_giou": 0.266, "loss_self_iou": 0.008, "cardinality_error": 7.444, "loss_ce_0": 0.317, "loss_counter_0": 0.118, "loss_bbox_0": 0.025, "loss_giou_0": 0.287, "loss_self_iou_0": 0.01, "cardinality_error_0": 7.444, "loss_caption_0": 2.276, "loss_caption": 2.291, "total_loss": 12.726}, "3059": {"loss_ce": 0.298, "loss_counter": 0.127, "loss_bbox": 0.02, "loss_giou": 0.272, "loss_self_iou": 0.008, "cardinality_error": 8.135, "loss_ce_0": 0.302, "loss_counter_0": 0.125, "loss_bbox_0": 0.023, "loss_giou_0": 0.296, "loss_self_iou_0": 0.009, "cardinality_error_0": 8.135, "loss_caption_0": 2.364, "loss_caption": 2.364, "total_loss": 13.057}, "3192": {"loss_ce": 0.301, "loss_counter": 0.122, "loss_bbox": 0.022, "loss_giou": 0.266, "loss_self_iou": 0.008, "cardinality_error": 7.699, "loss_ce_0": 0.306, "loss_counter_0": 0.121, "loss_bbox_0": 0.023, "loss_giou_0": 0.286, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.699, "loss_caption_0": 2.367, "loss_caption": 2.381, "total_loss": 13.038}, "3325": {"loss_ce": 0.3, "loss_counter": 0.123, "loss_bbox": 0.021, "loss_giou": 0.274, "loss_self_iou": 0.009, "cardinality_error": 7.932, "loss_ce_0": 0.3, "loss_counter_0": 0.121, "loss_bbox_0": 0.023, "loss_giou_0": 0.291, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.932, "loss_caption_0": 2.323, "loss_caption": 2.33, "total_loss": 12.887}, "3458": {"loss_ce": 0.31, "loss_counter": 0.124, "loss_bbox": 0.021, "loss_giou": 0.277, "loss_self_iou": 0.01, "cardinality_error": 7.865, "loss_ce_0": 0.31, "loss_counter_0": 0.123, "loss_bbox_0": 0.023, "loss_giou_0": 0.295, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.865, "loss_caption_0": 2.351, "loss_caption": 2.341, "total_loss": 13.038}, "3591": {"loss_ce": 0.306, "loss_counter": 0.114, "loss_bbox": 0.022, "loss_giou": 0.263, "loss_self_iou": 0.009, "cardinality_error": 7.586, "loss_ce_0": 0.308, "loss_counter_0": 0.114, "loss_bbox_0": 0.024, "loss_giou_0": 0.285, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.586, "loss_caption_0": 2.222, "loss_caption": 2.223, "total_loss": 12.425}, "3724": {"loss_ce": 0.305, "loss_counter": 0.123, "loss_bbox": 0.023, "loss_giou": 0.265, "loss_self_iou": 0.009, "cardinality_error": 7.624, "loss_ce_0": 0.307, "loss_counter_0": 0.121, "loss_bbox_0": 0.024, "loss_giou_0": 0.279, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.624, "loss_caption_0": 2.38, "loss_caption": 2.368, "total_loss": 13.014}, "3857": {"loss_ce": 0.306, "loss_counter": 0.115, "loss_bbox": 0.021, "loss_giou": 0.264, "loss_self_iou": 0.009, "cardinality_error": 7.489, "loss_ce_0": 0.312, "loss_counter_0": 0.114, "loss_bbox_0": 0.023, "loss_giou_0": 0.279, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.489, "loss_caption_0": 2.343, "loss_caption": 2.344, "total_loss": 12.897}, "3990": {"loss_ce": 0.299, "loss_counter": 0.134, "loss_bbox": 0.02, "loss_giou": 0.268, "loss_self_iou": 0.012, "cardinality_error": 8.301, "loss_ce_0": 0.299, "loss_counter_0": 0.131, "loss_bbox_0": 0.022, "loss_giou_0": 0.289, "loss_self_iou_0": 0.013, "cardinality_error_0": 8.301, "loss_caption_0": 2.327, "loss_caption": 2.346, "total_loss": 12.9}, "4123": {"loss_ce": 0.305, "loss_counter": 0.129, "loss_bbox": 0.021, "loss_giou": 0.256, "loss_self_iou": 0.008, "cardinality_error": 7.925, "loss_ce_0": 0.307, "loss_counter_0": 0.126, "loss_bbox_0": 0.023, "loss_giou_0": 0.275, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.925, "loss_caption_0": 2.272, "loss_caption": 2.28, "total_loss": 12.579}, "4256": {"loss_ce": 0.308, "loss_counter": 0.121, "loss_bbox": 0.02, "loss_giou": 0.256, "loss_self_iou": 0.008, "cardinality_error": 7.632, "loss_ce_0": 0.31, "loss_counter_0": 0.12, "loss_bbox_0": 0.022, "loss_giou_0": 0.276, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.632, "loss_caption_0": 2.247, "loss_caption": 2.252, "total_loss": 12.484}, "4389": {"loss_ce": 0.305, "loss_counter": 0.12, "loss_bbox": 0.021, "loss_giou": 0.26, "loss_self_iou": 0.011, "cardinality_error": 7.526, "loss_ce_0": 0.309, "loss_counter_0": 0.119, "loss_bbox_0": 0.022, "loss_giou_0": 0.272, "loss_self_iou_0": 0.01, "cardinality_error_0": 7.526, "loss_caption_0": 2.194, "loss_caption": 2.205, "total_loss": 12.273}, "4522": {"loss_ce": 0.305, "loss_counter": 0.115, "loss_bbox": 0.019, "loss_giou": 0.248, "loss_self_iou": 0.007, "cardinality_error": 7.519, "loss_ce_0": 0.303, "loss_counter_0": 0.113, "loss_bbox_0": 0.021, "loss_giou_0": 0.262, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.519, "loss_caption_0": 2.335, "loss_caption": 2.326, "total_loss": 12.689}, "4655": {"loss_ce": 0.297, "loss_counter": 0.122, "loss_bbox": 0.02, "loss_giou": 0.263, "loss_self_iou": 0.008, "cardinality_error": 7.97, "loss_ce_0": 0.298, "loss_counter_0": 0.121, "loss_bbox_0": 0.022, "loss_giou_0": 0.285, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.97, "loss_caption_0": 2.254, "loss_caption": 2.267, "total_loss": 12.545}, "4788": {"loss_ce": 0.308, "loss_counter": 0.118, "loss_bbox": 0.021, "loss_giou": 0.253, "loss_self_iou": 0.008, "cardinality_error": 7.481, "loss_ce_0": 0.308, "loss_counter_0": 0.118, "loss_bbox_0": 0.022, "loss_giou_0": 0.268, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.481, "loss_caption_0": 2.208, "loss_caption": 2.195, "total_loss": 12.24}, "4921": {"loss_ce": 0.306, "loss_counter": 0.12, "loss_bbox": 0.019, "loss_giou": 0.262, "loss_self_iou": 0.01, "cardinality_error": 7.842, "loss_ce_0": 0.305, "loss_counter_0": 0.119, "loss_bbox_0": 0.021, "loss_giou_0": 0.284, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.842, "loss_caption_0": 2.186, "loss_caption": 2.196, "total_loss": 12.289}, "5054": {"loss_ce": 0.303, "loss_counter": 0.121, "loss_bbox": 0.022, "loss_giou": 0.26, "loss_self_iou": 0.009, "cardinality_error": 7.887, "loss_ce_0": 0.305, "loss_counter_0": 0.12, "loss_bbox_0": 0.023, "loss_giou_0": 0.271, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.887, "loss_caption_0": 2.242, "loss_caption": 2.239, "total_loss": 12.422}, "5187": {"loss_ce": 0.303, "loss_counter": 0.124, "loss_bbox": 0.021, "loss_giou": 0.262, "loss_self_iou": 0.009, "cardinality_error": 7.932, "loss_ce_0": 0.305, "loss_counter_0": 0.123, "loss_bbox_0": 0.022, "loss_giou_0": 0.277, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.932, "loss_caption_0": 2.25, "loss_caption": 2.246, "total_loss": 12.483}, "5320": {"loss_ce": 0.299, "loss_counter": 0.12, "loss_bbox": 0.022, "loss_giou": 0.26, "loss_self_iou": 0.006, "cardinality_error": 7.729, "loss_ce_0": 0.298, "loss_counter_0": 0.12, "loss_bbox_0": 0.024, "loss_giou_0": 0.279, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.729, "loss_caption_0": 2.287, "loss_caption": 2.298, "total_loss": 12.64}, "5453": {"loss_ce": 0.301, "loss_counter": 0.113, "loss_bbox": 0.022, "loss_giou": 0.25, "loss_self_iou": 0.011, "cardinality_error": 7.519, "loss_ce_0": 0.298, "loss_counter_0": 0.113, "loss_bbox_0": 0.023, "loss_giou_0": 0.269, "loss_self_iou_0": 0.011, "cardinality_error_0": 7.519, "loss_caption_0": 2.175, "loss_caption": 2.176, "total_loss": 12.088}, "5586": {"loss_ce": 0.294, "loss_counter": 0.12, "loss_bbox": 0.018, "loss_giou": 0.252, "loss_self_iou": 0.007, "cardinality_error": 7.662, "loss_ce_0": 0.292, "loss_counter_0": 0.119, "loss_bbox_0": 0.02, "loss_giou_0": 0.274, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.662, "loss_caption_0": 2.16, "loss_caption": 2.132, "total_loss": 11.979}, "5719": {"loss_ce": 0.305, "loss_counter": 0.13, "loss_bbox": 0.02, "loss_giou": 0.255, "loss_self_iou": 0.008, "cardinality_error": 8.451, "loss_ce_0": 0.302, "loss_counter_0": 0.127, "loss_bbox_0": 0.021, "loss_giou_0": 0.273, "loss_self_iou_0": 0.008, "cardinality_error_0": 8.451, "loss_caption_0": 2.166, "loss_caption": 2.164, "total_loss": 12.113}, "5852": {"loss_ce": 0.301, "loss_counter": 0.12, "loss_bbox": 0.019, "loss_giou": 0.246, "loss_self_iou": 0.007, "cardinality_error": 7.835, "loss_ce_0": 0.302, "loss_counter_0": 0.12, "loss_bbox_0": 0.02, "loss_giou_0": 0.267, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.835, "loss_caption_0": 2.122, "loss_caption": 2.111, "total_loss": 11.841}, "5985": {"loss_ce": 0.304, "loss_counter": 0.122, "loss_bbox": 0.02, "loss_giou": 0.243, "loss_self_iou": 0.009, "cardinality_error": 7.474, "loss_ce_0": 0.298, "loss_counter_0": 0.12, "loss_bbox_0": 0.022, "loss_giou_0": 0.263, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.474, "loss_caption_0": 2.149, "loss_caption": 2.14, "total_loss": 11.926}, "6118": {"loss_ce": 0.3, "loss_counter": 0.113, "loss_bbox": 0.018, "loss_giou": 0.241, "loss_self_iou": 0.008, "cardinality_error": 7.639, "loss_ce_0": 0.302, "loss_counter_0": 0.112, "loss_bbox_0": 0.019, "loss_giou_0": 0.259, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.639, "loss_caption_0": 2.235, "loss_caption": 2.215, "total_loss": 12.218}, "6251": {"loss_ce": 0.301, "loss_counter": 0.125, "loss_bbox": 0.02, "loss_giou": 0.251, "loss_self_iou": 0.007, "cardinality_error": 7.857, "loss_ce_0": 0.301, "loss_counter_0": 0.125, "loss_bbox_0": 0.022, "loss_giou_0": 0.268, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.857, "loss_caption_0": 2.235, "loss_caption": 2.226, "total_loss": 12.328}, "6384": {"loss_ce": 0.302, "loss_counter": 0.124, "loss_bbox": 0.02, "loss_giou": 0.246, "loss_self_iou": 0.006, "cardinality_error": 7.82, "loss_ce_0": 0.301, "loss_counter_0": 0.124, "loss_bbox_0": 0.021, "loss_giou_0": 0.265, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.82, "loss_caption_0": 2.208, "loss_caption": 2.183, "total_loss": 12.157}, "6517": {"loss_ce": 0.297, "loss_counter": 0.12, "loss_bbox": 0.02, "loss_giou": 0.256, "loss_self_iou": 0.008, "cardinality_error": 7.872, "loss_ce_0": 0.295, "loss_counter_0": 0.118, "loss_bbox_0": 0.022, "loss_giou_0": 0.271, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.872, "loss_caption_0": 2.135, "loss_caption": 2.155, "total_loss": 11.99}, "6650": {"loss_ce": 0.297, "loss_counter": 0.112, "loss_bbox": 0.021, "loss_giou": 0.244, "loss_self_iou": 0.008, "cardinality_error": 7.398, "loss_ce_0": 0.297, "loss_counter_0": 0.112, "loss_bbox_0": 0.023, "loss_giou_0": 0.26, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.398, "loss_caption_0": 2.205, "loss_caption": 2.202, "total_loss": 12.127}, "6783": {"loss_ce": 0.29, "loss_counter": 0.117, "loss_bbox": 0.019, "loss_giou": 0.24, "loss_self_iou": 0.007, "cardinality_error": 7.586, "loss_ce_0": 0.29, "loss_counter_0": 0.116, "loss_bbox_0": 0.02, "loss_giou_0": 0.257, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.586, "loss_caption_0": 2.02, "loss_caption": 2.014, "total_loss": 11.332}, "6916": {"loss_ce": 0.301, "loss_counter": 0.118, "loss_bbox": 0.021, "loss_giou": 0.249, "loss_self_iou": 0.008, "cardinality_error": 7.519, "loss_ce_0": 0.302, "loss_counter_0": 0.116, "loss_bbox_0": 0.023, "loss_giou_0": 0.264, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.519, "loss_caption_0": 2.118, "loss_caption": 2.101, "total_loss": 11.817}, "7049": {"loss_ce": 0.294, "loss_counter": 0.119, "loss_bbox": 0.019, "loss_giou": 0.25, "loss_self_iou": 0.007, "cardinality_error": 7.699, "loss_ce_0": 0.292, "loss_counter_0": 0.118, "loss_bbox_0": 0.02, "loss_giou_0": 0.265, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.699, "loss_caption_0": 2.105, "loss_caption": 2.111, "total_loss": 11.78}, "7182": {"loss_ce": 0.29, "loss_counter": 0.115, "loss_bbox": 0.021, "loss_giou": 0.242, "loss_self_iou": 0.008, "cardinality_error": 7.594, "loss_ce_0": 0.288, "loss_counter_0": 0.115, "loss_bbox_0": 0.022, "loss_giou_0": 0.257, "loss_self_iou_0": 0.009, "cardinality_error_0": 7.594, "loss_caption_0": 2.194, "loss_caption": 2.195, "total_loss": 12.045}, "7315": {"loss_ce": 0.29, "loss_counter": 0.123, "loss_bbox": 0.02, "loss_giou": 0.254, "loss_self_iou": 0.009, "cardinality_error": 8.301, "loss_ce_0": 0.291, "loss_counter_0": 0.123, "loss_bbox_0": 0.02, "loss_giou_0": 0.268, "loss_self_iou_0": 0.009, "cardinality_error_0": 8.301, "loss_caption_0": 2.096, "loss_caption": 2.09, "total_loss": 11.741}, "7448": {"loss_ce": 0.296, "loss_counter": 0.12, "loss_bbox": 0.019, "loss_giou": 0.234, "loss_self_iou": 0.006, "cardinality_error": 7.677, "loss_ce_0": 0.292, "loss_counter_0": 0.12, "loss_bbox_0": 0.02, "loss_giou_0": 0.251, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.677, "loss_caption_0": 2.076, "loss_caption": 2.063, "total_loss": 11.513}, "7581": {"loss_ce": 0.298, "loss_counter": 0.116, "loss_bbox": 0.019, "loss_giou": 0.238, "loss_self_iou": 0.008, "cardinality_error": 7.534, "loss_ce_0": 0.295, "loss_counter_0": 0.116, "loss_bbox_0": 0.02, "loss_giou_0": 0.253, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.534, "loss_caption_0": 2.114, "loss_caption": 2.112, "total_loss": 11.718}, "7714": {"loss_ce": 0.295, "loss_counter": 0.117, "loss_bbox": 0.018, "loss_giou": 0.235, "loss_self_iou": 0.008, "cardinality_error": 7.677, "loss_ce_0": 0.291, "loss_counter_0": 0.116, "loss_bbox_0": 0.02, "loss_giou_0": 0.253, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.677, "loss_caption_0": 2.167, "loss_caption": 2.179, "total_loss": 11.932}, "7847": {"loss_ce": 0.293, "loss_counter": 0.118, "loss_bbox": 0.019, "loss_giou": 0.252, "loss_self_iou": 0.009, "cardinality_error": 8.053, "loss_ce_0": 0.289, "loss_counter_0": 0.117, "loss_bbox_0": 0.021, "loss_giou_0": 0.269, "loss_self_iou_0": 0.009, "cardinality_error_0": 8.053, "loss_caption_0": 2.106, "loss_caption": 2.115, "total_loss": 11.804}, "7980": {"loss_ce": 0.3, "loss_counter": 0.118, "loss_bbox": 0.019, "loss_giou": 0.249, "loss_self_iou": 0.007, "cardinality_error": 7.902, "loss_ce_0": 0.295, "loss_counter_0": 0.117, "loss_bbox_0": 0.021, "loss_giou_0": 0.268, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.902, "loss_caption_0": 2.151, "loss_caption": 2.153, "total_loss": 11.979}, "8113": {"loss_ce": 0.297, "loss_counter": 0.114, "loss_bbox": 0.019, "loss_giou": 0.236, "loss_self_iou": 0.008, "cardinality_error": 7.617, "loss_ce_0": 0.295, "loss_counter_0": 0.112, "loss_bbox_0": 0.021, "loss_giou_0": 0.257, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.617, "loss_caption_0": 2.036, "loss_caption": 2.044, "total_loss": 11.427}, "8246": {"loss_ce": 0.286, "loss_counter": 0.119, "loss_bbox": 0.019, "loss_giou": 0.237, "loss_self_iou": 0.006, "cardinality_error": 7.827, "loss_ce_0": 0.283, "loss_counter_0": 0.119, "loss_bbox_0": 0.02, "loss_giou_0": 0.257, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.827, "loss_caption_0": 2.055, "loss_caption": 2.057, "total_loss": 11.458}, "8379": {"loss_ce": 0.29, "loss_counter": 0.118, "loss_bbox": 0.018, "loss_giou": 0.225, "loss_self_iou": 0.005, "cardinality_error": 7.82, "loss_ce_0": 0.286, "loss_counter_0": 0.117, "loss_bbox_0": 0.019, "loss_giou_0": 0.246, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.82, "loss_caption_0": 2.046, "loss_caption": 2.041, "total_loss": 11.331}, "8512": {"loss_ce": 0.286, "loss_counter": 0.114, "loss_bbox": 0.018, "loss_giou": 0.228, "loss_self_iou": 0.006, "cardinality_error": 7.654, "loss_ce_0": 0.283, "loss_counter_0": 0.114, "loss_bbox_0": 0.019, "loss_giou_0": 0.245, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.654, "loss_caption_0": 1.991, "loss_caption": 1.997, "total_loss": 11.118}, "8645": {"loss_ce": 0.29, "loss_counter": 0.115, "loss_bbox": 0.02, "loss_giou": 0.251, "loss_self_iou": 0.007, "cardinality_error": 8.068, "loss_ce_0": 0.287, "loss_counter_0": 0.115, "loss_bbox_0": 0.022, "loss_giou_0": 0.265, "loss_self_iou_0": 0.008, "cardinality_error_0": 8.068, "loss_caption_0": 2.094, "loss_caption": 2.097, "total_loss": 11.714}, "8778": {"loss_ce": 0.288, "loss_counter": 0.121, "loss_bbox": 0.019, "loss_giou": 0.24, "loss_self_iou": 0.008, "cardinality_error": 8.008, "loss_ce_0": 0.286, "loss_counter_0": 0.121, "loss_bbox_0": 0.02, "loss_giou_0": 0.258, "loss_self_iou_0": 0.008, "cardinality_error_0": 8.008, "loss_caption_0": 2.092, "loss_caption": 2.092, "total_loss": 11.63}, "8911": {"loss_ce": 0.298, "loss_counter": 0.114, "loss_bbox": 0.019, "loss_giou": 0.235, "loss_self_iou": 0.008, "cardinality_error": 7.338, "loss_ce_0": 0.297, "loss_counter_0": 0.114, "loss_bbox_0": 0.02, "loss_giou_0": 0.248, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.338, "loss_caption_0": 2.051, "loss_caption": 2.054, "total_loss": 11.446}, "9044": {"loss_ce": 0.292, "loss_counter": 0.105, "loss_bbox": 0.02, "loss_giou": 0.227, "loss_self_iou": 0.008, "cardinality_error": 7.226, "loss_ce_0": 0.292, "loss_counter_0": 0.105, "loss_bbox_0": 0.021, "loss_giou_0": 0.243, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.226, "loss_caption_0": 2.08, "loss_caption": 2.084, "total_loss": 11.478}, "9177": {"loss_ce": 0.291, "loss_counter": 0.12, "loss_bbox": 0.019, "loss_giou": 0.254, "loss_self_iou": 0.007, "cardinality_error": 7.977, "loss_ce_0": 0.288, "loss_counter_0": 0.119, "loss_bbox_0": 0.02, "loss_giou_0": 0.275, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.977, "loss_caption_0": 2.046, "loss_caption": 2.031, "total_loss": 11.546}, "9310": {"loss_ce": 0.28, "loss_counter": 0.117, "loss_bbox": 0.018, "loss_giou": 0.236, "loss_self_iou": 0.006, "cardinality_error": 7.97, "loss_ce_0": 0.281, "loss_counter_0": 0.118, "loss_bbox_0": 0.019, "loss_giou_0": 0.252, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.97, "loss_caption_0": 1.986, "loss_caption": 1.995, "total_loss": 11.157}, "9443": {"loss_ce": 0.294, "loss_counter": 0.114, "loss_bbox": 0.018, "loss_giou": 0.226, "loss_self_iou": 0.006, "cardinality_error": 7.617, "loss_ce_0": 0.292, "loss_counter_0": 0.114, "loss_bbox_0": 0.019, "loss_giou_0": 0.239, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.617, "loss_caption_0": 2.065, "loss_caption": 2.061, "total_loss": 11.394}, "9576": {"loss_ce": 0.285, "loss_counter": 0.119, "loss_bbox": 0.02, "loss_giou": 0.231, "loss_self_iou": 0.006, "cardinality_error": 7.917, "loss_ce_0": 0.284, "loss_counter_0": 0.119, "loss_bbox_0": 0.021, "loss_giou_0": 0.252, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.917, "loss_caption_0": 1.977, "loss_caption": 1.974, "total_loss": 11.093}, "9709": {"loss_ce": 0.291, "loss_counter": 0.117, "loss_bbox": 0.016, "loss_giou": 0.224, "loss_self_iou": 0.006, "cardinality_error": 8.098, "loss_ce_0": 0.29, "loss_counter_0": 0.117, "loss_bbox_0": 0.018, "loss_giou_0": 0.242, "loss_self_iou_0": 0.006, "cardinality_error_0": 8.098, "loss_caption_0": 2.051, "loss_caption": 2.063, "total_loss": 11.373}, "9842": {"loss_ce": 0.288, "loss_counter": 0.11, "loss_bbox": 0.018, "loss_giou": 0.242, "loss_self_iou": 0.007, "cardinality_error": 7.662, "loss_ce_0": 0.286, "loss_counter_0": 0.11, "loss_bbox_0": 0.02, "loss_giou_0": 0.262, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.662, "loss_caption_0": 1.939, "loss_caption": 1.953, "total_loss": 11.058}, "9975": {"loss_ce": 0.28, "loss_counter": 0.116, "loss_bbox": 0.017, "loss_giou": 0.238, "loss_self_iou": 0.006, "cardinality_error": 8.233, "loss_ce_0": 0.281, "loss_counter_0": 0.116, "loss_bbox_0": 0.018, "loss_giou_0": 0.255, "loss_self_iou_0": 0.007, "cardinality_error_0": 8.233, "loss_caption_0": 2.024, "loss_caption": 2.026, "total_loss": 11.31}, "10108": {"loss_ce": 0.283, "loss_counter": 0.111, "loss_bbox": 0.018, "loss_giou": 0.232, "loss_self_iou": 0.006, "cardinality_error": 7.466, "loss_ce_0": 0.279, "loss_counter_0": 0.112, "loss_bbox_0": 0.02, "loss_giou_0": 0.246, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.466, "loss_caption_0": 1.878, "loss_caption": 1.882, "total_loss": 10.667}, "10241": {"loss_ce": 0.285, "loss_counter": 0.119, "loss_bbox": 0.018, "loss_giou": 0.24, "loss_self_iou": 0.007, "cardinality_error": 7.722, "loss_ce_0": 0.282, "loss_counter_0": 0.119, "loss_bbox_0": 0.019, "loss_giou_0": 0.253, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.722, "loss_caption_0": 1.984, "loss_caption": 1.988, "total_loss": 11.165}, "10374": {"loss_ce": 0.292, "loss_counter": 0.113, "loss_bbox": 0.017, "loss_giou": 0.225, "loss_self_iou": 0.007, "cardinality_error": 7.692, "loss_ce_0": 0.285, "loss_counter_0": 0.112, "loss_bbox_0": 0.019, "loss_giou_0": 0.241, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.692, "loss_caption_0": 2.089, "loss_caption": 2.094, "total_loss": 11.498}, "10507": {"loss_ce": 0.287, "loss_counter": 0.113, "loss_bbox": 0.019, "loss_giou": 0.22, "loss_self_iou": 0.007, "cardinality_error": 7.564, "loss_ce_0": 0.283, "loss_counter_0": 0.113, "loss_bbox_0": 0.021, "loss_giou_0": 0.241, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.564, "loss_caption_0": 1.936, "loss_caption": 1.935, "total_loss": 10.84}, "10640": {"loss_ce": 0.281, "loss_counter": 0.115, "loss_bbox": 0.02, "loss_giou": 0.232, "loss_self_iou": 0.008, "cardinality_error": 7.549, "loss_ce_0": 0.278, "loss_counter_0": 0.116, "loss_bbox_0": 0.022, "loss_giou_0": 0.249, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.549, "loss_caption_0": 2.041, "loss_caption": 2.042, "total_loss": 11.323}, "10773": {"loss_ce": 0.279, "loss_counter": 0.114, "loss_bbox": 0.017, "loss_giou": 0.235, "loss_self_iou": 0.006, "cardinality_error": 7.94, "loss_ce_0": 0.278, "loss_counter_0": 0.115, "loss_bbox_0": 0.018, "loss_giou_0": 0.253, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.94, "loss_caption_0": 1.851, "loss_caption": 1.84, "total_loss": 10.561}, "10906": {"loss_ce": 0.279, "loss_counter": 0.109, "loss_bbox": 0.017, "loss_giou": 0.215, "loss_self_iou": 0.006, "cardinality_error": 7.218, "loss_ce_0": 0.278, "loss_counter_0": 0.109, "loss_bbox_0": 0.018, "loss_giou_0": 0.231, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.218, "loss_caption_0": 1.945, "loss_caption": 1.948, "total_loss": 10.791}, "11039": {"loss_ce": 0.288, "loss_counter": 0.108, "loss_bbox": 0.017, "loss_giou": 0.207, "loss_self_iou": 0.006, "cardinality_error": 7.579, "loss_ce_0": 0.283, "loss_counter_0": 0.109, "loss_bbox_0": 0.018, "loss_giou_0": 0.223, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.579, "loss_caption_0": 1.92, "loss_caption": 1.927, "total_loss": 10.664}, "11172": {"loss_ce": 0.28, "loss_counter": 0.11, "loss_bbox": 0.018, "loss_giou": 0.215, "loss_self_iou": 0.006, "cardinality_error": 7.451, "loss_ce_0": 0.279, "loss_counter_0": 0.11, "loss_bbox_0": 0.019, "loss_giou_0": 0.231, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.451, "loss_caption_0": 1.91, "loss_caption": 1.9, "total_loss": 10.635}, "11305": {"loss_ce": 0.278, "loss_counter": 0.125, "loss_bbox": 0.017, "loss_giou": 0.233, "loss_self_iou": 0.006, "cardinality_error": 8.09, "loss_ce_0": 0.276, "loss_counter_0": 0.126, "loss_bbox_0": 0.018, "loss_giou_0": 0.244, "loss_self_iou_0": 0.006, "cardinality_error_0": 8.09, "loss_caption_0": 1.876, "loss_caption": 1.877, "total_loss": 10.648}, "11438": {"loss_ce": 0.273, "loss_counter": 0.113, "loss_bbox": 0.016, "loss_giou": 0.211, "loss_self_iou": 0.005, "cardinality_error": 7.744, "loss_ce_0": 0.269, "loss_counter_0": 0.113, "loss_bbox_0": 0.017, "loss_giou_0": 0.231, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.744, "loss_caption_0": 1.981, "loss_caption": 1.968, "total_loss": 10.865}, "11571": {"loss_ce": 0.281, "loss_counter": 0.114, "loss_bbox": 0.018, "loss_giou": 0.225, "loss_self_iou": 0.006, "cardinality_error": 7.699, "loss_ce_0": 0.277, "loss_counter_0": 0.115, "loss_bbox_0": 0.02, "loss_giou_0": 0.243, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.699, "loss_caption_0": 1.833, "loss_caption": 1.846, "total_loss": 10.461}, "11704": {"loss_ce": 0.28, "loss_counter": 0.115, "loss_bbox": 0.017, "loss_giou": 0.21, "loss_self_iou": 0.006, "cardinality_error": 7.82, "loss_ce_0": 0.278, "loss_counter_0": 0.116, "loss_bbox_0": 0.017, "loss_giou_0": 0.226, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.82, "loss_caption_0": 1.91, "loss_caption": 1.915, "total_loss": 10.628}, "11837": {"loss_ce": 0.271, "loss_counter": 0.111, "loss_bbox": 0.016, "loss_giou": 0.215, "loss_self_iou": 0.007, "cardinality_error": 8.0, "loss_ce_0": 0.273, "loss_counter_0": 0.112, "loss_bbox_0": 0.018, "loss_giou_0": 0.23, "loss_self_iou_0": 0.007, "cardinality_error_0": 8.0, "loss_caption_0": 1.936, "loss_caption": 1.939, "total_loss": 10.726}, "11970": {"loss_ce": 0.272, "loss_counter": 0.115, "loss_bbox": 0.017, "loss_giou": 0.22, "loss_self_iou": 0.006, "cardinality_error": 8.158, "loss_ce_0": 0.27, "loss_counter_0": 0.116, "loss_bbox_0": 0.018, "loss_giou_0": 0.242, "loss_self_iou_0": 0.005, "cardinality_error_0": 8.158, "loss_caption_0": 1.953, "loss_caption": 1.962, "total_loss": 10.881}, "12103": {"loss_ce": 0.275, "loss_counter": 0.111, "loss_bbox": 0.016, "loss_giou": 0.216, "loss_self_iou": 0.006, "cardinality_error": 8.038, "loss_ce_0": 0.274, "loss_counter_0": 0.111, "loss_bbox_0": 0.017, "loss_giou_0": 0.231, "loss_self_iou_0": 0.007, "cardinality_error_0": 8.038, "loss_caption_0": 1.832, "loss_caption": 1.845, "total_loss": 10.35}, "12236": {"loss_ce": 0.272, "loss_counter": 0.111, "loss_bbox": 0.016, "loss_giou": 0.206, "loss_self_iou": 0.005, "cardinality_error": 7.812, "loss_ce_0": 0.266, "loss_counter_0": 0.111, "loss_bbox_0": 0.018, "loss_giou_0": 0.223, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.812, "loss_caption_0": 1.968, "loss_caption": 1.959, "total_loss": 10.757}, "12369": {"loss_ce": 0.273, "loss_counter": 0.118, "loss_bbox": 0.016, "loss_giou": 0.21, "loss_self_iou": 0.005, "cardinality_error": 7.827, "loss_ce_0": 0.27, "loss_counter_0": 0.118, "loss_bbox_0": 0.017, "loss_giou_0": 0.226, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.827, "loss_caption_0": 1.89, "loss_caption": 1.903, "total_loss": 10.534}, "12502": {"loss_ce": 0.27, "loss_counter": 0.108, "loss_bbox": 0.016, "loss_giou": 0.205, "loss_self_iou": 0.006, "cardinality_error": 7.684, "loss_ce_0": 0.268, "loss_counter_0": 0.108, "loss_bbox_0": 0.017, "loss_giou_0": 0.224, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.684, "loss_caption_0": 1.903, "loss_caption": 1.905, "total_loss": 10.519}, "12635": {"loss_ce": 0.27, "loss_counter": 0.111, "loss_bbox": 0.015, "loss_giou": 0.218, "loss_self_iou": 0.005, "cardinality_error": 7.947, "loss_ce_0": 0.269, "loss_counter_0": 0.112, "loss_bbox_0": 0.016, "loss_giou_0": 0.232, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.947, "loss_caption_0": 1.822, "loss_caption": 1.826, "total_loss": 10.284}, "12768": {"loss_ce": 0.277, "loss_counter": 0.111, "loss_bbox": 0.017, "loss_giou": 0.219, "loss_self_iou": 0.008, "cardinality_error": 7.669, "loss_ce_0": 0.276, "loss_counter_0": 0.112, "loss_bbox_0": 0.018, "loss_giou_0": 0.235, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.669, "loss_caption_0": 1.905, "loss_caption": 1.909, "total_loss": 10.662}, "12901": {"loss_ce": 0.269, "loss_counter": 0.106, "loss_bbox": 0.015, "loss_giou": 0.208, "loss_self_iou": 0.005, "cardinality_error": 7.639, "loss_ce_0": 0.267, "loss_counter_0": 0.108, "loss_bbox_0": 0.017, "loss_giou_0": 0.224, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.639, "loss_caption_0": 1.856, "loss_caption": 1.863, "total_loss": 10.344}, "13034": {"loss_ce": 0.273, "loss_counter": 0.112, "loss_bbox": 0.015, "loss_giou": 0.216, "loss_self_iou": 0.005, "cardinality_error": 7.85, "loss_ce_0": 0.274, "loss_counter_0": 0.113, "loss_bbox_0": 0.017, "loss_giou_0": 0.231, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.85, "loss_caption_0": 1.841, "loss_caption": 1.841, "total_loss": 10.356}, "13167": {"loss_ce": 0.275, "loss_counter": 0.109, "loss_bbox": 0.018, "loss_giou": 0.21, "loss_self_iou": 0.005, "cardinality_error": 7.406, "loss_ce_0": 0.273, "loss_counter_0": 0.109, "loss_bbox_0": 0.019, "loss_giou_0": 0.226, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.406, "loss_caption_0": 1.931, "loss_caption": 1.927, "total_loss": 10.663}, "13300": {"loss_ce": 0.274, "loss_counter": 0.113, "loss_bbox": 0.017, "loss_giou": 0.212, "loss_self_iou": 0.005, "cardinality_error": 7.737, "loss_ce_0": 0.272, "loss_counter_0": 0.113, "loss_bbox_0": 0.019, "loss_giou_0": 0.23, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.737, "loss_caption_0": 1.853, "loss_caption": 1.849, "total_loss": 10.379}, "13433": {"loss_ce": 0.271, "loss_counter": 0.112, "loss_bbox": 0.017, "loss_giou": 0.217, "loss_self_iou": 0.006, "cardinality_error": 7.835, "loss_ce_0": 0.267, "loss_counter_0": 0.112, "loss_bbox_0": 0.018, "loss_giou_0": 0.235, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.835, "loss_caption_0": 1.804, "loss_caption": 1.811, "total_loss": 10.223}, "13566": {"loss_ce": 0.266, "loss_counter": 0.116, "loss_bbox": 0.015, "loss_giou": 0.204, "loss_self_iou": 0.005, "cardinality_error": 7.774, "loss_ce_0": 0.266, "loss_counter_0": 0.116, "loss_bbox_0": 0.017, "loss_giou_0": 0.221, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.774, "loss_caption_0": 1.884, "loss_caption": 1.887, "total_loss": 10.42}, "13699": {"loss_ce": 0.261, "loss_counter": 0.111, "loss_bbox": 0.016, "loss_giou": 0.201, "loss_self_iou": 0.006, "cardinality_error": 7.729, "loss_ce_0": 0.259, "loss_counter_0": 0.11, "loss_bbox_0": 0.017, "loss_giou_0": 0.218, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.729, "loss_caption_0": 1.823, "loss_caption": 1.806, "total_loss": 10.083}, "13832": {"loss_ce": 0.269, "loss_counter": 0.111, "loss_bbox": 0.016, "loss_giou": 0.211, "loss_self_iou": 0.005, "cardinality_error": 7.699, "loss_ce_0": 0.271, "loss_counter_0": 0.112, "loss_bbox_0": 0.017, "loss_giou_0": 0.228, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.699, "loss_caption_0": 1.855, "loss_caption": 1.857, "total_loss": 10.374}, "13965": {"loss_ce": 0.275, "loss_counter": 0.105, "loss_bbox": 0.016, "loss_giou": 0.196, "loss_self_iou": 0.006, "cardinality_error": 7.128, "loss_ce_0": 0.271, "loss_counter_0": 0.106, "loss_bbox_0": 0.017, "loss_giou_0": 0.214, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.128, "loss_caption_0": 1.809, "loss_caption": 1.8, "total_loss": 10.055}, "14098": {"loss_ce": 0.273, "loss_counter": 0.112, "loss_bbox": 0.016, "loss_giou": 0.213, "loss_self_iou": 0.007, "cardinality_error": 7.925, "loss_ce_0": 0.273, "loss_counter_0": 0.113, "loss_bbox_0": 0.018, "loss_giou_0": 0.23, "loss_self_iou_0": 0.008, "cardinality_error_0": 7.925, "loss_caption_0": 1.863, "loss_caption": 1.863, "total_loss": 10.433}, "14231": {"loss_ce": 0.261, "loss_counter": 0.113, "loss_bbox": 0.017, "loss_giou": 0.212, "loss_self_iou": 0.007, "cardinality_error": 7.82, "loss_ce_0": 0.262, "loss_counter_0": 0.114, "loss_bbox_0": 0.018, "loss_giou_0": 0.222, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.82, "loss_caption_0": 1.936, "loss_caption": 1.929, "total_loss": 10.624}, "14364": {"loss_ce": 0.263, "loss_counter": 0.104, "loss_bbox": 0.015, "loss_giou": 0.216, "loss_self_iou": 0.005, "cardinality_error": 7.744, "loss_ce_0": 0.263, "loss_counter_0": 0.104, "loss_bbox_0": 0.016, "loss_giou_0": 0.227, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.744, "loss_caption_0": 1.757, "loss_caption": 1.754, "total_loss": 9.948}, "14497": {"loss_ce": 0.266, "loss_counter": 0.11, "loss_bbox": 0.015, "loss_giou": 0.2, "loss_self_iou": 0.005, "cardinality_error": 7.827, "loss_ce_0": 0.265, "loss_counter_0": 0.111, "loss_bbox_0": 0.017, "loss_giou_0": 0.214, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.827, "loss_caption_0": 1.896, "loss_caption": 1.894, "total_loss": 10.407}, "14630": {"loss_ce": 0.263, "loss_counter": 0.113, "loss_bbox": 0.015, "loss_giou": 0.208, "loss_self_iou": 0.005, "cardinality_error": 7.925, "loss_ce_0": 0.261, "loss_counter_0": 0.113, "loss_bbox_0": 0.016, "loss_giou_0": 0.224, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.925, "loss_caption_0": 1.84, "loss_caption": 1.842, "total_loss": 10.253}, "14763": {"loss_ce": 0.266, "loss_counter": 0.111, "loss_bbox": 0.015, "loss_giou": 0.208, "loss_self_iou": 0.006, "cardinality_error": 7.85, "loss_ce_0": 0.264, "loss_counter_0": 0.111, "loss_bbox_0": 0.016, "loss_giou_0": 0.225, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.85, "loss_caption_0": 1.87, "loss_caption": 1.877, "total_loss": 10.398}, "14896": {"loss_ce": 0.26, "loss_counter": 0.112, "loss_bbox": 0.015, "loss_giou": 0.2, "loss_self_iou": 0.005, "cardinality_error": 7.692, "loss_ce_0": 0.259, "loss_counter_0": 0.112, "loss_bbox_0": 0.016, "loss_giou_0": 0.217, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.692, "loss_caption_0": 1.796, "loss_caption": 1.784, "total_loss": 9.979}, "15029": {"loss_ce": 0.264, "loss_counter": 0.103, "loss_bbox": 0.015, "loss_giou": 0.195, "loss_self_iou": 0.006, "cardinality_error": 7.414, "loss_ce_0": 0.264, "loss_counter_0": 0.104, "loss_bbox_0": 0.016, "loss_giou_0": 0.211, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.414, "loss_caption_0": 1.763, "loss_caption": 1.767, "total_loss": 9.842}, "15162": {"loss_ce": 0.263, "loss_counter": 0.113, "loss_bbox": 0.015, "loss_giou": 0.196, "loss_self_iou": 0.004, "cardinality_error": 7.767, "loss_ce_0": 0.262, "loss_counter_0": 0.113, "loss_bbox_0": 0.016, "loss_giou_0": 0.211, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.767, "loss_caption_0": 1.781, "loss_caption": 1.781, "total_loss": 9.916}, "15295": {"loss_ce": 0.257, "loss_counter": 0.106, "loss_bbox": 0.015, "loss_giou": 0.2, "loss_self_iou": 0.005, "cardinality_error": 7.662, "loss_ce_0": 0.255, "loss_counter_0": 0.108, "loss_bbox_0": 0.016, "loss_giou_0": 0.214, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.662, "loss_caption_0": 1.735, "loss_caption": 1.75, "total_loss": 9.755}, "15428": {"loss_ce": 0.258, "loss_counter": 0.114, "loss_bbox": 0.015, "loss_giou": 0.21, "loss_self_iou": 0.005, "cardinality_error": 7.992, "loss_ce_0": 0.261, "loss_counter_0": 0.114, "loss_bbox_0": 0.016, "loss_giou_0": 0.22, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.992, "loss_caption_0": 1.852, "loss_caption": 1.86, "total_loss": 10.298}, "15561": {"loss_ce": 0.256, "loss_counter": 0.112, "loss_bbox": 0.015, "loss_giou": 0.204, "loss_self_iou": 0.006, "cardinality_error": 8.068, "loss_ce_0": 0.257, "loss_counter_0": 0.113, "loss_bbox_0": 0.016, "loss_giou_0": 0.218, "loss_self_iou_0": 0.006, "cardinality_error_0": 8.068, "loss_caption_0": 1.878, "loss_caption": 1.866, "total_loss": 10.314}, "15694": {"loss_ce": 0.256, "loss_counter": 0.106, "loss_bbox": 0.015, "loss_giou": 0.202, "loss_self_iou": 0.004, "cardinality_error": 7.647, "loss_ce_0": 0.257, "loss_counter_0": 0.107, "loss_bbox_0": 0.016, "loss_giou_0": 0.215, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.647, "loss_caption_0": 1.7, "loss_caption": 1.684, "total_loss": 9.569}, "15827": {"loss_ce": 0.259, "loss_counter": 0.104, "loss_bbox": 0.016, "loss_giou": 0.194, "loss_self_iou": 0.005, "cardinality_error": 7.722, "loss_ce_0": 0.257, "loss_counter_0": 0.105, "loss_bbox_0": 0.017, "loss_giou_0": 0.209, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.722, "loss_caption_0": 1.848, "loss_caption": 1.839, "total_loss": 10.119}, "15960": {"loss_ce": 0.26, "loss_counter": 0.107, "loss_bbox": 0.015, "loss_giou": 0.197, "loss_self_iou": 0.004, "cardinality_error": 7.609, "loss_ce_0": 0.257, "loss_counter_0": 0.107, "loss_bbox_0": 0.017, "loss_giou_0": 0.214, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.609, "loss_caption_0": 1.847, "loss_caption": 1.858, "total_loss": 10.198}, "16093": {"loss_ce": 0.257, "loss_counter": 0.109, "loss_bbox": 0.015, "loss_giou": 0.19, "loss_self_iou": 0.004, "cardinality_error": 7.992, "loss_ce_0": 0.258, "loss_counter_0": 0.11, "loss_bbox_0": 0.015, "loss_giou_0": 0.202, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.992, "loss_caption_0": 1.773, "loss_caption": 1.769, "total_loss": 9.789}, "16226": {"loss_ce": 0.26, "loss_counter": 0.11, "loss_bbox": 0.014, "loss_giou": 0.198, "loss_self_iou": 0.004, "cardinality_error": 7.805, "loss_ce_0": 0.259, "loss_counter_0": 0.111, "loss_bbox_0": 0.015, "loss_giou_0": 0.215, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.805, "loss_caption_0": 1.743, "loss_caption": 1.749, "total_loss": 9.786}, "16359": {"loss_ce": 0.265, "loss_counter": 0.116, "loss_bbox": 0.014, "loss_giou": 0.198, "loss_self_iou": 0.005, "cardinality_error": 7.85, "loss_ce_0": 0.264, "loss_counter_0": 0.115, "loss_bbox_0": 0.015, "loss_giou_0": 0.214, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.85, "loss_caption_0": 1.797, "loss_caption": 1.778, "total_loss": 9.972}, "16492": {"loss_ce": 0.254, "loss_counter": 0.105, "loss_bbox": 0.015, "loss_giou": 0.189, "loss_self_iou": 0.004, "cardinality_error": 7.383, "loss_ce_0": 0.257, "loss_counter_0": 0.106, "loss_bbox_0": 0.016, "loss_giou_0": 0.202, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.383, "loss_caption_0": 1.796, "loss_caption": 1.808, "total_loss": 9.899}, "16625": {"loss_ce": 0.258, "loss_counter": 0.109, "loss_bbox": 0.014, "loss_giou": 0.186, "loss_self_iou": 0.005, "cardinality_error": 7.782, "loss_ce_0": 0.256, "loss_counter_0": 0.11, "loss_bbox_0": 0.015, "loss_giou_0": 0.203, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.782, "loss_caption_0": 1.78, "loss_caption": 1.779, "total_loss": 9.812}, "16758": {"loss_ce": 0.252, "loss_counter": 0.106, "loss_bbox": 0.014, "loss_giou": 0.196, "loss_self_iou": 0.005, "cardinality_error": 7.962, "loss_ce_0": 0.252, "loss_counter_0": 0.107, "loss_bbox_0": 0.015, "loss_giou_0": 0.211, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.962, "loss_caption_0": 1.795, "loss_caption": 1.806, "total_loss": 9.948}, "16891": {"loss_ce": 0.258, "loss_counter": 0.109, "loss_bbox": 0.016, "loss_giou": 0.199, "loss_self_iou": 0.005, "cardinality_error": 7.797, "loss_ce_0": 0.255, "loss_counter_0": 0.111, "loss_bbox_0": 0.017, "loss_giou_0": 0.211, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.797, "loss_caption_0": 1.788, "loss_caption": 1.782, "total_loss": 9.914}, "17024": {"loss_ce": 0.262, "loss_counter": 0.11, "loss_bbox": 0.014, "loss_giou": 0.198, "loss_self_iou": 0.005, "cardinality_error": 7.511, "loss_ce_0": 0.26, "loss_counter_0": 0.11, "loss_bbox_0": 0.016, "loss_giou_0": 0.211, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.511, "loss_caption_0": 1.717, "loss_caption": 1.72, "total_loss": 9.666}, "17157": {"loss_ce": 0.25, "loss_counter": 0.104, "loss_bbox": 0.015, "loss_giou": 0.189, "loss_self_iou": 0.004, "cardinality_error": 7.692, "loss_ce_0": 0.252, "loss_counter_0": 0.106, "loss_bbox_0": 0.016, "loss_giou_0": 0.2, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.692, "loss_caption_0": 1.738, "loss_caption": 1.749, "total_loss": 9.638}, "17290": {"loss_ce": 0.254, "loss_counter": 0.109, "loss_bbox": 0.015, "loss_giou": 0.198, "loss_self_iou": 0.005, "cardinality_error": 7.932, "loss_ce_0": 0.254, "loss_counter_0": 0.109, "loss_bbox_0": 0.016, "loss_giou_0": 0.214, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.932, "loss_caption_0": 1.815, "loss_caption": 1.83, "total_loss": 10.067}, "17423": {"loss_ce": 0.262, "loss_counter": 0.111, "loss_bbox": 0.015, "loss_giou": 0.195, "loss_self_iou": 0.007, "cardinality_error": 7.692, "loss_ce_0": 0.259, "loss_counter_0": 0.11, "loss_bbox_0": 0.016, "loss_giou_0": 0.208, "loss_self_iou_0": 0.007, "cardinality_error_0": 7.692, "loss_caption_0": 1.865, "loss_caption": 1.881, "total_loss": 10.261}, "17556": {"loss_ce": 0.252, "loss_counter": 0.111, "loss_bbox": 0.014, "loss_giou": 0.193, "loss_self_iou": 0.004, "cardinality_error": 7.737, "loss_ce_0": 0.253, "loss_counter_0": 0.112, "loss_bbox_0": 0.015, "loss_giou_0": 0.209, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.737, "loss_caption_0": 1.744, "loss_caption": 1.743, "total_loss": 9.707}, "17689": {"loss_ce": 0.259, "loss_counter": 0.109, "loss_bbox": 0.015, "loss_giou": 0.199, "loss_self_iou": 0.006, "cardinality_error": 7.602, "loss_ce_0": 0.262, "loss_counter_0": 0.109, "loss_bbox_0": 0.016, "loss_giou_0": 0.211, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.602, "loss_caption_0": 1.835, "loss_caption": 1.819, "total_loss": 10.1}, "17822": {"loss_ce": 0.25, "loss_counter": 0.108, "loss_bbox": 0.014, "loss_giou": 0.191, "loss_self_iou": 0.005, "cardinality_error": 7.526, "loss_ce_0": 0.249, "loss_counter_0": 0.108, "loss_bbox_0": 0.016, "loss_giou_0": 0.206, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.526, "loss_caption_0": 1.681, "loss_caption": 1.67, "total_loss": 9.397}, "17955": {"loss_ce": 0.255, "loss_counter": 0.102, "loss_bbox": 0.014, "loss_giou": 0.184, "loss_self_iou": 0.005, "cardinality_error": 7.526, "loss_ce_0": 0.252, "loss_counter_0": 0.104, "loss_bbox_0": 0.015, "loss_giou_0": 0.2, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.526, "loss_caption_0": 1.757, "loss_caption": 1.745, "total_loss": 9.658}, "18088": {"loss_ce": 0.251, "loss_counter": 0.106, "loss_bbox": 0.014, "loss_giou": 0.177, "loss_self_iou": 0.004, "cardinality_error": 7.534, "loss_ce_0": 0.251, "loss_counter_0": 0.107, "loss_bbox_0": 0.016, "loss_giou_0": 0.191, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.534, "loss_caption_0": 1.703, "loss_caption": 1.701, "total_loss": 9.39}, "18221": {"loss_ce": 0.252, "loss_counter": 0.111, "loss_bbox": 0.014, "loss_giou": 0.201, "loss_self_iou": 0.005, "cardinality_error": 8.211, "loss_ce_0": 0.252, "loss_counter_0": 0.111, "loss_bbox_0": 0.015, "loss_giou_0": 0.213, "loss_self_iou_0": 0.005, "cardinality_error_0": 8.211, "loss_caption_0": 1.824, "loss_caption": 1.816, "total_loss": 10.053}, "18354": {"loss_ce": 0.253, "loss_counter": 0.104, "loss_bbox": 0.015, "loss_giou": 0.195, "loss_self_iou": 0.004, "cardinality_error": 7.789, "loss_ce_0": 0.249, "loss_counter_0": 0.105, "loss_bbox_0": 0.016, "loss_giou_0": 0.21, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.789, "loss_caption_0": 1.792, "loss_caption": 1.779, "total_loss": 9.874}, "18487": {"loss_ce": 0.255, "loss_counter": 0.111, "loss_bbox": 0.013, "loss_giou": 0.19, "loss_self_iou": 0.004, "cardinality_error": 7.992, "loss_ce_0": 0.251, "loss_counter_0": 0.112, "loss_bbox_0": 0.014, "loss_giou_0": 0.205, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.992, "loss_caption_0": 1.826, "loss_caption": 1.81, "total_loss": 9.979}, "18620": {"loss_ce": 0.251, "loss_counter": 0.109, "loss_bbox": 0.015, "loss_giou": 0.193, "loss_self_iou": 0.003, "cardinality_error": 7.737, "loss_ce_0": 0.251, "loss_counter_0": 0.11, "loss_bbox_0": 0.016, "loss_giou_0": 0.206, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.737, "loss_caption_0": 1.767, "loss_caption": 1.771, "total_loss": 9.784}, "18753": {"loss_ce": 0.247, "loss_counter": 0.115, "loss_bbox": 0.013, "loss_giou": 0.195, "loss_self_iou": 0.004, "cardinality_error": 8.241, "loss_ce_0": 0.251, "loss_counter_0": 0.115, "loss_bbox_0": 0.014, "loss_giou_0": 0.207, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.241, "loss_caption_0": 1.758, "loss_caption": 1.759, "total_loss": 9.756}, "18886": {"loss_ce": 0.247, "loss_counter": 0.103, "loss_bbox": 0.015, "loss_giou": 0.182, "loss_self_iou": 0.004, "cardinality_error": 7.436, "loss_ce_0": 0.245, "loss_counter_0": 0.104, "loss_bbox_0": 0.016, "loss_giou_0": 0.194, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.436, "loss_caption_0": 1.696, "loss_caption": 1.692, "total_loss": 9.366}, "19019": {"loss_ce": 0.243, "loss_counter": 0.104, "loss_bbox": 0.013, "loss_giou": 0.181, "loss_self_iou": 0.003, "cardinality_error": 7.692, "loss_ce_0": 0.242, "loss_counter_0": 0.105, "loss_bbox_0": 0.015, "loss_giou_0": 0.194, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.692, "loss_caption_0": 1.73, "loss_caption": 1.729, "total_loss": 9.496}, "19152": {"loss_ce": 0.251, "loss_counter": 0.112, "loss_bbox": 0.014, "loss_giou": 0.181, "loss_self_iou": 0.006, "cardinality_error": 7.82, "loss_ce_0": 0.251, "loss_counter_0": 0.114, "loss_bbox_0": 0.015, "loss_giou_0": 0.194, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.82, "loss_caption_0": 1.779, "loss_caption": 1.771, "total_loss": 9.714}, "19285": {"loss_ce": 0.25, "loss_counter": 0.105, "loss_bbox": 0.014, "loss_giou": 0.194, "loss_self_iou": 0.004, "cardinality_error": 7.669, "loss_ce_0": 0.25, "loss_counter_0": 0.105, "loss_bbox_0": 0.015, "loss_giou_0": 0.204, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.669, "loss_caption_0": 1.76, "loss_caption": 1.772, "total_loss": 9.759}, "19418": {"loss_ce": 0.244, "loss_counter": 0.115, "loss_bbox": 0.013, "loss_giou": 0.197, "loss_self_iou": 0.004, "cardinality_error": 8.256, "loss_ce_0": 0.245, "loss_counter_0": 0.117, "loss_bbox_0": 0.014, "loss_giou_0": 0.211, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.256, "loss_caption_0": 1.754, "loss_caption": 1.758, "total_loss": 9.747}, "19551": {"loss_ce": 0.249, "loss_counter": 0.109, "loss_bbox": 0.013, "loss_giou": 0.175, "loss_self_iou": 0.004, "cardinality_error": 7.865, "loss_ce_0": 0.253, "loss_counter_0": 0.11, "loss_bbox_0": 0.014, "loss_giou_0": 0.187, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.865, "loss_caption_0": 1.68, "loss_caption": 1.689, "total_loss": 9.3}, "19684": {"loss_ce": 0.263, "loss_counter": 0.104, "loss_bbox": 0.015, "loss_giou": 0.187, "loss_self_iou": 0.005, "cardinality_error": 7.474, "loss_ce_0": 0.262, "loss_counter_0": 0.105, "loss_bbox_0": 0.016, "loss_giou_0": 0.199, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.474, "loss_caption_0": 1.81, "loss_caption": 1.803, "total_loss": 9.923}, "19817": {"loss_ce": 0.246, "loss_counter": 0.106, "loss_bbox": 0.014, "loss_giou": 0.183, "loss_self_iou": 0.005, "cardinality_error": 7.526, "loss_ce_0": 0.247, "loss_counter_0": 0.107, "loss_bbox_0": 0.015, "loss_giou_0": 0.197, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.526, "loss_caption_0": 1.769, "loss_caption": 1.765, "total_loss": 9.677}, "19950": {"loss_ce": 0.254, "loss_counter": 0.108, "loss_bbox": 0.013, "loss_giou": 0.19, "loss_self_iou": 0.005, "cardinality_error": 7.797, "loss_ce_0": 0.253, "loss_counter_0": 0.108, "loss_bbox_0": 0.014, "loss_giou_0": 0.202, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.797, "loss_caption_0": 1.736, "loss_caption": 1.748, "total_loss": 9.654}, "20083": {"loss_ce": 0.254, "loss_counter": 0.102, "loss_bbox": 0.014, "loss_giou": 0.186, "loss_self_iou": 0.004, "cardinality_error": 7.519, "loss_ce_0": 0.257, "loss_counter_0": 0.103, "loss_bbox_0": 0.015, "loss_giou_0": 0.197, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.519, "loss_caption_0": 1.743, "loss_caption": 1.756, "total_loss": 9.655}, "20216": {"loss_ce": 0.244, "loss_counter": 0.105, "loss_bbox": 0.013, "loss_giou": 0.179, "loss_self_iou": 0.003, "cardinality_error": 7.759, "loss_ce_0": 0.244, "loss_counter_0": 0.106, "loss_bbox_0": 0.014, "loss_giou_0": 0.193, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.759, "loss_caption_0": 1.79, "loss_caption": 1.781, "total_loss": 9.713}, "20349": {"loss_ce": 0.246, "loss_counter": 0.11, "loss_bbox": 0.013, "loss_giou": 0.19, "loss_self_iou": 0.004, "cardinality_error": 7.992, "loss_ce_0": 0.245, "loss_counter_0": 0.113, "loss_bbox_0": 0.014, "loss_giou_0": 0.202, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.992, "loss_caption_0": 1.749, "loss_caption": 1.759, "total_loss": 9.675}, "20482": {"loss_ce": 0.244, "loss_counter": 0.109, "loss_bbox": 0.015, "loss_giou": 0.193, "loss_self_iou": 0.005, "cardinality_error": 7.94, "loss_ce_0": 0.244, "loss_counter_0": 0.109, "loss_bbox_0": 0.016, "loss_giou_0": 0.207, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.94, "loss_caption_0": 1.694, "loss_caption": 1.715, "total_loss": 9.502}, "20615": {"loss_ce": 0.257, "loss_counter": 0.107, "loss_bbox": 0.014, "loss_giou": 0.188, "loss_self_iou": 0.005, "cardinality_error": 7.368, "loss_ce_0": 0.257, "loss_counter_0": 0.107, "loss_bbox_0": 0.015, "loss_giou_0": 0.202, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.368, "loss_caption_0": 1.77, "loss_caption": 1.771, "total_loss": 9.775}, "20748": {"loss_ce": 0.247, "loss_counter": 0.107, "loss_bbox": 0.013, "loss_giou": 0.178, "loss_self_iou": 0.004, "cardinality_error": 7.857, "loss_ce_0": 0.247, "loss_counter_0": 0.108, "loss_bbox_0": 0.014, "loss_giou_0": 0.192, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.857, "loss_caption_0": 1.786, "loss_caption": 1.773, "total_loss": 9.695}, "20881": {"loss_ce": 0.243, "loss_counter": 0.103, "loss_bbox": 0.013, "loss_giou": 0.178, "loss_self_iou": 0.003, "cardinality_error": 7.594, "loss_ce_0": 0.242, "loss_counter_0": 0.103, "loss_bbox_0": 0.014, "loss_giou_0": 0.192, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.594, "loss_caption_0": 1.746, "loss_caption": 1.748, "total_loss": 9.541}, "21014": {"loss_ce": 0.249, "loss_counter": 0.108, "loss_bbox": 0.015, "loss_giou": 0.19, "loss_self_iou": 0.005, "cardinality_error": 8.09, "loss_ce_0": 0.249, "loss_counter_0": 0.11, "loss_bbox_0": 0.016, "loss_giou_0": 0.202, "loss_self_iou_0": 0.005, "cardinality_error_0": 8.09, "loss_caption_0": 1.709, "loss_caption": 1.698, "total_loss": 9.49}, "21147": {"loss_ce": 0.246, "loss_counter": 0.115, "loss_bbox": 0.014, "loss_giou": 0.186, "loss_self_iou": 0.004, "cardinality_error": 7.812, "loss_ce_0": 0.248, "loss_counter_0": 0.114, "loss_bbox_0": 0.015, "loss_giou_0": 0.198, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.812, "loss_caption_0": 1.733, "loss_caption": 1.732, "total_loss": 9.57}, "21280": {"loss_ce": 0.246, "loss_counter": 0.104, "loss_bbox": 0.014, "loss_giou": 0.187, "loss_self_iou": 0.004, "cardinality_error": 7.632, "loss_ce_0": 0.245, "loss_counter_0": 0.105, "loss_bbox_0": 0.015, "loss_giou_0": 0.197, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.632, "loss_caption_0": 1.646, "loss_caption": 1.658, "total_loss": 9.233}, "21413": {"loss_ce": 0.24, "loss_counter": 0.107, "loss_bbox": 0.014, "loss_giou": 0.175, "loss_self_iou": 0.004, "cardinality_error": 7.541, "loss_ce_0": 0.239, "loss_counter_0": 0.108, "loss_bbox_0": 0.015, "loss_giou_0": 0.19, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.541, "loss_caption_0": 1.637, "loss_caption": 1.633, "total_loss": 9.069}, "21546": {"loss_ce": 0.245, "loss_counter": 0.102, "loss_bbox": 0.013, "loss_giou": 0.172, "loss_self_iou": 0.004, "cardinality_error": 7.624, "loss_ce_0": 0.243, "loss_counter_0": 0.103, "loss_bbox_0": 0.014, "loss_giou_0": 0.185, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.624, "loss_caption_0": 1.773, "loss_caption": 1.784, "total_loss": 9.621}, "21679": {"loss_ce": 0.239, "loss_counter": 0.107, "loss_bbox": 0.014, "loss_giou": 0.181, "loss_self_iou": 0.004, "cardinality_error": 7.992, "loss_ce_0": 0.238, "loss_counter_0": 0.109, "loss_bbox_0": 0.015, "loss_giou_0": 0.194, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.992, "loss_caption_0": 1.809, "loss_caption": 1.805, "total_loss": 9.791}, "21812": {"loss_ce": 0.246, "loss_counter": 0.107, "loss_bbox": 0.013, "loss_giou": 0.179, "loss_self_iou": 0.003, "cardinality_error": 7.677, "loss_ce_0": 0.25, "loss_counter_0": 0.108, "loss_bbox_0": 0.013, "loss_giou_0": 0.19, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.677, "loss_caption_0": 1.674, "loss_caption": 1.676, "total_loss": 9.277}, "21945": {"loss_ce": 0.244, "loss_counter": 0.108, "loss_bbox": 0.014, "loss_giou": 0.192, "loss_self_iou": 0.004, "cardinality_error": 7.865, "loss_ce_0": 0.244, "loss_counter_0": 0.108, "loss_bbox_0": 0.015, "loss_giou_0": 0.206, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.865, "loss_caption_0": 1.713, "loss_caption": 1.714, "total_loss": 9.531}, "22078": {"loss_ce": 0.251, "loss_counter": 0.11, "loss_bbox": 0.014, "loss_giou": 0.19, "loss_self_iou": 0.005, "cardinality_error": 7.707, "loss_ce_0": 0.247, "loss_counter_0": 0.111, "loss_bbox_0": 0.016, "loss_giou_0": 0.202, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.707, "loss_caption_0": 1.772, "loss_caption": 1.758, "total_loss": 9.738}, "22211": {"loss_ce": 0.249, "loss_counter": 0.101, "loss_bbox": 0.013, "loss_giou": 0.18, "loss_self_iou": 0.005, "cardinality_error": 7.541, "loss_ce_0": 0.249, "loss_counter_0": 0.101, "loss_bbox_0": 0.014, "loss_giou_0": 0.193, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.541, "loss_caption_0": 1.665, "loss_caption": 1.66, "total_loss": 9.243}, "22344": {"loss_ce": 0.246, "loss_counter": 0.113, "loss_bbox": 0.015, "loss_giou": 0.187, "loss_self_iou": 0.004, "cardinality_error": 8.008, "loss_ce_0": 0.248, "loss_counter_0": 0.115, "loss_bbox_0": 0.016, "loss_giou_0": 0.202, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.008, "loss_caption_0": 1.799, "loss_caption": 1.784, "total_loss": 9.823}, "22477": {"loss_ce": 0.246, "loss_counter": 0.102, "loss_bbox": 0.013, "loss_giou": 0.184, "loss_self_iou": 0.004, "cardinality_error": 7.699, "loss_ce_0": 0.247, "loss_counter_0": 0.104, "loss_bbox_0": 0.014, "loss_giou_0": 0.197, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.699, "loss_caption_0": 1.722, "loss_caption": 1.733, "total_loss": 9.525}, "22610": {"loss_ce": 0.243, "loss_counter": 0.106, "loss_bbox": 0.014, "loss_giou": 0.188, "loss_self_iou": 0.004, "cardinality_error": 7.729, "loss_ce_0": 0.245, "loss_counter_0": 0.106, "loss_bbox_0": 0.015, "loss_giou_0": 0.2, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.729, "loss_caption_0": 1.664, "loss_caption": 1.667, "total_loss": 9.297}, "22743": {"loss_ce": 0.244, "loss_counter": 0.108, "loss_bbox": 0.015, "loss_giou": 0.196, "loss_self_iou": 0.005, "cardinality_error": 7.714, "loss_ce_0": 0.244, "loss_counter_0": 0.109, "loss_bbox_0": 0.017, "loss_giou_0": 0.21, "loss_self_iou_0": 0.006, "cardinality_error_0": 7.714, "loss_caption_0": 1.773, "loss_caption": 1.775, "total_loss": 9.803}, "22876": {"loss_ce": 0.245, "loss_counter": 0.11, "loss_bbox": 0.013, "loss_giou": 0.181, "loss_self_iou": 0.004, "cardinality_error": 7.774, "loss_ce_0": 0.249, "loss_counter_0": 0.109, "loss_bbox_0": 0.015, "loss_giou_0": 0.192, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.774, "loss_caption_0": 1.76, "loss_caption": 1.759, "total_loss": 9.631}, "23009": {"loss_ce": 0.237, "loss_counter": 0.105, "loss_bbox": 0.012, "loss_giou": 0.171, "loss_self_iou": 0.003, "cardinality_error": 7.872, "loss_ce_0": 0.237, "loss_counter_0": 0.106, "loss_bbox_0": 0.014, "loss_giou_0": 0.183, "loss_self_iou_0": 0.003, "cardinality_error_0": 7.872, "loss_caption_0": 1.69, "loss_caption": 1.688, "total_loss": 9.229}, "23142": {"loss_ce": 0.242, "loss_counter": 0.098, "loss_bbox": 0.013, "loss_giou": 0.177, "loss_self_iou": 0.004, "cardinality_error": 7.744, "loss_ce_0": 0.239, "loss_counter_0": 0.1, "loss_bbox_0": 0.014, "loss_giou_0": 0.19, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.744, "loss_caption_0": 1.66, "loss_caption": 1.663, "total_loss": 9.173}, "23275": {"loss_ce": 0.242, "loss_counter": 0.108, "loss_bbox": 0.014, "loss_giou": 0.183, "loss_self_iou": 0.004, "cardinality_error": 7.82, "loss_ce_0": 0.242, "loss_counter_0": 0.109, "loss_bbox_0": 0.015, "loss_giou_0": 0.197, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.82, "loss_caption_0": 1.727, "loss_caption": 1.741, "total_loss": 9.535}, "23408": {"loss_ce": 0.235, "loss_counter": 0.104, "loss_bbox": 0.014, "loss_giou": 0.173, "loss_self_iou": 0.004, "cardinality_error": 7.083, "loss_ce_0": 0.235, "loss_counter_0": 0.104, "loss_bbox_0": 0.016, "loss_giou_0": 0.182, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.083, "loss_caption_0": 1.678, "loss_caption": 1.68, "total_loss": 9.181}, "23541": {"loss_ce": 0.25, "loss_counter": 0.112, "loss_bbox": 0.013, "loss_giou": 0.185, "loss_self_iou": 0.003, "cardinality_error": 7.782, "loss_ce_0": 0.253, "loss_counter_0": 0.111, "loss_bbox_0": 0.014, "loss_giou_0": 0.197, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.782, "loss_caption_0": 1.686, "loss_caption": 1.674, "total_loss": 9.361}, "23674": {"loss_ce": 0.242, "loss_counter": 0.104, "loss_bbox": 0.013, "loss_giou": 0.175, "loss_self_iou": 0.004, "cardinality_error": 7.699, "loss_ce_0": 0.242, "loss_counter_0": 0.106, "loss_bbox_0": 0.014, "loss_giou_0": 0.188, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.699, "loss_caption_0": 1.734, "loss_caption": 1.755, "total_loss": 9.502}, "23807": {"loss_ce": 0.247, "loss_counter": 0.109, "loss_bbox": 0.013, "loss_giou": 0.188, "loss_self_iou": 0.004, "cardinality_error": 8.023, "loss_ce_0": 0.248, "loss_counter_0": 0.111, "loss_bbox_0": 0.014, "loss_giou_0": 0.199, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.023, "loss_caption_0": 1.838, "loss_caption": 1.842, "total_loss": 10.01}, "23940": {"loss_ce": 0.242, "loss_counter": 0.107, "loss_bbox": 0.013, "loss_giou": 0.178, "loss_self_iou": 0.004, "cardinality_error": 7.789, "loss_ce_0": 0.246, "loss_counter_0": 0.11, "loss_bbox_0": 0.014, "loss_giou_0": 0.189, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.789, "loss_caption_0": 1.661, "loss_caption": 1.655, "total_loss": 9.188}, "24073": {"loss_ce": 0.244, "loss_counter": 0.11, "loss_bbox": 0.012, "loss_giou": 0.178, "loss_self_iou": 0.003, "cardinality_error": 7.97, "loss_ce_0": 0.246, "loss_counter_0": 0.112, "loss_bbox_0": 0.013, "loss_giou_0": 0.191, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.97, "loss_caption_0": 1.689, "loss_caption": 1.683, "total_loss": 9.309}, "24206": {"loss_ce": 0.237, "loss_counter": 0.118, "loss_bbox": 0.013, "loss_giou": 0.183, "loss_self_iou": 0.005, "cardinality_error": 8.286, "loss_ce_0": 0.236, "loss_counter_0": 0.118, "loss_bbox_0": 0.013, "loss_giou_0": 0.195, "loss_self_iou_0": 0.005, "cardinality_error_0": 8.286, "loss_caption_0": 1.712, "loss_caption": 1.715, "total_loss": 9.432}, "24339": {"loss_ce": 0.245, "loss_counter": 0.098, "loss_bbox": 0.012, "loss_giou": 0.167, "loss_self_iou": 0.003, "cardinality_error": 7.316, "loss_ce_0": 0.247, "loss_counter_0": 0.099, "loss_bbox_0": 0.013, "loss_giou_0": 0.179, "loss_self_iou_0": 0.003, "cardinality_error_0": 7.316, "loss_caption_0": 1.695, "loss_caption": 1.701, "total_loss": 9.257}, "24472": {"loss_ce": 0.243, "loss_counter": 0.108, "loss_bbox": 0.013, "loss_giou": 0.176, "loss_self_iou": 0.003, "cardinality_error": 7.459, "loss_ce_0": 0.248, "loss_counter_0": 0.109, "loss_bbox_0": 0.014, "loss_giou_0": 0.187, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.459, "loss_caption_0": 1.699, "loss_caption": 1.699, "total_loss": 9.337}, "24605": {"loss_ce": 0.242, "loss_counter": 0.103, "loss_bbox": 0.014, "loss_giou": 0.18, "loss_self_iou": 0.004, "cardinality_error": 7.812, "loss_ce_0": 0.243, "loss_counter_0": 0.104, "loss_bbox_0": 0.015, "loss_giou_0": 0.189, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.812, "loss_caption_0": 1.775, "loss_caption": 1.773, "total_loss": 9.644}, "24738": {"loss_ce": 0.243, "loss_counter": 0.101, "loss_bbox": 0.016, "loss_giou": 0.187, "loss_self_iou": 0.004, "cardinality_error": 7.556, "loss_ce_0": 0.246, "loss_counter_0": 0.103, "loss_bbox_0": 0.016, "loss_giou_0": 0.196, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.556, "loss_caption_0": 1.727, "loss_caption": 1.73, "total_loss": 9.525}, "24871": {"loss_ce": 0.239, "loss_counter": 0.104, "loss_bbox": 0.013, "loss_giou": 0.181, "loss_self_iou": 0.004, "cardinality_error": 7.692, "loss_ce_0": 0.241, "loss_counter_0": 0.105, "loss_bbox_0": 0.014, "loss_giou_0": 0.192, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.692, "loss_caption_0": 1.77, "loss_caption": 1.773, "total_loss": 9.641}, "25004": {"loss_ce": 0.246, "loss_counter": 0.109, "loss_bbox": 0.013, "loss_giou": 0.186, "loss_self_iou": 0.004, "cardinality_error": 8.143, "loss_ce_0": 0.247, "loss_counter_0": 0.11, "loss_bbox_0": 0.015, "loss_giou_0": 0.197, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.143, "loss_caption_0": 1.692, "loss_caption": 1.684, "total_loss": 9.379}, "25137": {"loss_ce": 0.245, "loss_counter": 0.111, "loss_bbox": 0.014, "loss_giou": 0.179, "loss_self_iou": 0.004, "cardinality_error": 7.88, "loss_ce_0": 0.245, "loss_counter_0": 0.111, "loss_bbox_0": 0.015, "loss_giou_0": 0.192, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.88, "loss_caption_0": 1.691, "loss_caption": 1.696, "total_loss": 9.347}, "25270": {"loss_ce": 0.237, "loss_counter": 0.103, "loss_bbox": 0.014, "loss_giou": 0.185, "loss_self_iou": 0.004, "cardinality_error": 7.767, "loss_ce_0": 0.238, "loss_counter_0": 0.105, "loss_bbox_0": 0.015, "loss_giou_0": 0.196, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.767, "loss_caption_0": 1.687, "loss_caption": 1.694, "total_loss": 9.34}, "25403": {"loss_ce": 0.247, "loss_counter": 0.102, "loss_bbox": 0.013, "loss_giou": 0.176, "loss_self_iou": 0.005, "cardinality_error": 7.429, "loss_ce_0": 0.248, "loss_counter_0": 0.105, "loss_bbox_0": 0.014, "loss_giou_0": 0.186, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.429, "loss_caption_0": 1.705, "loss_caption": 1.695, "total_loss": 9.343}, "25536": {"loss_ce": 0.241, "loss_counter": 0.107, "loss_bbox": 0.013, "loss_giou": 0.189, "loss_self_iou": 0.003, "cardinality_error": 7.887, "loss_ce_0": 0.246, "loss_counter_0": 0.108, "loss_bbox_0": 0.014, "loss_giou_0": 0.196, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.887, "loss_caption_0": 1.717, "loss_caption": 1.729, "total_loss": 9.517}, "25669": {"loss_ce": 0.239, "loss_counter": 0.111, "loss_bbox": 0.014, "loss_giou": 0.177, "loss_self_iou": 0.004, "cardinality_error": 7.707, "loss_ce_0": 0.243, "loss_counter_0": 0.111, "loss_bbox_0": 0.015, "loss_giou_0": 0.186, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.707, "loss_caption_0": 1.718, "loss_caption": 1.711, "total_loss": 9.385}, "25802": {"loss_ce": 0.24, "loss_counter": 0.111, "loss_bbox": 0.013, "loss_giou": 0.183, "loss_self_iou": 0.004, "cardinality_error": 8.173, "loss_ce_0": 0.242, "loss_counter_0": 0.113, "loss_bbox_0": 0.014, "loss_giou_0": 0.193, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.173, "loss_caption_0": 1.732, "loss_caption": 1.735, "total_loss": 9.515}, "25935": {"loss_ce": 0.241, "loss_counter": 0.105, "loss_bbox": 0.013, "loss_giou": 0.179, "loss_self_iou": 0.005, "cardinality_error": 7.82, "loss_ce_0": 0.241, "loss_counter_0": 0.107, "loss_bbox_0": 0.014, "loss_giou_0": 0.192, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.82, "loss_caption_0": 1.626, "loss_caption": 1.628, "total_loss": 9.063}, "26068": {"loss_ce": 0.24, "loss_counter": 0.102, "loss_bbox": 0.014, "loss_giou": 0.182, "loss_self_iou": 0.005, "cardinality_error": 7.444, "loss_ce_0": 0.243, "loss_counter_0": 0.103, "loss_bbox_0": 0.014, "loss_giou_0": 0.19, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.444, "loss_caption_0": 1.697, "loss_caption": 1.701, "total_loss": 9.35}, "26201": {"loss_ce": 0.239, "loss_counter": 0.097, "loss_bbox": 0.014, "loss_giou": 0.168, "loss_self_iou": 0.005, "cardinality_error": 7.301, "loss_ce_0": 0.237, "loss_counter_0": 0.099, "loss_bbox_0": 0.015, "loss_giou_0": 0.181, "loss_self_iou_0": 0.005, "cardinality_error_0": 7.301, "loss_caption_0": 1.702, "loss_caption": 1.703, "total_loss": 9.254}, "26334": {"loss_ce": 0.238, "loss_counter": 0.112, "loss_bbox": 0.013, "loss_giou": 0.174, "loss_self_iou": 0.003, "cardinality_error": 7.827, "loss_ce_0": 0.242, "loss_counter_0": 0.112, "loss_bbox_0": 0.014, "loss_giou_0": 0.188, "loss_self_iou_0": 0.003, "cardinality_error_0": 7.827, "loss_caption_0": 1.729, "loss_caption": 1.725, "total_loss": 9.424}, "26467": {"loss_ce": 0.247, "loss_counter": 0.109, "loss_bbox": 0.014, "loss_giou": 0.181, "loss_self_iou": 0.003, "cardinality_error": 8.023, "loss_ce_0": 0.245, "loss_counter_0": 0.11, "loss_bbox_0": 0.015, "loss_giou_0": 0.195, "loss_self_iou_0": 0.004, "cardinality_error_0": 8.023, "loss_caption_0": 1.751, "loss_caption": 1.746, "total_loss": 9.586}, "26600": {"loss_ce": 0.242, "loss_counter": 0.108, "loss_bbox": 0.014, "loss_giou": 0.186, "loss_self_iou": 0.004, "cardinality_error": 7.902, "loss_ce_0": 0.242, "loss_counter_0": 0.108, "loss_bbox_0": 0.014, "loss_giou_0": 0.196, "loss_self_iou_0": 0.004, "cardinality_error_0": 7.902, "loss_caption_0": 1.727, "loss_caption": 1.737, "total_loss": 9.533}}, "lr_history": {"133": 5e-05, "266": 5e-05, "399": 5e-05, "532": 5e-05, "665": 5e-05, "798": 5e-05, "931": 5e-05, "1064": 5e-05, "1197": 5e-05, "1330": 5e-05, "1463": 5e-05, "1596": 5e-05, "1729": 5e-05, "1862": 5e-05, "1995": 5e-05, "2128": 5e-05, "2261": 5e-05, "2394": 5e-05, "2527": 5e-05, "2660": 5e-05, "2793": 5e-05, "2926": 5e-05, "3059": 5e-05, "3192": 5e-05, "3325": 5e-05, "3458": 5e-05, "3591": 5e-05, "3724": 5e-05, "3857": 5e-05, "3990": 5e-05, "4123": 5e-05, "4256": 5e-05, "4389": 5e-05, "4522": 5e-05, "4655": 5e-05, "4788": 5e-05, "4921": 5e-05, "5054": 5e-05, "5187": 5e-05, "5320": 5e-05, "5453": 5e-05, "5586": 5e-05, "5719": 5e-05, "5852": 5e-05, "5985": 5e-05, "6118": 5e-05, "6251": 5e-05, "6384": 5e-05, "6517": 5e-05, "6650": 5e-05, "6783": 5e-05, "6916": 5e-05, "7049": 5e-05, "7182": 5e-05, "7315": 5e-05, "7448": 5e-05, "7581": 5e-05, "7714": 5e-05, "7847": 5e-05, "7980": 5e-05, "8113": 5e-05, "8246": 5e-05, "8379": 5e-05, "8512": 5e-05, "8645": 5e-05, "8778": 5e-05, "8911": 5e-05, "9044": 5e-05, "9177": 5e-05, "9310": 5e-05, "9443": 5e-05, "9576": 5e-05, "9709": 5e-05, "9842": 5e-05, "9975": 5e-05, "10108": 5e-05, "10241": 5e-05, "10374": 5e-05, "10507": 5e-05, "10640": 5e-05, "10773": 2.5e-05, "10906": 2.5e-05, "11039": 2.5e-05, "11172": 2.5e-05, "11305": 2.5e-05, "11438": 2.5e-05, "11571": 2.5e-05, "11704": 2.5e-05, "11837": 2.5e-05, "11970": 2.5e-05, "12103": 2.5e-05, "12236": 2.5e-05, "12369": 2.5e-05, "12502": 2.5e-05, "12635": 2.5e-05, "12768": 2.5e-05, "12901": 2.5e-05, "13034": 2.5e-05, "13167": 2.5e-05, "13300": 2.5e-05, "13433": 2.5e-05, "13566": 2.5e-05, "13699": 2.5e-05, "13832": 2.5e-05, "13965": 2.5e-05, "14098": 2.5e-05, "14231": 2.5e-05, "14364": 2.5e-05, "14497": 2.5e-05, "14630": 2.5e-05, "14763": 1.25e-05, "14896": 1.25e-05, "15029": 1.25e-05, "15162": 1.25e-05, "15295": 1.25e-05, "15428": 1.25e-05, "15561": 1.25e-05, "15694": 1.25e-05, "15827": 1.25e-05, "15960": 1.25e-05, "16093": 1.25e-05, "16226": 1.25e-05, "16359": 1.25e-05, "16492": 1.25e-05, "16625": 1.25e-05, "16758": 1.25e-05, "16891": 1.25e-05, "17024": 1.25e-05, "17157": 1.25e-05, "17290": 1.25e-05, "17423": 1.25e-05, "17556": 1.25e-05, "17689": 1.25e-05, "17822": 1.25e-05, "17955": 1.25e-05, "18088": 1.25e-05, "18221": 1.25e-05, "18354": 1.25e-05, "18487": 1.25e-05, "18620": 1.25e-05, "18753": 6.25e-06, "18886": 6.25e-06, "19019": 6.25e-06, "19152": 6.25e-06, "19285": 6.25e-06, "19418": 6.25e-06, "19551": 6.25e-06, "19684": 6.25e-06, "19817": 6.25e-06, "19950": 6.25e-06, "20083": 6.25e-06, "20216": 6.25e-06, "20349": 6.25e-06, "20482": 6.25e-06, "20615": 6.25e-06, "20748": 6.25e-06, "20881": 6.25e-06, "21014": 6.25e-06, "21147": 6.25e-06, "21280": 6.25e-06, "21413": 6.25e-06, "21546": 6.25e-06, "21679": 6.25e-06, "21812": 6.25e-06, "21945": 6.25e-06, "22078": 6.25e-06, "22211": 6.25e-06, "22344": 6.25e-06, "22477": 6.25e-06, "22610": 6.25e-06, "22743": 3.125e-06, "22876": 3.125e-06, "23009": 3.125e-06, "23142": 3.125e-06, "23275": 3.125e-06, "23408": 3.125e-06, "23541": 3.125e-06, "23674": 3.125e-06, "23807": 3.125e-06, "23940": 3.125e-06, "24073": 3.125e-06, "24206": 3.125e-06, "24339": 3.125e-06, "24472": 3.125e-06, "24605": 3.125e-06, "24738": 3.125e-06, "24871": 3.125e-06, "25004": 3.125e-06, "25137": 3.125e-06, "25270": 3.125e-06, "25403": 3.125e-06, "25536": 3.125e-06, "25669": 3.125e-06, "25802": 3.125e-06, "25935": 3.125e-06, "26068": 3.125e-06, "26201": 3.125e-06, "26334": 3.125e-06, "26467": 3.125e-06, "26600": 3.125e-06}}, "eval_history": {}} \ No newline at end of file diff --git a/model-best.pth b/model-best.pth index 2e4a90dfdd7212286f1fa03c604791bc873dd013..42c65282e76faa81b01540eeb0c653178e2e7f49 100644 --- a/model-best.pth +++ b/model-best.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:08a1005b39a23d6a3c9bb047e210d4dff71adb571bba0b1e25498705a7d7b56c -size 397662145 +oid sha256:2c5649639a5e2d91c0e6430f7ef3a969419b6a920dabf8cac4617479f64d1d76 +size 377084545 diff --git a/model-last.pth b/model-last.pth index 2e4a90dfdd7212286f1fa03c604791bc873dd013..dd48389557fb5978c075e01fe7bcfa5562d45dd2 100644 --- a/model-last.pth +++ b/model-last.pth @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:08a1005b39a23d6a3c9bb047e210d4dff71adb571bba0b1e25498705a7d7b56c -size 397662145 +oid sha256:417eb7f05c35061d38ba56c6fab6b264b7e8c9c6d02cbf125c5586c1087696be +size 377084545 diff --git a/tf_summary/events.out.tfevents.1711292828.dlc1ts6z9ib5vxur-master-0 b/tf_summary/events.out.tfevents.1711292828.dlc1ts6z9ib5vxur-master-0 new file mode 100644 index 0000000000000000000000000000000000000000..d19b25d3fe05018fbced327351295a36da6e2a4d --- /dev/null +++ b/tf_summary/events.out.tfevents.1711292828.dlc1ts6z9ib5vxur-master-0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cdd59bc89bebf7407e106fa3a4f04453f379c084fc388a64117a235f79c9746 +size 179068 diff --git a/train.log b/train.log index 765923c87c550a2ac7c30e84294d23b156013f81..bb2d6d2ae6890d5bf94f03e99b8267cbac0b8177 100644 --- a/train.log +++ b/train.log @@ -1,5 +1,5 @@ backup evironment completed ! -Loading pth from /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal +Loading pth from /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal ******************** All args: ************************************************* @@ -12,7 +12,7 @@ align_top_band_size = 0 att_hid_size = 512 aux_loss = True backbone = None -base_cfg_path = cfgs_base/howto/howto-anet_anet_ori_(sim_op_order_v2)_CLIP_refine.yml +base_cfg_path = cfgs_base/howto/howto-yc2_yc2_ori_(sim_op_order_v2)_UniVL_refine.yml basic_ss_prob = 0 batch_size = 1 batch_size_for_eval = 1 @@ -25,7 +25,7 @@ cap_prob_clip = False caption_cost_type = loss caption_decoder_type = standard caption_loss_coef = 2 -cfg_path = cfgs/howto-anet_anet_clip_topk30_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml +cfg_path = cfgs_ft_gt/howto-yc2_yc2_univl_topk25_r1_iter3_th1_refine_aug(8,0.02)_top3_2stage_inscap.yml cl_schedule_time = [0, 2] cl_schedule_val = [0, 0.1] clip_context_dim = 512 @@ -44,8 +44,8 @@ debug = False dec_layers = 2 dec_n_points = 4 device = cuda -dict_file = data/howto/vocabulary_howto_rate2_anet.json -dict_file_val = data/howto/vocabulary_howto_rate2_anet.json +dict_file = data/howto/vocabulary_howto_rate2_yc2.json +dict_file_val = data/howto/vocabulary_howto_rate2_yc2.json dilation = False disable_contrastive_projection = 1 disable_cudnn = 0 @@ -62,7 +62,6 @@ enc_layers = 2 enc_n_points = 4 eos_coef = 0.1 epoch = 20 -eval_proposal_file = data/generated_proposals/dbg_trainval_top100.json event_context_dim = None feature_dim = 768 feature_sample_rate = 1 @@ -70,14 +69,14 @@ fix_xcw = 1 focal_alpha = 0.25 focal_gamma = 2.0 focal_mil = False -frame_embedding_num = 100 +frame_embedding_num = 200 ft_gt_percent = 1.0 giou_loss_coef = 4 gpu_id = [] grad_clip = 100.0 gt_file_for_auc = data/anet/captiondata/val_all.json -gt_file_for_eval = ['data/anet/captiondata/val_1.json', 'data/anet/captiondata/val_2.json'] -gt_file_for_para_eval = ['data/anet/captiondata/para/anet_entities_val_1_para.json', 'data/anet/captiondata/para/anet_entities_val_2_para.json'] +gt_file_for_eval = ['data/yc2/captiondata/yc2_val.json'] +gt_file_for_para_eval = ['data/yc2/captiondata/para/para_yc2_val.json'] gt_proposal_sample_num = 20 hidden_dim = 512 hidden_dropout_prob = 0.5 @@ -104,7 +103,7 @@ lr_proj = 0 map = True matcher_type = default max_caption_len = 50 -max_eseq_length = 10 +max_eseq_length = 20 max_pos_num = 500 max_text_input_len = 32 merge_criterion = ins_cap_topk @@ -126,7 +125,7 @@ position_embedding_scale = 6.283185307179586 pre_percent = 1.0 pretrain = None pretrain_path = -pretrained_language_model = CLIP +pretrained_language_model = UniVL prior_anchor_duration_init = True prior_manner = all pseudo_box_aug = False @@ -162,13 +161,13 @@ start_from = start_from_mode = last start_refine_epoch = -1 statistic_mode = mode +test = False text_encoder_learning_strategy = frozen -text_feature_folder = ['/mnt/data/Gvlab/wuhao/features/howto100m/clip/text_proj', '/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/'] -text_feature_folder_val = ['/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/text/'] +text_feature_folder = ['/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/text', '/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/'] +text_feature_folder_val = ['/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_text/'] text_hidden_dim = 768 -top_frames = 30 -train_caption_file = ['data/howto/captiondata/howto100m_train.json', 'data/anet/captiondata/train_modified.json'] -train_proposal_file = data/generated_proposals/dbg_trainval_top100.json +top_frames = 25 +train_caption_file = ['data/howto/captiondata/howto100m_train.json', 'data/yc2/captiondata/yc2_train.json'] train_proposal_sample_num = 30 train_proposal_type = gt training_scheme = all @@ -181,17 +180,17 @@ use_anchor = 0 use_neg_pseudo_box = False use_pseudo_box = False use_query_box_for_refine = 0 -val_caption_file = data/anet/captiondata/val_1.json -visual_feature_folder = ['/mnt/data/Gvlab/wuhao/features/howto100m/clip/visual', '/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/'] -visual_feature_folder_val = ['/mnt/data/Gvlab/wuhao/features/anet/CLIP_feature/visual/'] -visual_feature_type = ['CLIP'] -vocab_size = 16221 -vocab_size_val = 16221 +val_caption_file = data/yc2/captiondata/yc2_val.json +visual_feature_folder = ['/mnt/data/Gvlab/wuhao/features/howto100m/UniVL/visual', '/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/'] +visual_feature_folder_val = ['/mnt/data/Gvlab/wuhao/features/yc2/UniVL_features/UniVL_visual/'] +visual_feature_type = ['UniVL'] +vocab_size = 14538 +vocab_size_val = 14538 weight_decay = 0.0001 weighted_mil_loss = False width_ratio = 1 width_th = 1 -window_size = 2 +window_size = 3 with_box_refine = 1 wordRNN_input_feats_type = C @@ -322,8 +321,8 @@ PDVC( ) (caption_head): ModuleList( (0): LSTMDSACaptioner( - (embed): Embedding(16222, 512) - (logit): Linear(in_features=512, out_features=16222, bias=True) + (embed): Embedding(14539, 512) + (logit): Linear(in_features=512, out_features=14539, bias=True) (dropout): Dropout(p=0.5, inplace=False) (core): ShowAttendTellCore( (rnn): LSTM(1536, 512, bias=False, dropout=0.5) @@ -340,8 +339,8 @@ PDVC( ) ) (1): LSTMDSACaptioner( - (embed): Embedding(16222, 512) - (logit): Linear(in_features=512, out_features=16222, bias=True) + (embed): Embedding(14539, 512) + (logit): Linear(in_features=512, out_features=14539, bias=True) (dropout): Dropout(p=0.5, inplace=False) (core): ShowAttendTellCore( (rnn): LSTM(1536, 512, bias=False, dropout=0.5) @@ -368,8 +367,8 @@ PDVC( (1): Linear(in_features=512, out_features=1, bias=True) ) (count_head): ModuleList( - (0): Linear(in_features=512, out_features=11, bias=True) - (1): Linear(in_features=512, out_features=11, bias=True) + (0): Linear(in_features=512, out_features=21, bias=True) + (1): Linear(in_features=512, out_features=21, bias=True) ) (bbox_head): ModuleList( (0): MLP( @@ -401,1120 +400,1117 @@ PDVC( ******************** Strat training ! ****************************************** loss type: dict_keys(['loss_ce', 'loss_bbox', 'loss_giou', 'loss_counter', 'loss_caption', 'contrastive_loss', 'loss_ce_0', 'loss_bbox_0', 'loss_giou_0', 'loss_counter_0', 'loss_caption_0', 'contrastive_loss_0']) loss weights: dict_values([2, 0, 4, 0.5, 2, 0.0, 2, 0, 4, 0.5, 2, 0.0]) -ID seq2-ft(mix)-gt_percent-1.0 iter 1000 (epoch 0), -loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.126), ('loss_bbox', 0.117), ('loss_giou', 0.275), ('loss_self_iou', 0.126), ('cardinality_error', 3.775), ('loss_ce_0', 0.284), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.118), ('loss_giou_0', 0.276), ('loss_self_iou_0', 0.126), ('cardinality_error_0', 3.775), ('loss_caption_0', 3.781), ('loss_caption', 3.778), ('total_loss', 18.585)]), -time/iter = 0.182, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 2000 (epoch 0), -loss = OrderedDict([('loss_ce', 0.287), ('loss_counter', 0.119), ('loss_bbox', 0.087), ('loss_giou', 0.239), ('loss_self_iou', 0.12), ('cardinality_error', 3.705), ('loss_ce_0', 0.289), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.087), ('loss_giou_0', 0.239), ('loss_self_iou_0', 0.121), ('cardinality_error_0', 3.705), ('loss_caption_0', 3.682), ('loss_caption', 3.675), ('total_loss', 17.896)]), -time/iter = 0.180, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 3000 (epoch 0), -loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.122), ('loss_bbox', 0.078), ('loss_giou', 0.227), ('loss_self_iou', 0.098), ('cardinality_error', 3.705), ('loss_ce_0', 0.292), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.228), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.705), ('loss_caption_0', 3.668), ('loss_caption', 3.664), ('total_loss', 17.771)]), -time/iter = 0.181, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 4000 (epoch 0), -loss = OrderedDict([('loss_ce', 0.289), ('loss_counter', 0.126), ('loss_bbox', 0.078), ('loss_giou', 0.224), ('loss_self_iou', 0.1), ('cardinality_error', 3.784), ('loss_ce_0', 0.291), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.223), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.784), ('loss_caption_0', 3.624), ('loss_caption', 3.629), ('total_loss', 17.579)]), -time/iter = 0.174, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 5000 (epoch 0), -loss = OrderedDict([('loss_ce', 0.285), ('loss_counter', 0.121), ('loss_bbox', 0.08), ('loss_giou', 0.218), ('loss_self_iou', 0.114), ('cardinality_error', 3.674), ('loss_ce_0', 0.287), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.08), ('loss_giou_0', 0.218), ('loss_self_iou_0', 0.115), ('cardinality_error_0', 3.674), ('loss_caption_0', 3.629), ('loss_caption', 3.629), ('total_loss', 17.526)]), -time/iter = 0.178, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 6000 (epoch 0), -loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.13), ('loss_bbox', 0.076), ('loss_giou', 0.22), ('loss_self_iou', 0.098), ('cardinality_error', 3.786), ('loss_ce_0', 0.293), ('loss_counter_0', 0.129), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.22), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.786), ('loss_caption_0', 3.625), ('loss_caption', 3.622), ('total_loss', 17.555)]), -time/iter = 0.182, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 7000 (epoch 0), -loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.12), ('loss_bbox', 0.076), ('loss_giou', 0.215), ('loss_self_iou', 0.097), ('cardinality_error', 3.746), ('loss_ce_0', 0.293), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.215), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.746), ('loss_caption_0', 3.58), ('loss_caption', 3.576), ('total_loss', 17.319)]), -time/iter = 0.179, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 8000 (epoch 0), -loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.129), ('loss_bbox', 0.078), ('loss_giou', 0.218), ('loss_self_iou', 0.108), ('cardinality_error', 3.754), ('loss_ce_0', 0.288), ('loss_counter_0', 0.128), ('loss_bbox_0', 0.079), ('loss_giou_0', 0.218), ('loss_self_iou_0', 0.11), ('cardinality_error_0', 3.754), ('loss_caption_0', 3.546), ('loss_caption', 3.546), ('total_loss', 17.209)]), -time/iter = 0.184, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 9000 (epoch 0), -loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.12), ('loss_bbox', 0.078), ('loss_giou', 0.219), ('loss_self_iou', 0.1), ('cardinality_error', 3.685), ('loss_ce_0', 0.291), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.219), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.685), ('loss_caption_0', 3.544), ('loss_caption', 3.54), ('total_loss', 17.2)]), -time/iter = 0.180, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 10000 (epoch 0), -loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.125), ('loss_bbox', 0.077), ('loss_giou', 0.22), ('loss_self_iou', 0.101), ('cardinality_error', 3.748), ('loss_ce_0', 0.293), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.078), ('loss_giou_0', 0.22), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.748), ('loss_caption_0', 3.582), ('loss_caption', 3.577), ('total_loss', 17.376)]), -time/iter = 0.180, bad_vid = 0.000 - -Validation results of iter 10009: -Bleu_1:0.15656016917085527 -Bleu_2:0.08210369852679855 -Bleu_3:0.042491746140277446 -Bleu_4:0.021149866989626908 -METEOR:0.08752782819459405 -ROUGE_L:0.1577032846084498 -CIDEr:0.2687260839927409 -Recall:0.4986985069085389 -Precision:0.548450952477792 -soda_c:0.045070258467165024 -para_Bleu_1:0.36987086578065714 -para_Bleu_2:0.1987998709052068 -para_Bleu_3:0.11671522868501899 -para_Bleu_4:0.07164097958462183 -para_METEOR:0.13901753612789455 -para_ROUGE_L:0.2826680559963382 -para_CIDEr:0.0956891322121665 - -overall score of iter 10009: 0.3063476479246829 - -Save model at iter 10009 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. -Save Best-model at iter 10009 to checkpoint file. -Save info to info.json -ID seq2-ft(mix)-gt_percent-1.0 iter 11000 (epoch 1), -loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.124), ('loss_bbox', 0.077), ('loss_giou', 0.217), ('loss_self_iou', 0.101), ('cardinality_error', 3.788), ('loss_ce_0', 0.292), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.217), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.788), ('loss_caption_0', 3.446), ('loss_caption', 3.443), ('total_loss', 16.802)]), -time/iter = 0.707, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 12000 (epoch 1), -loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.12), ('loss_bbox', 0.076), ('loss_giou', 0.214), ('loss_self_iou', 0.103), ('cardinality_error', 3.694), ('loss_ce_0', 0.291), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.075), ('loss_giou_0', 0.213), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.694), ('loss_caption_0', 3.427), ('loss_caption', 3.428), ('total_loss', 16.701)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 13000 (epoch 1), -loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.12), ('loss_bbox', 0.076), ('loss_giou', 0.217), ('loss_self_iou', 0.107), ('cardinality_error', 3.689), ('loss_ce_0', 0.291), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.217), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.689), ('loss_caption_0', 3.464), ('loss_caption', 3.461), ('total_loss', 16.871)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 14000 (epoch 1), -loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.118), ('loss_bbox', 0.073), ('loss_giou', 0.21), ('loss_self_iou', 0.1), ('cardinality_error', 3.663), ('loss_ce_0', 0.292), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.663), ('loss_caption_0', 3.414), ('loss_caption', 3.41), ('total_loss', 16.616)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 15000 (epoch 1), -loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.127), ('loss_bbox', 0.076), ('loss_giou', 0.214), ('loss_self_iou', 0.103), ('cardinality_error', 3.828), ('loss_ce_0', 0.296), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.215), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.828), ('loss_caption_0', 3.453), ('loss_caption', 3.453), ('total_loss', 16.836)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 16000 (epoch 1), -loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.121), ('loss_bbox', 0.073), ('loss_giou', 0.206), ('loss_self_iou', 0.105), ('cardinality_error', 3.687), ('loss_ce_0', 0.297), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.207), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.687), ('loss_caption_0', 3.461), ('loss_caption', 3.462), ('total_loss', 16.803)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 17000 (epoch 1), -loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.127), ('loss_bbox', 0.073), ('loss_giou', 0.208), ('loss_self_iou', 0.102), ('cardinality_error', 3.791), ('loss_ce_0', 0.3), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.209), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.791), ('loss_caption_0', 3.469), ('loss_caption', 3.465), ('total_loss', 16.864)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 18000 (epoch 1), -loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.119), ('loss_bbox', 0.074), ('loss_giou', 0.205), ('loss_self_iou', 0.107), ('cardinality_error', 3.68), ('loss_ce_0', 0.298), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.206), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.68), ('loss_caption_0', 3.478), ('loss_caption', 3.475), ('total_loss', 16.859)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 19000 (epoch 1), -loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.126), ('loss_bbox', 0.073), ('loss_giou', 0.207), ('loss_self_iou', 0.099), ('cardinality_error', 3.752), ('loss_ce_0', 0.304), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.208), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.752), ('loss_caption_0', 3.396), ('loss_caption', 3.396), ('total_loss', 16.585)]), -time/iter = 0.194, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 20000 (epoch 1), -loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.128), ('loss_bbox', 0.071), ('loss_giou', 0.208), ('loss_self_iou', 0.101), ('cardinality_error', 3.804), ('loss_ce_0', 0.304), ('loss_counter_0', 0.128), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.208), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.804), ('loss_caption_0', 3.42), ('loss_caption', 3.419), ('total_loss', 16.684)]), -time/iter = 0.189, bad_vid = 0.000 - -Validation results of iter 20018: -Bleu_1:0.15965966113561106 -Bleu_2:0.08785069799970043 -Bleu_3:0.04739925348589703 -Bleu_4:0.02377096308421814 -METEOR:0.09062964515721111 -ROUGE_L:0.1652647774491388 -CIDEr:0.27366191469495676 -Recall:0.45131293652113946 -Precision:0.5379414954918249 -soda_c:0.04303682007432423 -para_Bleu_1:0.3640361416830845 -para_Bleu_2:0.1986476696673755 -para_Bleu_3:0.11814800235116821 -para_Bleu_4:0.07336184523852665 -para_METEOR:0.13911724177507803 -para_ROUGE_L:0.28211794880017504 -para_CIDEr:0.08634617454158834 - -overall score of iter 20018: 0.29882526155519307 - -Save model at iter 20018 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +ID seq2-ft(mix)-gt_percent-1.0 iter 133 (epoch 0), +loss = OrderedDict([('loss_ce', 0.336), ('loss_counter', 0.129), ('loss_bbox', 0.039), ('loss_giou', 0.368), ('loss_self_iou', 0.028), ('cardinality_error', 7.797), ('loss_ce_0', 0.337), ('loss_counter_0', 0.13), ('loss_bbox_0', 0.041), ('loss_giou_0', 0.381), ('loss_self_iou_0', 0.03), ('cardinality_error_0', 7.797), ('loss_caption_0', 2.755), ('loss_caption', 2.681), ('total_loss', 15.341)]), +time/iter = 0.172, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 266 (epoch 0), +loss = OrderedDict([('loss_ce', 0.324), ('loss_counter', 0.129), ('loss_bbox', 0.036), ('loss_giou', 0.369), ('loss_self_iou', 0.018), ('cardinality_error', 7.812), ('loss_ce_0', 0.341), ('loss_counter_0', 0.132), ('loss_bbox_0', 0.039), ('loss_giou_0', 0.38), ('loss_self_iou_0', 0.019), ('cardinality_error_0', 7.812), ('loss_caption_0', 2.803), ('loss_caption', 2.638), ('total_loss', 15.341)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 399 (epoch 0), +loss = OrderedDict([('loss_ce', 0.312), ('loss_counter', 0.13), ('loss_bbox', 0.039), ('loss_giou', 0.375), ('loss_self_iou', 0.02), ('cardinality_error', 7.835), ('loss_ce_0', 0.324), ('loss_counter_0', 0.132), ('loss_bbox_0', 0.043), ('loss_giou_0', 0.395), ('loss_self_iou_0', 0.021), ('cardinality_error_0', 7.835), ('loss_caption_0', 2.81), ('loss_caption', 2.676), ('total_loss', 15.459)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 532 (epoch 0), +loss = OrderedDict([('loss_ce', 0.307), ('loss_counter', 0.133), ('loss_bbox', 0.044), ('loss_giou', 0.394), ('loss_self_iou', 0.02), ('cardinality_error', 7.902), ('loss_ce_0', 0.319), ('loss_counter_0', 0.133), ('loss_bbox_0', 0.05), ('loss_giou_0', 0.421), ('loss_self_iou_0', 0.026), ('cardinality_error_0', 7.902), ('loss_caption_0', 2.817), ('loss_caption', 2.654), ('total_loss', 15.588)]), +time/iter = 0.167, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 665 (epoch 0), +loss = OrderedDict([('loss_ce', 0.312), ('loss_counter', 0.135), ('loss_bbox', 0.034), ('loss_giou', 0.345), ('loss_self_iou', 0.017), ('cardinality_error', 7.805), ('loss_ce_0', 0.319), ('loss_counter_0', 0.131), ('loss_bbox_0', 0.038), ('loss_giou_0', 0.372), ('loss_self_iou_0', 0.019), ('cardinality_error_0', 7.805), ('loss_caption_0', 2.758), ('loss_caption', 2.635), ('total_loss', 15.049)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 798 (epoch 0), +loss = OrderedDict([('loss_ce', 0.321), ('loss_counter', 0.125), ('loss_bbox', 0.03), ('loss_giou', 0.319), ('loss_self_iou', 0.015), ('cardinality_error', 7.774), ('loss_ce_0', 0.331), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.032), ('loss_giou_0', 0.344), ('loss_self_iou_0', 0.015), ('cardinality_error_0', 7.774), ('loss_caption_0', 2.66), ('loss_caption', 2.559), ('total_loss', 14.519)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 931 (epoch 0), +loss = OrderedDict([('loss_ce', 0.327), ('loss_counter', 0.122), ('loss_bbox', 0.027), ('loss_giou', 0.306), ('loss_self_iou', 0.011), ('cardinality_error', 7.865), ('loss_ce_0', 0.346), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.029), ('loss_giou_0', 0.327), ('loss_self_iou_0', 0.012), ('cardinality_error_0', 7.865), ('loss_caption_0', 2.54), ('loss_caption', 2.468), ('total_loss', 14.017)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1064 (epoch 0), +loss = OrderedDict([('loss_ce', 0.331), ('loss_counter', 0.121), ('loss_bbox', 0.027), ('loss_giou', 0.292), ('loss_self_iou', 0.01), ('cardinality_error', 7.579), ('loss_ce_0', 0.345), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.028), ('loss_giou_0', 0.311), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.579), ('loss_caption_0', 2.639), ('loss_caption', 2.626), ('total_loss', 14.419)]), +time/iter = 0.163, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1197 (epoch 0), +loss = OrderedDict([('loss_ce', 0.325), ('loss_counter', 0.118), ('loss_bbox', 0.026), ('loss_giou', 0.296), ('loss_self_iou', 0.011), ('cardinality_error', 7.241), ('loss_ce_0', 0.339), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.028), ('loss_giou_0', 0.317), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.241), ('loss_caption_0', 2.501), ('loss_caption', 2.496), ('total_loss', 13.892)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1330 (epoch 0), +loss = OrderedDict([('loss_ce', 0.327), ('loss_counter', 0.126), ('loss_bbox', 0.026), ('loss_giou', 0.304), ('loss_self_iou', 0.011), ('cardinality_error', 7.94), ('loss_ce_0', 0.334), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.029), ('loss_giou_0', 0.332), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.94), ('loss_caption_0', 2.635), ('loss_caption', 2.619), ('total_loss', 14.504)]), +time/iter = 0.158, bad_vid = 0.000 + +Validation results of iter 1333: +Bleu_1:0.16894357888730638 +Bleu_2:0.09902176620134434 +Bleu_3:0.05312286436412136 +Bleu_4:0.026212861867102137 +METEOR:0.0791142699299577 +ROUGE_L:0.15563765109454591 +CIDEr:0.4087091055845523 +Recall:0.1991554685892762 +Precision:0.40083793546594454 +soda_c:0.05642652494419026 +para_Bleu_1:0.28013834967939705 +para_Bleu_2:0.16393959632782257 +para_Bleu_3:0.09809744775628881 +para_Bleu_4:0.060378126412557326 +para_METEOR:0.1286956339033507 +para_ROUGE_L:0.29903071052996405 +para_CIDEr:0.14675303603221324 + +overall score of iter 1333: 0.3358267963481213 + +Save model at iter 1333 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. +Save Best-model at iter 1333 to checkpoint file. Save info to info.json -ID seq2-ft(mix)-gt_percent-1.0 iter 21000 (epoch 2), -loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.122), ('loss_bbox', 0.071), ('loss_giou', 0.202), ('loss_self_iou', 0.101), ('cardinality_error', 3.666), ('loss_ce_0', 0.299), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.666), ('loss_caption_0', 3.344), ('loss_caption', 3.335), ('total_loss', 16.294)]), -time/iter = 0.726, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 22000 (epoch 2), -loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.119), ('loss_bbox', 0.073), ('loss_giou', 0.201), ('loss_self_iou', 0.109), ('cardinality_error', 3.752), ('loss_ce_0', 0.292), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.11), ('cardinality_error_0', 3.752), ('loss_caption_0', 3.302), ('loss_caption', 3.304), ('total_loss', 16.116)]), -time/iter = 0.188, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 23000 (epoch 2), -loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.128), ('loss_bbox', 0.077), ('loss_giou', 0.208), ('loss_self_iou', 0.113), ('cardinality_error', 3.803), ('loss_ce_0', 0.299), ('loss_counter_0', 0.128), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.208), ('loss_self_iou_0', 0.112), ('cardinality_error_0', 3.803), ('loss_caption_0', 3.348), ('loss_caption', 3.34), ('total_loss', 16.363)]), -time/iter = 0.190, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 24000 (epoch 2), -loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.122), ('loss_bbox', 0.076), ('loss_giou', 0.207), ('loss_self_iou', 0.093), ('cardinality_error', 3.729), ('loss_ce_0', 0.294), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.207), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.729), ('loss_caption_0', 3.354), ('loss_caption', 3.351), ('total_loss', 16.364)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 25000 (epoch 2), -loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.122), ('loss_bbox', 0.078), ('loss_giou', 0.213), ('loss_self_iou', 0.091), ('cardinality_error', 3.734), ('loss_ce_0', 0.295), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.077), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.734), ('loss_caption_0', 3.372), ('loss_caption', 3.372), ('total_loss', 16.494)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 26000 (epoch 2), -loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.125), ('loss_bbox', 0.072), ('loss_giou', 0.203), ('loss_self_iou', 0.096), ('cardinality_error', 3.784), ('loss_ce_0', 0.299), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.204), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.784), ('loss_caption_0', 3.334), ('loss_caption', 3.333), ('total_loss', 16.279)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 27000 (epoch 2), -loss = OrderedDict([('loss_ce', 0.289), ('loss_counter', 0.118), ('loss_bbox', 0.076), ('loss_giou', 0.203), ('loss_self_iou', 0.102), ('cardinality_error', 3.64), ('loss_ce_0', 0.291), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.64), ('loss_caption_0', 3.348), ('loss_caption', 3.345), ('total_loss', 16.287)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 28000 (epoch 2), -loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.125), ('loss_bbox', 0.077), ('loss_giou', 0.201), ('loss_self_iou', 0.095), ('cardinality_error', 3.774), ('loss_ce_0', 0.293), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.076), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.774), ('loss_caption_0', 3.337), ('loss_caption', 3.333), ('total_loss', 16.249)]), -time/iter = 0.194, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 29000 (epoch 2), -loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.12), ('loss_bbox', 0.075), ('loss_giou', 0.204), ('loss_self_iou', 0.1), ('cardinality_error', 3.755), ('loss_ce_0', 0.299), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.205), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.755), ('loss_caption_0', 3.315), ('loss_caption', 3.321), ('total_loss', 16.223)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 30000 (epoch 2), -loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.119), ('loss_bbox', 0.071), ('loss_giou', 0.195), ('loss_self_iou', 0.103), ('cardinality_error', 3.72), ('loss_ce_0', 0.302), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.72), ('loss_caption_0', 3.347), ('loss_caption', 3.349), ('total_loss', 16.283)]), -time/iter = 0.195, bad_vid = 0.000 - -Validation results of iter 30027: -Bleu_1:0.15440507165989542 -Bleu_2:0.08178273697953425 -Bleu_3:0.042600749568780155 -Bleu_4:0.02119123483046711 -METEOR:0.08563216148714695 -ROUGE_L:0.156809182143994 -CIDEr:0.25960752079137744 -Recall:0.5075951227720545 -Precision:0.571834112941489 -soda_c:0.048597974030683 -para_Bleu_1:0.3985431504573892 -para_Bleu_2:0.22415947108296613 -para_Bleu_3:0.1341003834690626 -para_Bleu_4:0.08312155143550452 -para_METEOR:0.1510085678983445 -para_ROUGE_L:0.2957598062989384 -para_CIDEr:0.12271570278513648 - -overall score of iter 30027: 0.3568458221189855 - -Save model at iter 30027 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. -Save Best-model at iter 30027 to checkpoint file. +ID seq2-ft(mix)-gt_percent-1.0 iter 1463 (epoch 1), +loss = OrderedDict([('loss_ce', 0.322), ('loss_counter', 0.128), ('loss_bbox', 0.026), ('loss_giou', 0.301), ('loss_self_iou', 0.011), ('cardinality_error', 7.699), ('loss_ce_0', 0.335), ('loss_counter_0', 0.129), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.316), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.699), ('loss_caption_0', 2.448), ('loss_caption', 2.462), ('total_loss', 13.729)]), +time/iter = 0.660, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1596 (epoch 1), +loss = OrderedDict([('loss_ce', 0.311), ('loss_counter', 0.126), ('loss_bbox', 0.022), ('loss_giou', 0.284), ('loss_self_iou', 0.01), ('cardinality_error', 8.233), ('loss_ce_0', 0.322), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.31), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 8.233), ('loss_caption_0', 2.348), ('loss_caption', 2.348), ('total_loss', 13.16)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1729 (epoch 1), +loss = OrderedDict([('loss_ce', 0.311), ('loss_counter', 0.124), ('loss_bbox', 0.023), ('loss_giou', 0.273), ('loss_self_iou', 0.01), ('cardinality_error', 7.632), ('loss_ce_0', 0.32), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.307), ('loss_self_iou_0', 0.012), ('cardinality_error_0', 7.632), ('loss_caption_0', 2.363), ('loss_caption', 2.353), ('total_loss', 13.14)]), +time/iter = 0.156, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1862 (epoch 1), +loss = OrderedDict([('loss_ce', 0.316), ('loss_counter', 0.12), ('loss_bbox', 0.023), ('loss_giou', 0.268), ('loss_self_iou', 0.01), ('cardinality_error', 7.609), ('loss_ce_0', 0.32), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.29), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.609), ('loss_caption_0', 2.439), ('loss_caption', 2.419), ('total_loss', 13.343)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 1995 (epoch 1), +loss = OrderedDict([('loss_ce', 0.314), ('loss_counter', 0.122), ('loss_bbox', 0.022), ('loss_giou', 0.281), ('loss_self_iou', 0.009), ('cardinality_error', 7.541), ('loss_ce_0', 0.322), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.309), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.541), ('loss_caption_0', 2.503), ('loss_caption', 2.503), ('total_loss', 13.766)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2128 (epoch 1), +loss = OrderedDict([('loss_ce', 0.316), ('loss_counter', 0.126), ('loss_bbox', 0.024), ('loss_giou', 0.284), ('loss_self_iou', 0.009), ('cardinality_error', 7.789), ('loss_ce_0', 0.324), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.301), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.789), ('loss_caption_0', 2.5), ('loss_caption', 2.493), ('total_loss', 13.73)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2261 (epoch 1), +loss = OrderedDict([('loss_ce', 0.31), ('loss_counter', 0.122), ('loss_bbox', 0.023), ('loss_giou', 0.285), ('loss_self_iou', 0.012), ('cardinality_error', 7.902), ('loss_ce_0', 0.316), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.304), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.902), ('loss_caption_0', 2.425), ('loss_caption', 2.424), ('total_loss', 13.426)]), +time/iter = 0.156, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2394 (epoch 1), +loss = OrderedDict([('loss_ce', 0.315), ('loss_counter', 0.126), ('loss_bbox', 0.025), ('loss_giou', 0.29), ('loss_self_iou', 0.011), ('cardinality_error', 7.534), ('loss_ce_0', 0.323), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.308), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.534), ('loss_caption_0', 2.439), ('loss_caption', 2.435), ('total_loss', 13.54)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2527 (epoch 1), +loss = OrderedDict([('loss_ce', 0.313), ('loss_counter', 0.125), ('loss_bbox', 0.023), ('loss_giou', 0.276), ('loss_self_iou', 0.009), ('cardinality_error', 7.647), ('loss_ce_0', 0.319), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.296), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.647), ('loss_caption_0', 2.454), ('loss_caption', 2.455), ('total_loss', 13.492)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2660 (epoch 1), +loss = OrderedDict([('loss_ce', 0.313), ('loss_counter', 0.131), ('loss_bbox', 0.023), ('loss_giou', 0.273), ('loss_self_iou', 0.01), ('cardinality_error', 8.0), ('loss_ce_0', 0.317), ('loss_counter_0', 0.128), ('loss_bbox_0', 0.026), ('loss_giou_0', 0.294), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 8.0), ('loss_caption_0', 2.464), ('loss_caption', 2.451), ('total_loss', 13.487)]), +time/iter = 0.167, bad_vid = 0.000 + +Validation results of iter 2666: +Bleu_1:0.18247710374533507 +Bleu_2:0.10433126216854799 +Bleu_3:0.05471515540980739 +Bleu_4:0.025315544998990337 +METEOR:0.08392673175891194 +ROUGE_L:0.16810710582244187 +CIDEr:0.48711946137609907 +Recall:0.23104975652842194 +Precision:0.4442690424090867 +soda_c:0.06454827356060923 +para_Bleu_1:0.27953804293947354 +para_Bleu_2:0.1635778619591909 +para_Bleu_3:0.09761782578266559 +para_Bleu_4:0.060085255296605154 +para_METEOR:0.13134445752685775 +para_ROUGE_L:0.3040652157082556 +para_CIDEr:0.15701615141849948 + +overall score of iter 2666: 0.34844586424196233 + +Save model at iter 2666 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. +Save Best-model at iter 2666 to checkpoint file. Save info to info.json -ID seq2-ft(mix)-gt_percent-1.0 iter 31000 (epoch 3), -loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.123), ('loss_bbox', 0.073), ('loss_giou', 0.202), ('loss_self_iou', 0.114), ('cardinality_error', 3.772), ('loss_ce_0', 0.296), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.115), ('cardinality_error_0', 3.772), ('loss_caption_0', 3.24), ('loss_caption', 3.242), ('total_loss', 15.889)]), -time/iter = 0.725, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 32000 (epoch 3), -loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.117), ('loss_bbox', 0.069), ('loss_giou', 0.193), ('loss_self_iou', 0.093), ('cardinality_error', 3.66), ('loss_ce_0', 0.3), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.66), ('loss_caption_0', 3.251), ('loss_caption', 3.248), ('total_loss', 15.869)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 33000 (epoch 3), -loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.126), ('loss_bbox', 0.07), ('loss_giou', 0.197), ('loss_self_iou', 0.102), ('cardinality_error', 3.787), ('loss_ce_0', 0.301), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.199), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.787), ('loss_caption_0', 3.223), ('loss_caption', 3.225), ('total_loss', 15.81)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 34000 (epoch 3), -loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.121), ('loss_bbox', 0.076), ('loss_giou', 0.201), ('loss_self_iou', 0.107), ('cardinality_error', 3.719), ('loss_ce_0', 0.296), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.077), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.108), ('cardinality_error_0', 3.719), ('loss_caption_0', 3.21), ('loss_caption', 3.206), ('total_loss', 15.752)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 35000 (epoch 3), -loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.122), ('loss_bbox', 0.074), ('loss_giou', 0.201), ('loss_self_iou', 0.1), ('cardinality_error', 3.761), ('loss_ce_0', 0.304), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.761), ('loss_caption_0', 3.261), ('loss_caption', 3.267), ('total_loss', 16.006)]), -time/iter = 0.187, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 36000 (epoch 3), -loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.12), ('loss_bbox', 0.074), ('loss_giou', 0.202), ('loss_self_iou', 0.096), ('cardinality_error', 3.731), ('loss_ce_0', 0.302), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.075), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.731), ('loss_caption_0', 3.322), ('loss_caption', 3.322), ('total_loss', 16.237)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 37000 (epoch 3), -loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.12), ('loss_bbox', 0.069), ('loss_giou', 0.193), ('loss_self_iou', 0.088), ('cardinality_error', 3.747), ('loss_ce_0', 0.306), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.089), ('cardinality_error_0', 3.747), ('loss_caption_0', 3.276), ('loss_caption', 3.278), ('total_loss', 16.005)]), -time/iter = 0.194, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 38000 (epoch 3), -loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.122), ('loss_bbox', 0.073), ('loss_giou', 0.198), ('loss_self_iou', 0.096), ('cardinality_error', 3.747), ('loss_ce_0', 0.295), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.199), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.747), ('loss_caption_0', 3.26), ('loss_caption', 3.267), ('total_loss', 15.944)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 39000 (epoch 3), -loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.12), ('loss_bbox', 0.073), ('loss_giou', 0.194), ('loss_self_iou', 0.096), ('cardinality_error', 3.714), ('loss_ce_0', 0.3), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.714), ('loss_caption_0', 3.29), ('loss_caption', 3.284), ('total_loss', 16.029)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 40000 (epoch 3), -loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.124), ('loss_bbox', 0.068), ('loss_giou', 0.187), ('loss_self_iou', 0.098), ('cardinality_error', 3.742), ('loss_ce_0', 0.302), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.742), ('loss_caption_0', 3.255), ('loss_caption', 3.258), ('total_loss', 15.861)]), -time/iter = 0.191, bad_vid = 0.000 - -Validation results of iter 40036: -Bleu_1:0.16003947012491918 -Bleu_2:0.08640386650819816 -Bleu_3:0.045769192920880976 -Bleu_4:0.023139762266241797 -METEOR:0.08893476927946467 -ROUGE_L:0.16285119298911696 -CIDEr:0.27850058398714506 -Recall:0.4974410652224822 -Precision:0.571762083926507 -soda_c:0.04898353247531122 -para_Bleu_1:0.4116267700746525 -para_Bleu_2:0.23315066082372427 -para_Bleu_3:0.139785630195007 -para_Bleu_4:0.08689414164874545 -para_METEOR:0.15321412716959742 -para_ROUGE_L:0.2993749803089721 -para_CIDEr:0.12755194391496638 - -overall score of iter 40036: 0.3676602127333093 - -Save model at iter 40036 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. -Save Best-model at iter 40036 to checkpoint file. +ID seq2-ft(mix)-gt_percent-1.0 iter 2793 (epoch 2), +loss = OrderedDict([('loss_ce', 0.309), ('loss_counter', 0.119), ('loss_bbox', 0.021), ('loss_giou', 0.26), ('loss_self_iou', 0.01), ('cardinality_error', 7.556), ('loss_ce_0', 0.312), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.285), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.556), ('loss_caption_0', 2.27), ('loss_caption', 2.276), ('total_loss', 12.632)]), +time/iter = 0.666, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 2926 (epoch 2), +loss = OrderedDict([('loss_ce', 0.313), ('loss_counter', 0.121), ('loss_bbox', 0.023), ('loss_giou', 0.266), ('loss_self_iou', 0.008), ('cardinality_error', 7.444), ('loss_ce_0', 0.317), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.025), ('loss_giou_0', 0.287), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.444), ('loss_caption_0', 2.276), ('loss_caption', 2.291), ('total_loss', 12.726)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3059 (epoch 2), +loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.127), ('loss_bbox', 0.02), ('loss_giou', 0.272), ('loss_self_iou', 0.008), ('cardinality_error', 8.135), ('loss_ce_0', 0.302), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.296), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 8.135), ('loss_caption_0', 2.364), ('loss_caption', 2.364), ('total_loss', 13.057)]), +time/iter = 0.165, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3192 (epoch 2), +loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.122), ('loss_bbox', 0.022), ('loss_giou', 0.266), ('loss_self_iou', 0.008), ('cardinality_error', 7.699), ('loss_ce_0', 0.306), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.286), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.699), ('loss_caption_0', 2.367), ('loss_caption', 2.381), ('total_loss', 13.038)]), +time/iter = 0.177, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3325 (epoch 2), +loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.123), ('loss_bbox', 0.021), ('loss_giou', 0.274), ('loss_self_iou', 0.009), ('cardinality_error', 7.932), ('loss_ce_0', 0.3), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.291), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.932), ('loss_caption_0', 2.323), ('loss_caption', 2.33), ('total_loss', 12.887)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3458 (epoch 2), +loss = OrderedDict([('loss_ce', 0.31), ('loss_counter', 0.124), ('loss_bbox', 0.021), ('loss_giou', 0.277), ('loss_self_iou', 0.01), ('cardinality_error', 7.865), ('loss_ce_0', 0.31), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.295), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.865), ('loss_caption_0', 2.351), ('loss_caption', 2.341), ('total_loss', 13.038)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3591 (epoch 2), +loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.114), ('loss_bbox', 0.022), ('loss_giou', 0.263), ('loss_self_iou', 0.009), ('cardinality_error', 7.586), ('loss_ce_0', 0.308), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.285), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.586), ('loss_caption_0', 2.222), ('loss_caption', 2.223), ('total_loss', 12.425)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3724 (epoch 2), +loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.123), ('loss_bbox', 0.023), ('loss_giou', 0.265), ('loss_self_iou', 0.009), ('cardinality_error', 7.624), ('loss_ce_0', 0.307), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.279), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.624), ('loss_caption_0', 2.38), ('loss_caption', 2.368), ('total_loss', 13.014)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3857 (epoch 2), +loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.115), ('loss_bbox', 0.021), ('loss_giou', 0.264), ('loss_self_iou', 0.009), ('cardinality_error', 7.489), ('loss_ce_0', 0.312), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.279), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.489), ('loss_caption_0', 2.343), ('loss_caption', 2.344), ('total_loss', 12.897)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 3990 (epoch 2), +loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.134), ('loss_bbox', 0.02), ('loss_giou', 0.268), ('loss_self_iou', 0.012), ('cardinality_error', 8.301), ('loss_ce_0', 0.299), ('loss_counter_0', 0.131), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.289), ('loss_self_iou_0', 0.013), ('cardinality_error_0', 8.301), ('loss_caption_0', 2.327), ('loss_caption', 2.346), ('total_loss', 12.9)]), +time/iter = 0.154, bad_vid = 0.000 + +Validation results of iter 3999: +Bleu_1:0.18812761655735627 +Bleu_2:0.11394688266117041 +Bleu_3:0.06350983100569632 +Bleu_4:0.03295035253718016 +METEOR:0.08673497362280043 +ROUGE_L:0.17099683701262633 +CIDEr:0.534654554166069 +Recall:0.2545535313519452 +Precision:0.4357073390990242 +soda_c:0.06940030844072555 +para_Bleu_1:0.31911536052560924 +para_Bleu_2:0.19074275606485158 +para_Bleu_3:0.11503629156908896 +para_Bleu_4:0.07096292455051724 +para_METEOR:0.14141970569772275 +para_ROUGE_L:0.3133292457236414 +para_CIDEr:0.18756071216976763 + +overall score of iter 3999: 0.3999433424180076 + +Save model at iter 3999 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. +Save Best-model at iter 3999 to checkpoint file. Save info to info.json -ID seq2-ft(mix)-gt_percent-1.0 iter 41000 (epoch 4), -loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.122), ('loss_bbox', 0.071), ('loss_giou', 0.196), ('loss_self_iou', 0.094), ('cardinality_error', 3.73), ('loss_ce_0', 0.303), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.73), ('loss_caption_0', 3.159), ('loss_caption', 3.162), ('total_loss', 15.549)]), -time/iter = 0.733, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 42000 (epoch 4), -loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.117), ('loss_bbox', 0.072), ('loss_giou', 0.188), ('loss_self_iou', 0.097), ('cardinality_error', 3.698), ('loss_ce_0', 0.298), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.698), ('loss_caption_0', 3.191), ('loss_caption', 3.187), ('total_loss', 15.571)]), -time/iter = 0.194, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 43000 (epoch 4), -loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.12), ('loss_bbox', 0.07), ('loss_giou', 0.198), ('loss_self_iou', 0.089), ('cardinality_error', 3.785), ('loss_ce_0', 0.306), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.2), ('loss_self_iou_0', 0.087), ('cardinality_error_0', 3.785), ('loss_caption_0', 3.247), ('loss_caption', 3.249), ('total_loss', 15.93)]), -time/iter = 0.195, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 44000 (epoch 4), -loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.12), ('loss_bbox', 0.072), ('loss_giou', 0.194), ('loss_self_iou', 0.104), ('cardinality_error', 3.727), ('loss_ce_0', 0.302), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.727), ('loss_caption_0', 3.228), ('loss_caption', 3.227), ('total_loss', 15.794)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 45000 (epoch 4), -loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.12), ('loss_bbox', 0.07), ('loss_giou', 0.194), ('loss_self_iou', 0.094), ('cardinality_error', 3.684), ('loss_ce_0', 0.304), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.684), ('loss_caption_0', 3.138), ('loss_caption', 3.143), ('total_loss', 15.458)]), -time/iter = 0.190, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 46000 (epoch 4), -loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.123), ('loss_bbox', 0.071), ('loss_giou', 0.194), ('loss_self_iou', 0.107), ('cardinality_error', 3.8), ('loss_ce_0', 0.301), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.8), ('loss_caption_0', 3.198), ('loss_caption', 3.202), ('total_loss', 15.69)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 47000 (epoch 4), -loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.124), ('loss_bbox', 0.071), ('loss_giou', 0.193), ('loss_self_iou', 0.1), ('cardinality_error', 3.724), ('loss_ce_0', 0.302), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.724), ('loss_caption_0', 3.166), ('loss_caption', 3.167), ('total_loss', 15.544)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 48000 (epoch 4), -loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.126), ('loss_bbox', 0.074), ('loss_giou', 0.194), ('loss_self_iou', 0.1), ('cardinality_error', 3.779), ('loss_ce_0', 0.303), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.779), ('loss_caption_0', 3.197), ('loss_caption', 3.204), ('total_loss', 15.693)]), -time/iter = 0.194, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 49000 (epoch 4), -loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.117), ('loss_bbox', 0.072), ('loss_giou', 0.186), ('loss_self_iou', 0.103), ('cardinality_error', 3.67), ('loss_ce_0', 0.299), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.67), ('loss_caption_0', 3.197), ('loss_caption', 3.193), ('total_loss', 15.597)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 50000 (epoch 4), -loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.122), ('loss_bbox', 0.071), ('loss_giou', 0.191), ('loss_self_iou', 0.1), ('cardinality_error', 3.769), ('loss_ce_0', 0.303), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.769), ('loss_caption_0', 3.195), ('loss_caption', 3.196), ('total_loss', 15.646)]), -time/iter = 0.193, bad_vid = 0.000 - -Validation results of iter 50045: -Bleu_1:0.1612752203314224 -Bleu_2:0.08712092952271142 -Bleu_3:0.04643407984417907 -Bleu_4:0.024237450149938583 -METEOR:0.0888552980469009 -ROUGE_L:0.16165678007821221 -CIDEr:0.28844655875134945 -Recall:0.5079771255793173 -Precision:0.5707494407158785 -soda_c:0.05143467092505771 -para_Bleu_1:0.425828341023263 -para_Bleu_2:0.2431293051387748 -para_Bleu_3:0.14662751878582 -para_Bleu_4:0.09131956416083617 -para_METEOR:0.15868276543147294 -para_ROUGE_L:0.30762031965083425 -para_CIDEr:0.1438790695271004 - -overall score of iter 50045: 0.39388139911940956 - -Save model at iter 50045 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. -Save Best-model at iter 50045 to checkpoint file. +ID seq2-ft(mix)-gt_percent-1.0 iter 4123 (epoch 3), +loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.129), ('loss_bbox', 0.021), ('loss_giou', 0.256), ('loss_self_iou', 0.008), ('cardinality_error', 7.925), ('loss_ce_0', 0.307), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.275), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.925), ('loss_caption_0', 2.272), ('loss_caption', 2.28), ('total_loss', 12.579)]), +time/iter = 0.678, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 4256 (epoch 3), +loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.121), ('loss_bbox', 0.02), ('loss_giou', 0.256), ('loss_self_iou', 0.008), ('cardinality_error', 7.632), ('loss_ce_0', 0.31), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.276), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.632), ('loss_caption_0', 2.247), ('loss_caption', 2.252), ('total_loss', 12.484)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 4389 (epoch 3), +loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.12), ('loss_bbox', 0.021), ('loss_giou', 0.26), ('loss_self_iou', 0.011), ('cardinality_error', 7.526), ('loss_ce_0', 0.309), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.272), ('loss_self_iou_0', 0.01), ('cardinality_error_0', 7.526), ('loss_caption_0', 2.194), ('loss_caption', 2.205), ('total_loss', 12.273)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 4522 (epoch 3), +loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.115), ('loss_bbox', 0.019), ('loss_giou', 0.248), ('loss_self_iou', 0.007), ('cardinality_error', 7.519), ('loss_ce_0', 0.303), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.262), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.519), ('loss_caption_0', 2.335), ('loss_caption', 2.326), ('total_loss', 12.689)]), +time/iter = 0.170, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 4655 (epoch 3), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.122), ('loss_bbox', 0.02), ('loss_giou', 0.263), ('loss_self_iou', 0.008), ('cardinality_error', 7.97), ('loss_ce_0', 0.298), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.285), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.97), ('loss_caption_0', 2.254), ('loss_caption', 2.267), ('total_loss', 12.545)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 4788 (epoch 3), +loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.118), ('loss_bbox', 0.021), ('loss_giou', 0.253), ('loss_self_iou', 0.008), ('cardinality_error', 7.481), ('loss_ce_0', 0.308), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.268), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.481), ('loss_caption_0', 2.208), ('loss_caption', 2.195), ('total_loss', 12.24)]), +time/iter = 0.151, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 4921 (epoch 3), +loss = OrderedDict([('loss_ce', 0.306), ('loss_counter', 0.12), ('loss_bbox', 0.019), ('loss_giou', 0.262), ('loss_self_iou', 0.01), ('cardinality_error', 7.842), ('loss_ce_0', 0.305), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.284), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.842), ('loss_caption_0', 2.186), ('loss_caption', 2.196), ('total_loss', 12.289)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 5054 (epoch 3), +loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.121), ('loss_bbox', 0.022), ('loss_giou', 0.26), ('loss_self_iou', 0.009), ('cardinality_error', 7.887), ('loss_ce_0', 0.305), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.271), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.887), ('loss_caption_0', 2.242), ('loss_caption', 2.239), ('total_loss', 12.422)]), +time/iter = 0.170, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 5187 (epoch 3), +loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.124), ('loss_bbox', 0.021), ('loss_giou', 0.262), ('loss_self_iou', 0.009), ('cardinality_error', 7.932), ('loss_ce_0', 0.305), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.277), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.932), ('loss_caption_0', 2.25), ('loss_caption', 2.246), ('total_loss', 12.483)]), +time/iter = 0.166, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 5320 (epoch 3), +loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.12), ('loss_bbox', 0.022), ('loss_giou', 0.26), ('loss_self_iou', 0.006), ('cardinality_error', 7.729), ('loss_ce_0', 0.298), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.024), ('loss_giou_0', 0.279), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.729), ('loss_caption_0', 2.287), ('loss_caption', 2.298), ('total_loss', 12.64)]), +time/iter = 0.161, bad_vid = 0.000 + +Validation results of iter 5332: +Bleu_1:0.19536023703614988 +Bleu_2:0.11676341716851109 +Bleu_3:0.06337153157323498 +Bleu_4:0.031788948303475714 +METEOR:0.09287502887069582 +ROUGE_L:0.18168372139225142 +CIDEr:0.5345089450528974 +Recall:0.26186565000159123 +Precision:0.4578470702650138 +soda_c:0.06891495599002981 +para_Bleu_1:0.3645537642333956 +para_Bleu_2:0.21504928179111618 +para_Bleu_3:0.1297486406737134 +para_Bleu_4:0.08010111193897063 +para_METEOR:0.1518569517959942 +para_ROUGE_L:0.3241825281759821 +para_CIDEr:0.22211083978975357 + +overall score of iter 5332: 0.4540689035247184 + +Save model at iter 5332 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. +Save Best-model at iter 5332 to checkpoint file. Save info to info.json -ID seq2-ft(mix)-gt_percent-1.0 iter 51000 (epoch 5), -loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.119), ('loss_bbox', 0.072), ('loss_giou', 0.19), ('loss_self_iou', 0.1), ('cardinality_error', 3.708), ('loss_ce_0', 0.304), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.708), ('loss_caption_0', 3.123), ('loss_caption', 3.122), ('total_loss', 15.345)]), -time/iter = 0.739, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 52000 (epoch 5), -loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.122), ('loss_bbox', 0.07), ('loss_giou', 0.195), ('loss_self_iou', 0.091), ('cardinality_error', 3.787), ('loss_ce_0', 0.302), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.198), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.787), ('loss_caption_0', 3.08), ('loss_caption', 3.08), ('total_loss', 15.224)]), -time/iter = 0.193, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 53000 (epoch 5), -loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.12), ('loss_bbox', 0.07), ('loss_giou', 0.192), ('loss_self_iou', 0.101), ('cardinality_error', 3.688), ('loss_ce_0', 0.302), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.688), ('loss_caption_0', 3.121), ('loss_caption', 3.125), ('total_loss', 15.366)]), -time/iter = 0.196, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 54000 (epoch 5), -loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.12), ('loss_bbox', 0.069), ('loss_giou', 0.184), ('loss_self_iou', 0.096), ('cardinality_error', 3.66), ('loss_ce_0', 0.303), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.66), ('loss_caption_0', 3.151), ('loss_caption', 3.158), ('total_loss', 15.44)]), -time/iter = 0.193, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 55000 (epoch 5), -loss = OrderedDict([('loss_ce', 0.314), ('loss_counter', 0.123), ('loss_bbox', 0.069), ('loss_giou', 0.186), ('loss_self_iou', 0.102), ('cardinality_error', 3.759), ('loss_ce_0', 0.314), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.188), ('loss_self_iou_0', 0.103), ('cardinality_error_0', 3.759), ('loss_caption_0', 3.137), ('loss_caption', 3.138), ('total_loss', 15.427)]), -time/iter = 0.193, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 56000 (epoch 5), -loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.12), ('loss_bbox', 0.069), ('loss_giou', 0.186), ('loss_self_iou', 0.102), ('cardinality_error', 3.7), ('loss_ce_0', 0.303), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.7), ('loss_caption_0', 3.128), ('loss_caption', 3.132), ('total_loss', 15.353)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 57000 (epoch 5), -loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.125), ('loss_bbox', 0.069), ('loss_giou', 0.192), ('loss_self_iou', 0.094), ('cardinality_error', 3.833), ('loss_ce_0', 0.308), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.833), ('loss_caption_0', 3.157), ('loss_caption', 3.154), ('total_loss', 15.516)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 58000 (epoch 5), -loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.116), ('loss_bbox', 0.072), ('loss_giou', 0.192), ('loss_self_iou', 0.099), ('cardinality_error', 3.724), ('loss_ce_0', 0.3), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.724), ('loss_caption_0', 3.092), ('loss_caption', 3.088), ('total_loss', 15.209)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 59000 (epoch 5), -loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.126), ('loss_bbox', 0.07), ('loss_giou', 0.187), ('loss_self_iou', 0.092), ('cardinality_error', 3.806), ('loss_ce_0', 0.304), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.806), ('loss_caption_0', 3.204), ('loss_caption', 3.204), ('total_loss', 15.668)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 60000 (epoch 5), -loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.119), ('loss_bbox', 0.073), ('loss_giou', 0.197), ('loss_self_iou', 0.102), ('cardinality_error', 3.73), ('loss_ce_0', 0.298), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.074), ('loss_giou_0', 0.198), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.73), ('loss_caption_0', 3.185), ('loss_caption', 3.179), ('total_loss', 15.62)]), -time/iter = 0.192, bad_vid = 0.000 - -Validation results of iter 60054: -Bleu_1:0.16203040821313286 -Bleu_2:0.087418866671477 -Bleu_3:0.04641401855891123 -Bleu_4:0.023872355329811287 -METEOR:0.08736154709181514 -ROUGE_L:0.16095171754962678 -CIDEr:0.3019460931650574 -Recall:0.5237442505746305 -Precision:0.5691986983933232 -soda_c:0.05366939846142926 -para_Bleu_1:0.4285515683378188 -para_Bleu_2:0.24896313523930838 -para_Bleu_3:0.15083849533584295 -para_Bleu_4:0.09425440122753082 -para_METEOR:0.15418242275887206 -para_ROUGE_L:0.3037081433191389 -para_CIDEr:0.16822639157343386 - -overall score of iter 60054: 0.41666321555983676 - -Save model at iter 60054 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. -Save Best-model at iter 60054 to checkpoint file. +ID seq2-ft(mix)-gt_percent-1.0 iter 5453 (epoch 4), +loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.113), ('loss_bbox', 0.022), ('loss_giou', 0.25), ('loss_self_iou', 0.011), ('cardinality_error', 7.519), ('loss_ce_0', 0.298), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.269), ('loss_self_iou_0', 0.011), ('cardinality_error_0', 7.519), ('loss_caption_0', 2.175), ('loss_caption', 2.176), ('total_loss', 12.088)]), +time/iter = 0.716, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 5586 (epoch 4), +loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.12), ('loss_bbox', 0.018), ('loss_giou', 0.252), ('loss_self_iou', 0.007), ('cardinality_error', 7.662), ('loss_ce_0', 0.292), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.274), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.662), ('loss_caption_0', 2.16), ('loss_caption', 2.132), ('total_loss', 11.979)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 5719 (epoch 4), +loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.13), ('loss_bbox', 0.02), ('loss_giou', 0.255), ('loss_self_iou', 0.008), ('cardinality_error', 8.451), ('loss_ce_0', 0.302), ('loss_counter_0', 0.127), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.273), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 8.451), ('loss_caption_0', 2.166), ('loss_caption', 2.164), ('total_loss', 12.113)]), +time/iter = 0.156, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 5852 (epoch 4), +loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.12), ('loss_bbox', 0.019), ('loss_giou', 0.246), ('loss_self_iou', 0.007), ('cardinality_error', 7.835), ('loss_ce_0', 0.302), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.267), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.835), ('loss_caption_0', 2.122), ('loss_caption', 2.111), ('total_loss', 11.841)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 5985 (epoch 4), +loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.122), ('loss_bbox', 0.02), ('loss_giou', 0.243), ('loss_self_iou', 0.009), ('cardinality_error', 7.474), ('loss_ce_0', 0.298), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.263), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.474), ('loss_caption_0', 2.149), ('loss_caption', 2.14), ('total_loss', 11.926)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 6118 (epoch 4), +loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.113), ('loss_bbox', 0.018), ('loss_giou', 0.241), ('loss_self_iou', 0.008), ('cardinality_error', 7.639), ('loss_ce_0', 0.302), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.259), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.639), ('loss_caption_0', 2.235), ('loss_caption', 2.215), ('total_loss', 12.218)]), +time/iter = 0.160, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 6251 (epoch 4), +loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.125), ('loss_bbox', 0.02), ('loss_giou', 0.251), ('loss_self_iou', 0.007), ('cardinality_error', 7.857), ('loss_ce_0', 0.301), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.268), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.857), ('loss_caption_0', 2.235), ('loss_caption', 2.226), ('total_loss', 12.328)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 6384 (epoch 4), +loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.124), ('loss_bbox', 0.02), ('loss_giou', 0.246), ('loss_self_iou', 0.006), ('cardinality_error', 7.82), ('loss_ce_0', 0.301), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.265), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.82), ('loss_caption_0', 2.208), ('loss_caption', 2.183), ('total_loss', 12.157)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 6517 (epoch 4), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.12), ('loss_bbox', 0.02), ('loss_giou', 0.256), ('loss_self_iou', 0.008), ('cardinality_error', 7.872), ('loss_ce_0', 0.295), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.271), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.872), ('loss_caption_0', 2.135), ('loss_caption', 2.155), ('total_loss', 11.99)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 6650 (epoch 4), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.112), ('loss_bbox', 0.021), ('loss_giou', 0.244), ('loss_self_iou', 0.008), ('cardinality_error', 7.398), ('loss_ce_0', 0.297), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.26), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.398), ('loss_caption_0', 2.205), ('loss_caption', 2.202), ('total_loss', 12.127)]), +time/iter = 0.156, bad_vid = 0.000 + +Validation results of iter 6665: +Bleu_1:0.19366491706119263 +Bleu_2:0.1161802397372496 +Bleu_3:0.06381908710297783 +Bleu_4:0.0310996008751752 +METEOR:0.0900086447067842 +ROUGE_L:0.1772625018945245 +CIDEr:0.5329339889166991 +Recall:0.27822837264850414 +Precision:0.4414053002674447 +soda_c:0.0725148309247326 +para_Bleu_1:0.36779729697992286 +para_Bleu_2:0.2189609464261768 +para_Bleu_3:0.13170237886801614 +para_Bleu_4:0.08102932652379062 +para_METEOR:0.15287168689015676 +para_ROUGE_L:0.32609559286330886 +para_CIDEr:0.24981796796266917 + +overall score of iter 6665: 0.48371898137661656 + +Save model at iter 6665 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. +Save Best-model at iter 6665 to checkpoint file. Save info to info.json -ID seq2-ft(mix)-gt_percent-1.0 iter 61000 (epoch 6), -loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.117), ('loss_bbox', 0.068), ('loss_giou', 0.183), ('loss_self_iou', 0.099), ('cardinality_error', 3.687), ('loss_ce_0', 0.303), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.185), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.687), ('loss_caption_0', 3.025), ('loss_caption', 3.031), ('total_loss', 14.914)]), -time/iter = 0.715, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 62000 (epoch 6), -loss = OrderedDict([('loss_ce', 0.305), ('loss_counter', 0.125), ('loss_bbox', 0.068), ('loss_giou', 0.192), ('loss_self_iou', 0.088), ('cardinality_error', 3.809), ('loss_ce_0', 0.304), ('loss_counter_0', 0.125), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.089), ('cardinality_error_0', 3.809), ('loss_caption_0', 3.067), ('loss_caption', 3.064), ('total_loss', 15.147)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 63000 (epoch 6), -loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.113), ('loss_bbox', 0.072), ('loss_giou', 0.189), ('loss_self_iou', 0.102), ('cardinality_error', 3.636), ('loss_ce_0', 0.301), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.193), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.636), ('loss_caption_0', 3.09), ('loss_caption', 3.083), ('total_loss', 15.188)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 64000 (epoch 6), -loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.12), ('loss_bbox', 0.067), ('loss_giou', 0.185), ('loss_self_iou', 0.105), ('cardinality_error', 3.738), ('loss_ce_0', 0.309), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.186), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.738), ('loss_caption_0', 3.09), ('loss_caption', 3.088), ('total_loss', 15.193)]), -time/iter = 0.194, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 65000 (epoch 6), -loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.123), ('loss_bbox', 0.069), ('loss_giou', 0.191), ('loss_self_iou', 0.094), ('cardinality_error', 3.735), ('loss_ce_0', 0.304), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.191), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.735), ('loss_caption_0', 3.087), ('loss_caption', 3.083), ('total_loss', 15.203)]), -time/iter = 0.190, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 66000 (epoch 6), -loss = OrderedDict([('loss_ce', 0.307), ('loss_counter', 0.121), ('loss_bbox', 0.069), ('loss_giou', 0.188), ('loss_self_iou', 0.095), ('cardinality_error', 3.753), ('loss_ce_0', 0.307), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.753), ('loss_caption_0', 3.093), ('loss_caption', 3.093), ('total_loss', 15.235)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 67000 (epoch 6), -loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.123), ('loss_bbox', 0.071), ('loss_giou', 0.189), ('loss_self_iou', 0.099), ('cardinality_error', 3.781), ('loss_ce_0', 0.299), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.781), ('loss_caption_0', 3.104), ('loss_caption', 3.095), ('total_loss', 15.24)]), -time/iter = 0.194, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 68000 (epoch 6), -loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.118), ('loss_bbox', 0.073), ('loss_giou', 0.186), ('loss_self_iou', 0.102), ('cardinality_error', 3.702), ('loss_ce_0', 0.3), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.073), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.702), ('loss_caption_0', 3.092), ('loss_caption', 3.087), ('total_loss', 15.171)]), -time/iter = 0.193, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 69000 (epoch 6), -loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.116), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.087), ('cardinality_error', 3.705), ('loss_ce_0', 0.303), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.705), ('loss_caption_0', 3.087), ('loss_caption', 3.084), ('total_loss', 15.154)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 70000 (epoch 6), -loss = OrderedDict([('loss_ce', 0.308), ('loss_counter', 0.119), ('loss_bbox', 0.07), ('loss_giou', 0.188), ('loss_self_iou', 0.104), ('cardinality_error', 3.763), ('loss_ce_0', 0.309), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.763), ('loss_caption_0', 3.137), ('loss_caption', 3.142), ('total_loss', 15.421)]), -time/iter = 0.201, bad_vid = 0.000 - -Validation results of iter 70063: -Bleu_1:0.17095715677415013 -Bleu_2:0.0951967897773989 -Bleu_3:0.05145074727592996 -Bleu_4:0.026686223548170303 -METEOR:0.09033289555302068 -ROUGE_L:0.16939818741017104 -CIDEr:0.33299543538258497 -Recall:0.5001550726802355 -Precision:0.5629321740898863 -soda_c:0.05378783144134501 -para_Bleu_1:0.44719474980697405 -para_Bleu_2:0.2615784516531111 -para_Bleu_3:0.15956746990786394 -para_Bleu_4:0.09983770060804388 -para_METEOR:0.15549284849496958 -para_ROUGE_L:0.30852597622578265 -para_CIDEr:0.18758102150887232 - -overall score of iter 70063: 0.4429115706118858 - -Save model at iter 70063 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. -Save Best-model at iter 70063 to checkpoint file. +ID seq2-ft(mix)-gt_percent-1.0 iter 6783 (epoch 5), +loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.117), ('loss_bbox', 0.019), ('loss_giou', 0.24), ('loss_self_iou', 0.007), ('cardinality_error', 7.586), ('loss_ce_0', 0.29), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.257), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.586), ('loss_caption_0', 2.02), ('loss_caption', 2.014), ('total_loss', 11.332)]), +time/iter = 0.689, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 6916 (epoch 5), +loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.118), ('loss_bbox', 0.021), ('loss_giou', 0.249), ('loss_self_iou', 0.008), ('cardinality_error', 7.519), ('loss_ce_0', 0.302), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.023), ('loss_giou_0', 0.264), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.519), ('loss_caption_0', 2.118), ('loss_caption', 2.101), ('total_loss', 11.817)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 7049 (epoch 5), +loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.119), ('loss_bbox', 0.019), ('loss_giou', 0.25), ('loss_self_iou', 0.007), ('cardinality_error', 7.699), ('loss_ce_0', 0.292), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.265), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.699), ('loss_caption_0', 2.105), ('loss_caption', 2.111), ('total_loss', 11.78)]), +time/iter = 0.172, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 7182 (epoch 5), +loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.115), ('loss_bbox', 0.021), ('loss_giou', 0.242), ('loss_self_iou', 0.008), ('cardinality_error', 7.594), ('loss_ce_0', 0.288), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.257), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 7.594), ('loss_caption_0', 2.194), ('loss_caption', 2.195), ('total_loss', 12.045)]), +time/iter = 0.160, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 7315 (epoch 5), +loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.123), ('loss_bbox', 0.02), ('loss_giou', 0.254), ('loss_self_iou', 0.009), ('cardinality_error', 8.301), ('loss_ce_0', 0.291), ('loss_counter_0', 0.123), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.268), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 8.301), ('loss_caption_0', 2.096), ('loss_caption', 2.09), ('total_loss', 11.741)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 7448 (epoch 5), +loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.12), ('loss_bbox', 0.019), ('loss_giou', 0.234), ('loss_self_iou', 0.006), ('cardinality_error', 7.677), ('loss_ce_0', 0.292), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.251), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.677), ('loss_caption_0', 2.076), ('loss_caption', 2.063), ('total_loss', 11.513)]), +time/iter = 0.152, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 7581 (epoch 5), +loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.116), ('loss_bbox', 0.019), ('loss_giou', 0.238), ('loss_self_iou', 0.008), ('cardinality_error', 7.534), ('loss_ce_0', 0.295), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.253), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.534), ('loss_caption_0', 2.114), ('loss_caption', 2.112), ('total_loss', 11.718)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 7714 (epoch 5), +loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.117), ('loss_bbox', 0.018), ('loss_giou', 0.235), ('loss_self_iou', 0.008), ('cardinality_error', 7.677), ('loss_ce_0', 0.291), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.253), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.677), ('loss_caption_0', 2.167), ('loss_caption', 2.179), ('total_loss', 11.932)]), +time/iter = 0.160, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 7847 (epoch 5), +loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.118), ('loss_bbox', 0.019), ('loss_giou', 0.252), ('loss_self_iou', 0.009), ('cardinality_error', 8.053), ('loss_ce_0', 0.289), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.269), ('loss_self_iou_0', 0.009), ('cardinality_error_0', 8.053), ('loss_caption_0', 2.106), ('loss_caption', 2.115), ('total_loss', 11.804)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 7980 (epoch 5), +loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.118), ('loss_bbox', 0.019), ('loss_giou', 0.249), ('loss_self_iou', 0.007), ('cardinality_error', 7.902), ('loss_ce_0', 0.295), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.268), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.902), ('loss_caption_0', 2.151), ('loss_caption', 2.153), ('total_loss', 11.979)]), +time/iter = 0.158, bad_vid = 0.000 + +Validation results of iter 7998: +Bleu_1:0.19874944106127662 +Bleu_2:0.12266046915797622 +Bleu_3:0.07150852984916518 +Bleu_4:0.036185181004552064 +METEOR:0.09274687098087099 +ROUGE_L:0.18413336093424784 +CIDEr:0.5727051685734265 +Recall:0.259037909270404 +Precision:0.451289465457956 +soda_c:0.07263494732248185 +para_Bleu_1:0.32307562783294125 +para_Bleu_2:0.1944214796418441 +para_Bleu_3:0.11901149393254483 +para_Bleu_4:0.07454555120453704 +para_METEOR:0.14324209261218024 +para_ROUGE_L:0.31918573126228 +para_CIDEr:0.23096832321460165 + +overall score of iter 7998: 0.4487559670313189 + +Save model at iter 7998 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save info to info.json -ID seq2-ft(mix)-gt_percent-1.0 iter 71000 (epoch 7), -loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.115), ('loss_bbox', 0.067), ('loss_giou', 0.187), ('loss_self_iou', 0.091), ('cardinality_error', 3.724), ('loss_ce_0', 0.304), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.724), ('loss_caption_0', 2.994), ('loss_caption', 2.994), ('total_loss', 14.812)]), -time/iter = 0.691, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 72000 (epoch 7), -loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.118), ('loss_bbox', 0.07), ('loss_giou', 0.187), ('loss_self_iou', 0.099), ('cardinality_error', 3.665), ('loss_ce_0', 0.296), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.072), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.665), ('loss_caption_0', 2.995), ('loss_caption', 3.0), ('total_loss', 14.803)]), -time/iter = 0.193, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 73000 (epoch 7), -loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.122), ('loss_bbox', 0.067), ('loss_giou', 0.183), ('loss_self_iou', 0.099), ('cardinality_error', 3.762), ('loss_ce_0', 0.302), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.762), ('loss_caption_0', 3.03), ('loss_caption', 3.034), ('total_loss', 14.924)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 74000 (epoch 7), -loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.12), ('loss_bbox', 0.067), ('loss_giou', 0.181), ('loss_self_iou', 0.093), ('cardinality_error', 3.722), ('loss_ce_0', 0.304), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.722), ('loss_caption_0', 3.061), ('loss_caption', 3.062), ('total_loss', 15.037)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 75000 (epoch 7), -loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.124), ('loss_bbox', 0.069), ('loss_giou', 0.188), ('loss_self_iou', 0.097), ('cardinality_error', 3.835), ('loss_ce_0', 0.302), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.835), ('loss_caption_0', 3.102), ('loss_caption', 3.108), ('total_loss', 15.261)]), -time/iter = 0.195, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 76000 (epoch 7), -loss = OrderedDict([('loss_ce', 0.304), ('loss_counter', 0.118), ('loss_bbox', 0.069), ('loss_giou', 0.19), ('loss_self_iou', 0.096), ('cardinality_error', 3.787), ('loss_ce_0', 0.305), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.787), ('loss_caption_0', 3.055), ('loss_caption', 3.056), ('total_loss', 15.081)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 77000 (epoch 7), -loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.122), ('loss_bbox', 0.07), ('loss_giou', 0.191), ('loss_self_iou', 0.101), ('cardinality_error', 3.753), ('loss_ce_0', 0.3), ('loss_counter_0', 0.122), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.753), ('loss_caption_0', 3.064), ('loss_caption', 3.063), ('total_loss', 15.105)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 78000 (epoch 7), -loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.118), ('loss_bbox', 0.069), ('loss_giou', 0.192), ('loss_self_iou', 0.094), ('cardinality_error', 3.812), ('loss_ce_0', 0.302), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.071), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.812), ('loss_caption_0', 3.075), ('loss_caption', 3.081), ('total_loss', 15.186)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 79000 (epoch 7), -loss = OrderedDict([('loss_ce', 0.303), ('loss_counter', 0.119), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.099), ('cardinality_error', 3.712), ('loss_ce_0', 0.304), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.712), ('loss_caption_0', 3.004), ('loss_caption', 3.004), ('total_loss', 14.833)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 80000 (epoch 7), -loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.117), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.099), ('cardinality_error', 3.639), ('loss_ce_0', 0.298), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.185), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.639), ('loss_caption_0', 3.011), ('loss_caption', 3.021), ('total_loss', 14.846)]), -time/iter = 0.189, bad_vid = 0.000 - -Validation results of iter 80072: -Bleu_1:0.16525493799366836 -Bleu_2:0.09017429361474327 -Bleu_3:0.04843073565357156 -Bleu_4:0.025752141227780294 -METEOR:0.09042668571725655 -ROUGE_L:0.1657835735936403 -CIDEr:0.30766696683798356 -Recall:0.5070758476264831 -Precision:0.5698723815334497 -soda_c:0.05193286444599829 -para_Bleu_1:0.4299765573510605 -para_Bleu_2:0.24998607326423264 -para_Bleu_3:0.15168978606887273 -para_Bleu_4:0.09540463753102806 -para_METEOR:0.15913054274631774 -para_ROUGE_L:0.30821511076520103 -para_CIDEr:0.14655297481419807 - -overall score of iter 80072: 0.4010881550915439 - -Save model at iter 80072 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +ID seq2-ft(mix)-gt_percent-1.0 iter 8113 (epoch 6), +loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.114), ('loss_bbox', 0.019), ('loss_giou', 0.236), ('loss_self_iou', 0.008), ('cardinality_error', 7.617), ('loss_ce_0', 0.295), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.257), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.617), ('loss_caption_0', 2.036), ('loss_caption', 2.044), ('total_loss', 11.427)]), +time/iter = 0.677, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 8246 (epoch 6), +loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.119), ('loss_bbox', 0.019), ('loss_giou', 0.237), ('loss_self_iou', 0.006), ('cardinality_error', 7.827), ('loss_ce_0', 0.283), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.257), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.827), ('loss_caption_0', 2.055), ('loss_caption', 2.057), ('total_loss', 11.458)]), +time/iter = 0.160, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 8379 (epoch 6), +loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.118), ('loss_bbox', 0.018), ('loss_giou', 0.225), ('loss_self_iou', 0.005), ('cardinality_error', 7.82), ('loss_ce_0', 0.286), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.246), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.82), ('loss_caption_0', 2.046), ('loss_caption', 2.041), ('total_loss', 11.331)]), +time/iter = 0.160, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 8512 (epoch 6), +loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.114), ('loss_bbox', 0.018), ('loss_giou', 0.228), ('loss_self_iou', 0.006), ('cardinality_error', 7.654), ('loss_ce_0', 0.283), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.245), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.654), ('loss_caption_0', 1.991), ('loss_caption', 1.997), ('total_loss', 11.118)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 8645 (epoch 6), +loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.115), ('loss_bbox', 0.02), ('loss_giou', 0.251), ('loss_self_iou', 0.007), ('cardinality_error', 8.068), ('loss_ce_0', 0.287), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.265), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 8.068), ('loss_caption_0', 2.094), ('loss_caption', 2.097), ('total_loss', 11.714)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 8778 (epoch 6), +loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.121), ('loss_bbox', 0.019), ('loss_giou', 0.24), ('loss_self_iou', 0.008), ('cardinality_error', 8.008), ('loss_ce_0', 0.286), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.258), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 8.008), ('loss_caption_0', 2.092), ('loss_caption', 2.092), ('total_loss', 11.63)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 8911 (epoch 6), +loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.114), ('loss_bbox', 0.019), ('loss_giou', 0.235), ('loss_self_iou', 0.008), ('cardinality_error', 7.338), ('loss_ce_0', 0.297), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.248), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.338), ('loss_caption_0', 2.051), ('loss_caption', 2.054), ('total_loss', 11.446)]), +time/iter = 0.156, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 9044 (epoch 6), +loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.105), ('loss_bbox', 0.02), ('loss_giou', 0.227), ('loss_self_iou', 0.008), ('cardinality_error', 7.226), ('loss_ce_0', 0.292), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.243), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.226), ('loss_caption_0', 2.08), ('loss_caption', 2.084), ('total_loss', 11.478)]), +time/iter = 0.160, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 9177 (epoch 6), +loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.12), ('loss_bbox', 0.019), ('loss_giou', 0.254), ('loss_self_iou', 0.007), ('cardinality_error', 7.977), ('loss_ce_0', 0.288), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.275), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.977), ('loss_caption_0', 2.046), ('loss_caption', 2.031), ('total_loss', 11.546)]), +time/iter = 0.158, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 9310 (epoch 6), +loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.117), ('loss_bbox', 0.018), ('loss_giou', 0.236), ('loss_self_iou', 0.006), ('cardinality_error', 7.97), ('loss_ce_0', 0.281), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.252), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.97), ('loss_caption_0', 1.986), ('loss_caption', 1.995), ('total_loss', 11.157)]), +time/iter = 0.152, bad_vid = 0.000 + +Validation results of iter 9331: +Bleu_1:0.2003309018825777 +Bleu_2:0.1225756065112458 +Bleu_3:0.06724461390362559 +Bleu_4:0.033684328156599955 +METEOR:0.0938288297360794 +ROUGE_L:0.1832565856913202 +CIDEr:0.5805494889367487 +Recall:0.28578288505804933 +Precision:0.4570872842207636 +soda_c:0.07457933387713374 +para_Bleu_1:0.3713316702717572 +para_Bleu_2:0.22391267992808692 +para_Bleu_3:0.1360620228892395 +para_Bleu_4:0.08475146307949002 +para_METEOR:0.15553928732702577 +para_ROUGE_L:0.3279787647771023 +para_CIDEr:0.24807495620487915 + +overall score of iter 9331: 0.4883657066113949 + +Save model at iter 9331 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. +Save Best-model at iter 9331 to checkpoint file. Save info to info.json -ID seq2-ft(mix)-gt_percent-1.0 iter 81000 (epoch 8), -loss = OrderedDict([('loss_ce', 0.3), ('loss_counter', 0.116), ('loss_bbox', 0.064), ('loss_giou', 0.177), ('loss_self_iou', 0.098), ('cardinality_error', 3.664), ('loss_ce_0', 0.3), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.972), ('loss_caption', 2.974), ('total_loss', 14.63)]), -time/iter = 0.723, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 82000 (epoch 8), -loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.113), ('loss_bbox', 0.067), ('loss_giou', 0.179), ('loss_self_iou', 0.098), ('cardinality_error', 3.692), ('loss_ce_0', 0.301), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.692), ('loss_caption_0', 2.914), ('loss_caption', 2.912), ('total_loss', 14.413)]), -time/iter = 0.190, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 83000 (epoch 8), -loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.117), ('loss_bbox', 0.067), ('loss_giou', 0.188), ('loss_self_iou', 0.097), ('cardinality_error', 3.764), ('loss_ce_0', 0.298), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.764), ('loss_caption_0', 2.939), ('loss_caption', 2.933), ('total_loss', 14.562)]), -time/iter = 0.188, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 84000 (epoch 8), -loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.119), ('loss_bbox', 0.066), ('loss_giou', 0.18), ('loss_self_iou', 0.086), ('cardinality_error', 3.724), ('loss_ce_0', 0.3), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.086), ('cardinality_error_0', 3.724), ('loss_caption_0', 2.964), ('loss_caption', 2.963), ('total_loss', 14.614)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 85000 (epoch 8), -loss = OrderedDict([('loss_ce', 0.301), ('loss_counter', 0.114), ('loss_bbox', 0.066), ('loss_giou', 0.187), ('loss_self_iou', 0.094), ('cardinality_error', 3.73), ('loss_ce_0', 0.301), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.73), ('loss_caption_0', 2.942), ('loss_caption', 2.945), ('total_loss', 14.596)]), -time/iter = 0.190, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 86000 (epoch 8), -loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.118), ('loss_bbox', 0.067), ('loss_giou', 0.184), ('loss_self_iou', 0.096), ('cardinality_error', 3.764), ('loss_ce_0', 0.298), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.764), ('loss_caption_0', 2.989), ('loss_caption', 2.988), ('total_loss', 14.745)]), -time/iter = 0.193, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 87000 (epoch 8), -loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.119), ('loss_bbox', 0.067), ('loss_giou', 0.178), ('loss_self_iou', 0.096), ('cardinality_error', 3.692), ('loss_ce_0', 0.298), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.692), ('loss_caption_0', 2.93), ('loss_caption', 2.931), ('total_loss', 14.465)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 88000 (epoch 8), -loss = OrderedDict([('loss_ce', 0.299), ('loss_counter', 0.117), ('loss_bbox', 0.068), ('loss_giou', 0.181), ('loss_self_iou', 0.102), ('cardinality_error', 3.74), ('loss_ce_0', 0.298), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.105), ('cardinality_error_0', 3.74), ('loss_caption_0', 2.945), ('loss_caption', 2.939), ('total_loss', 14.538)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 89000 (epoch 8), -loss = OrderedDict([('loss_ce', 0.302), ('loss_counter', 0.124), ('loss_bbox', 0.069), ('loss_giou', 0.186), ('loss_self_iou', 0.096), ('cardinality_error', 3.911), ('loss_ce_0', 0.303), ('loss_counter_0', 0.124), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.188), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.911), ('loss_caption_0', 2.981), ('loss_caption', 2.985), ('total_loss', 14.762)]), -time/iter = 0.190, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 90000 (epoch 8), -loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.113), ('loss_bbox', 0.066), ('loss_giou', 0.174), ('loss_self_iou', 0.099), ('cardinality_error', 3.667), ('loss_ce_0', 0.3), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.667), ('loss_caption_0', 2.946), ('loss_caption', 2.945), ('total_loss', 14.493)]), -time/iter = 0.191, bad_vid = 0.000 - -Validation results of iter 90081: -Bleu_1:0.1659435247550983 -Bleu_2:0.09010888064116455 -Bleu_3:0.04740925434645997 -Bleu_4:0.023810200153797586 -METEOR:0.0893691583245007 -ROUGE_L:0.16481267120708817 -CIDEr:0.3096929324572276 -Recall:0.5271698247293078 -Precision:0.5766981899532185 -soda_c:0.05637593299631936 -para_Bleu_1:0.4507795558374508 -para_Bleu_2:0.2668765313566654 -para_Bleu_3:0.16324000259413463 -para_Bleu_4:0.10292908422008885 -para_METEOR:0.163503434468027 -para_ROUGE_L:0.3141109355407807 -para_CIDEr:0.1830754815850521 - -overall score of iter 90081: 0.44950800027316795 - -Save model at iter 90081 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. -Save Best-model at iter 90081 to checkpoint file. +ID seq2-ft(mix)-gt_percent-1.0 iter 9443 (epoch 7), +loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.114), ('loss_bbox', 0.018), ('loss_giou', 0.226), ('loss_self_iou', 0.006), ('cardinality_error', 7.617), ('loss_ce_0', 0.292), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.239), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.617), ('loss_caption_0', 2.065), ('loss_caption', 2.061), ('total_loss', 11.394)]), +time/iter = 0.717, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 9576 (epoch 7), +loss = OrderedDict([('loss_ce', 0.285), ('loss_counter', 0.119), ('loss_bbox', 0.02), ('loss_giou', 0.231), ('loss_self_iou', 0.006), ('cardinality_error', 7.917), ('loss_ce_0', 0.284), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.252), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.917), ('loss_caption_0', 1.977), ('loss_caption', 1.974), ('total_loss', 11.093)]), +time/iter = 0.165, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 9709 (epoch 7), +loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.117), ('loss_bbox', 0.016), ('loss_giou', 0.224), ('loss_self_iou', 0.006), ('cardinality_error', 8.098), ('loss_ce_0', 0.29), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.242), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 8.098), ('loss_caption_0', 2.051), ('loss_caption', 2.063), ('total_loss', 11.373)]), +time/iter = 0.170, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 9842 (epoch 7), +loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.11), ('loss_bbox', 0.018), ('loss_giou', 0.242), ('loss_self_iou', 0.007), ('cardinality_error', 7.662), ('loss_ce_0', 0.286), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.262), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.662), ('loss_caption_0', 1.939), ('loss_caption', 1.953), ('total_loss', 11.058)]), +time/iter = 0.169, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 9975 (epoch 7), +loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.116), ('loss_bbox', 0.017), ('loss_giou', 0.238), ('loss_self_iou', 0.006), ('cardinality_error', 8.233), ('loss_ce_0', 0.281), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.255), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 8.233), ('loss_caption_0', 2.024), ('loss_caption', 2.026), ('total_loss', 11.31)]), +time/iter = 0.167, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 10108 (epoch 7), +loss = OrderedDict([('loss_ce', 0.283), ('loss_counter', 0.111), ('loss_bbox', 0.018), ('loss_giou', 0.232), ('loss_self_iou', 0.006), ('cardinality_error', 7.466), ('loss_ce_0', 0.279), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.246), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.466), ('loss_caption_0', 1.878), ('loss_caption', 1.882), ('total_loss', 10.667)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 10241 (epoch 7), +loss = OrderedDict([('loss_ce', 0.285), ('loss_counter', 0.119), ('loss_bbox', 0.018), ('loss_giou', 0.24), ('loss_self_iou', 0.007), ('cardinality_error', 7.722), ('loss_ce_0', 0.282), ('loss_counter_0', 0.119), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.253), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.722), ('loss_caption_0', 1.984), ('loss_caption', 1.988), ('total_loss', 11.165)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 10374 (epoch 7), +loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.113), ('loss_bbox', 0.017), ('loss_giou', 0.225), ('loss_self_iou', 0.007), ('cardinality_error', 7.692), ('loss_ce_0', 0.285), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.241), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.692), ('loss_caption_0', 2.089), ('loss_caption', 2.094), ('total_loss', 11.498)]), +time/iter = 0.164, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 10507 (epoch 7), +loss = OrderedDict([('loss_ce', 0.287), ('loss_counter', 0.113), ('loss_bbox', 0.019), ('loss_giou', 0.22), ('loss_self_iou', 0.007), ('cardinality_error', 7.564), ('loss_ce_0', 0.283), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.021), ('loss_giou_0', 0.241), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.564), ('loss_caption_0', 1.936), ('loss_caption', 1.935), ('total_loss', 10.84)]), +time/iter = 0.165, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 10640 (epoch 7), +loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.115), ('loss_bbox', 0.02), ('loss_giou', 0.232), ('loss_self_iou', 0.008), ('cardinality_error', 7.549), ('loss_ce_0', 0.278), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.022), ('loss_giou_0', 0.249), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.549), ('loss_caption_0', 2.041), ('loss_caption', 2.042), ('total_loss', 11.323)]), +time/iter = 0.178, bad_vid = 0.000 + +Validation results of iter 10664: +Bleu_1:0.19584871429233122 +Bleu_2:0.1203954133477019 +Bleu_3:0.06765236989260215 +Bleu_4:0.03515047236439923 +METEOR:0.09347581038898298 +ROUGE_L:0.18336361365161372 +CIDEr:0.5642570328531701 +Recall:0.287053410514844 +Precision:0.4506790316418327 +soda_c:0.07315525040409161 +para_Bleu_1:0.39595219023577966 +para_Bleu_2:0.23717913606151478 +para_Bleu_3:0.14480681642134902 +para_Bleu_4:0.0901695364250172 +para_METEOR:0.16127903027678414 +para_ROUGE_L:0.3324403291093838 +para_CIDEr:0.23804687234043756 + +overall score of iter 10664: 0.48949543904223886 + +Save model at iter 10664 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. +Save Best-model at iter 10664 to checkpoint file. Save info to info.json -ID seq2-ft(mix)-gt_percent-1.0 iter 91000 (epoch 9), -loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.121), ('loss_bbox', 0.066), ('loss_giou', 0.179), ('loss_self_iou', 0.097), ('cardinality_error', 3.807), ('loss_ce_0', 0.298), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.807), ('loss_caption_0', 2.916), ('loss_caption', 2.914), ('total_loss', 14.411)]), +ID seq2-ft(mix)-gt_percent-1.0 iter 10773 (epoch 8), +loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.114), ('loss_bbox', 0.017), ('loss_giou', 0.235), ('loss_self_iou', 0.006), ('cardinality_error', 7.94), ('loss_ce_0', 0.278), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.253), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.94), ('loss_caption_0', 1.851), ('loss_caption', 1.84), ('total_loss', 10.561)]), time/iter = 0.724, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 92000 (epoch 9), -loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.121), ('loss_bbox', 0.067), ('loss_giou', 0.179), ('loss_self_iou', 0.093), ('cardinality_error', 3.784), ('loss_ce_0', 0.298), ('loss_counter_0', 0.121), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.784), ('loss_caption_0', 2.916), ('loss_caption', 2.915), ('total_loss', 14.422)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 93000 (epoch 9), -loss = OrderedDict([('loss_ce', 0.298), ('loss_counter', 0.117), ('loss_bbox', 0.065), ('loss_giou', 0.18), ('loss_self_iou', 0.091), ('cardinality_error', 3.806), ('loss_ce_0', 0.3), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.806), ('loss_caption_0', 2.9), ('loss_caption', 2.905), ('total_loss', 14.377)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 94000 (epoch 9), -loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.109), ('loss_bbox', 0.068), ('loss_giou', 0.174), ('loss_self_iou', 0.105), ('cardinality_error', 3.616), ('loss_ce_0', 0.293), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.106), ('cardinality_error_0', 3.616), ('loss_caption_0', 2.912), ('loss_caption', 2.914), ('total_loss', 14.339)]), -time/iter = 0.187, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 95000 (epoch 9), -loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.12), ('loss_bbox', 0.066), ('loss_giou', 0.185), ('loss_self_iou', 0.093), ('cardinality_error', 3.805), ('loss_ce_0', 0.296), ('loss_counter_0', 0.12), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.805), ('loss_caption_0', 2.938), ('loss_caption', 2.941), ('total_loss', 14.546)]), -time/iter = 0.188, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 96000 (epoch 9), -loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.114), ('loss_bbox', 0.069), ('loss_giou', 0.177), ('loss_self_iou', 0.103), ('cardinality_error', 3.684), ('loss_ce_0', 0.293), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.105), ('cardinality_error_0', 3.684), ('loss_caption_0', 2.928), ('loss_caption', 2.931), ('total_loss', 14.434)]), -time/iter = 0.193, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 97000 (epoch 9), -loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.111), ('loss_bbox', 0.066), ('loss_giou', 0.184), ('loss_self_iou', 0.095), ('cardinality_error', 3.693), ('loss_ce_0', 0.298), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.693), ('loss_caption_0', 2.902), ('loss_caption', 2.903), ('total_loss', 14.392)]), -time/iter = 0.190, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 98000 (epoch 9), -loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.115), ('loss_bbox', 0.068), ('loss_giou', 0.181), ('loss_self_iou', 0.089), ('cardinality_error', 3.738), ('loss_ce_0', 0.298), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.738), ('loss_caption_0', 2.896), ('loss_caption', 2.902), ('total_loss', 14.361)]), -time/iter = 0.188, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 99000 (epoch 9), -loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.115), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.095), ('cardinality_error', 3.702), ('loss_ce_0', 0.296), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.702), ('loss_caption_0', 2.956), ('loss_caption', 2.956), ('total_loss', 14.525)]), -time/iter = 0.195, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 100000 (epoch 9), -loss = OrderedDict([('loss_ce', 0.296), ('loss_counter', 0.114), ('loss_bbox', 0.066), ('loss_giou', 0.177), ('loss_self_iou', 0.092), ('cardinality_error', 3.751), ('loss_ce_0', 0.298), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.751), ('loss_caption_0', 2.932), ('loss_caption', 2.932), ('total_loss', 14.453)]), -time/iter = 0.191, bad_vid = 0.000 - -Validation results of iter 100090: -Bleu_1:0.16664911544364056 -Bleu_2:0.09023295213839283 -Bleu_3:0.04763940550902772 -Bleu_4:0.02409205514859969 -METEOR:0.0878588871148787 -ROUGE_L:0.16401896184386325 -CIDEr:0.31947446694949533 -Recall:0.5282742157284517 -Precision:0.5750796556165633 -soda_c:0.05745241491068406 -para_Bleu_1:0.46204429574393835 -para_Bleu_2:0.2749900961045832 -para_Bleu_3:0.1683879565471281 -para_Bleu_4:0.10624339593597942 -para_METEOR:0.16245439213508253 -para_ROUGE_L:0.3162965936511474 -para_CIDEr:0.20803178964320856 - -overall score of iter 100090: 0.4767295777142705 - -Save model at iter 100090 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. -Save Best-model at iter 100090 to checkpoint file. +ID seq2-ft(mix)-gt_percent-1.0 iter 10906 (epoch 8), +loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.109), ('loss_bbox', 0.017), ('loss_giou', 0.215), ('loss_self_iou', 0.006), ('cardinality_error', 7.218), ('loss_ce_0', 0.278), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.218), ('loss_caption_0', 1.945), ('loss_caption', 1.948), ('total_loss', 10.791)]), +time/iter = 0.165, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 11039 (epoch 8), +loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.108), ('loss_bbox', 0.017), ('loss_giou', 0.207), ('loss_self_iou', 0.006), ('cardinality_error', 7.579), ('loss_ce_0', 0.283), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.223), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.579), ('loss_caption_0', 1.92), ('loss_caption', 1.927), ('total_loss', 10.664)]), +time/iter = 0.165, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 11172 (epoch 8), +loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.11), ('loss_bbox', 0.018), ('loss_giou', 0.215), ('loss_self_iou', 0.006), ('cardinality_error', 7.451), ('loss_ce_0', 0.279), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.451), ('loss_caption_0', 1.91), ('loss_caption', 1.9), ('total_loss', 10.635)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 11305 (epoch 8), +loss = OrderedDict([('loss_ce', 0.278), ('loss_counter', 0.125), ('loss_bbox', 0.017), ('loss_giou', 0.233), ('loss_self_iou', 0.006), ('cardinality_error', 8.09), ('loss_ce_0', 0.276), ('loss_counter_0', 0.126), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.244), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 8.09), ('loss_caption_0', 1.876), ('loss_caption', 1.877), ('total_loss', 10.648)]), +time/iter = 0.152, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 11438 (epoch 8), +loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.113), ('loss_bbox', 0.016), ('loss_giou', 0.211), ('loss_self_iou', 0.005), ('cardinality_error', 7.744), ('loss_ce_0', 0.269), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.744), ('loss_caption_0', 1.981), ('loss_caption', 1.968), ('total_loss', 10.865)]), +time/iter = 0.156, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 11571 (epoch 8), +loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.114), ('loss_bbox', 0.018), ('loss_giou', 0.225), ('loss_self_iou', 0.006), ('cardinality_error', 7.699), ('loss_ce_0', 0.277), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.02), ('loss_giou_0', 0.243), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.699), ('loss_caption_0', 1.833), ('loss_caption', 1.846), ('total_loss', 10.461)]), +time/iter = 0.149, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 11704 (epoch 8), +loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.115), ('loss_bbox', 0.017), ('loss_giou', 0.21), ('loss_self_iou', 0.006), ('cardinality_error', 7.82), ('loss_ce_0', 0.278), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.226), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.91), ('loss_caption', 1.915), ('total_loss', 10.628)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 11837 (epoch 8), +loss = OrderedDict([('loss_ce', 0.271), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.215), ('loss_self_iou', 0.007), ('cardinality_error', 8.0), ('loss_ce_0', 0.273), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.23), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 8.0), ('loss_caption_0', 1.936), ('loss_caption', 1.939), ('total_loss', 10.726)]), +time/iter = 0.156, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 11970 (epoch 8), +loss = OrderedDict([('loss_ce', 0.272), ('loss_counter', 0.115), ('loss_bbox', 0.017), ('loss_giou', 0.22), ('loss_self_iou', 0.006), ('cardinality_error', 8.158), ('loss_ce_0', 0.27), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.242), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 8.158), ('loss_caption_0', 1.953), ('loss_caption', 1.962), ('total_loss', 10.881)]), +time/iter = 0.168, bad_vid = 0.000 + +Validation results of iter 11997: +Bleu_1:0.19696025394358163 +Bleu_2:0.12042554867022627 +Bleu_3:0.06805715701089529 +Bleu_4:0.034063345644385214 +METEOR:0.09208296372249718 +ROUGE_L:0.1803782633150628 +CIDEr:0.5812603125344058 +Recall:0.29169024735901117 +Precision:0.44299129936438486 +soda_c:0.07606608300691252 +para_Bleu_1:0.383549187276652 +para_Bleu_2:0.23192713278728125 +para_Bleu_3:0.14217181061136971 +para_Bleu_4:0.0892715976218228 +para_METEOR:0.16074434603101373 +para_ROUGE_L:0.3336567463040183 +para_CIDEr:0.2859809872200661 + +overall score of iter 11997: 0.5359969308729027 + +Save model at iter 11997 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. +Save Best-model at iter 11997 to checkpoint file. Save info to info.json -ID seq2-ft(mix)-gt_percent-1.0 iter 101000 (epoch 10), -loss = OrderedDict([('loss_ce', 0.29), ('loss_counter', 0.111), ('loss_bbox', 0.065), ('loss_giou', 0.173), ('loss_self_iou', 0.093), ('cardinality_error', 3.699), ('loss_ce_0', 0.292), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.699), ('loss_caption_0', 2.849), ('loss_caption', 2.847), ('total_loss', 14.064)]), -time/iter = 0.713, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 102000 (epoch 10), -loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.116), ('loss_bbox', 0.065), ('loss_giou', 0.174), ('loss_self_iou', 0.093), ('cardinality_error', 3.695), ('loss_ce_0', 0.293), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.695), ('loss_caption_0', 2.85), ('loss_caption', 2.848), ('total_loss', 14.087)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 103000 (epoch 10), -loss = OrderedDict([('loss_ce', 0.293), ('loss_counter', 0.115), ('loss_bbox', 0.066), ('loss_giou', 0.173), ('loss_self_iou', 0.093), ('cardinality_error', 3.724), ('loss_ce_0', 0.293), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.724), ('loss_caption_0', 2.846), ('loss_caption', 2.854), ('total_loss', 14.092)]), -time/iter = 0.190, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 104000 (epoch 10), -loss = OrderedDict([('loss_ce', 0.289), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.178), ('loss_self_iou', 0.097), ('cardinality_error', 3.736), ('loss_ce_0', 0.29), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.736), ('loss_caption_0', 2.916), ('loss_caption', 2.913), ('total_loss', 14.362)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 105000 (epoch 10), -loss = OrderedDict([('loss_ce', 0.288), ('loss_counter', 0.117), ('loss_bbox', 0.067), ('loss_giou', 0.18), ('loss_self_iou', 0.091), ('cardinality_error', 3.736), ('loss_ce_0', 0.29), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.736), ('loss_caption_0', 2.907), ('loss_caption', 2.902), ('total_loss', 14.342)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 106000 (epoch 10), -loss = OrderedDict([('loss_ce', 0.292), ('loss_counter', 0.113), ('loss_bbox', 0.068), ('loss_giou', 0.184), ('loss_self_iou', 0.11), ('cardinality_error', 3.775), ('loss_ce_0', 0.293), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.069), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.11), ('cardinality_error_0', 3.775), ('loss_caption_0', 2.876), ('loss_caption', 2.875), ('total_loss', 14.264)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 107000 (epoch 10), -loss = OrderedDict([('loss_ce', 0.291), ('loss_counter', 0.114), ('loss_bbox', 0.069), ('loss_giou', 0.178), ('loss_self_iou', 0.099), ('cardinality_error', 3.743), ('loss_ce_0', 0.291), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.07), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.743), ('loss_caption_0', 2.91), ('loss_caption', 2.909), ('total_loss', 14.358)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 108000 (epoch 10), -loss = OrderedDict([('loss_ce', 0.295), ('loss_counter', 0.118), ('loss_bbox', 0.066), ('loss_giou', 0.177), ('loss_self_iou', 0.1), ('cardinality_error', 3.81), ('loss_ce_0', 0.296), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.81), ('loss_caption_0', 2.928), ('loss_caption', 2.93), ('total_loss', 14.446)]), -time/iter = 0.194, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 109000 (epoch 10), -loss = OrderedDict([('loss_ce', 0.294), ('loss_counter', 0.118), ('loss_bbox', 0.063), ('loss_giou', 0.178), ('loss_self_iou', 0.091), ('cardinality_error', 3.78), ('loss_ce_0', 0.296), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.78), ('loss_caption_0', 2.916), ('loss_caption', 2.912), ('total_loss', 14.396)]), -time/iter = 0.193, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 110000 (epoch 10), -loss = OrderedDict([('loss_ce', 0.297), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.178), ('loss_self_iou', 0.087), ('cardinality_error', 3.72), ('loss_ce_0', 0.297), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.184), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.72), ('loss_caption_0', 2.948), ('loss_caption', 2.948), ('total_loss', 14.539)]), -time/iter = 0.196, bad_vid = 0.000 - -Validation results of iter 110099: -Bleu_1:0.1671778590456048 -Bleu_2:0.09077014613023152 -Bleu_3:0.0476684747303012 -Bleu_4:0.02445564298599047 -METEOR:0.08933235383587503 -ROUGE_L:0.1654660162888944 -CIDEr:0.31886265111118334 -Recall:0.5314017615268335 -Precision:0.5831469052945512 -soda_c:0.05853263249839839 -para_Bleu_1:0.46544090189732323 -para_Bleu_2:0.2789325258737778 -para_Bleu_3:0.17172911957785325 -para_Bleu_4:0.10903514181091935 -para_METEOR:0.16550159188298816 -para_ROUGE_L:0.3181118223429575 -para_CIDEr:0.2056618808195008 - -overall score of iter 110099: 0.4801986145134083 - -Save model at iter 110099 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. -Save Best-model at iter 110099 to checkpoint file. +ID seq2-ft(mix)-gt_percent-1.0 iter 12103 (epoch 9), +loss = OrderedDict([('loss_ce', 0.275), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.216), ('loss_self_iou', 0.006), ('cardinality_error', 8.038), ('loss_ce_0', 0.274), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 8.038), ('loss_caption_0', 1.832), ('loss_caption', 1.845), ('total_loss', 10.35)]), +time/iter = 0.705, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 12236 (epoch 9), +loss = OrderedDict([('loss_ce', 0.272), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.206), ('loss_self_iou', 0.005), ('cardinality_error', 7.812), ('loss_ce_0', 0.266), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.223), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.812), ('loss_caption_0', 1.968), ('loss_caption', 1.959), ('total_loss', 10.757)]), +time/iter = 0.166, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 12369 (epoch 9), +loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.118), ('loss_bbox', 0.016), ('loss_giou', 0.21), ('loss_self_iou', 0.005), ('cardinality_error', 7.827), ('loss_ce_0', 0.27), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.226), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.827), ('loss_caption_0', 1.89), ('loss_caption', 1.903), ('total_loss', 10.534)]), +time/iter = 0.158, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 12502 (epoch 9), +loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.108), ('loss_bbox', 0.016), ('loss_giou', 0.205), ('loss_self_iou', 0.006), ('cardinality_error', 7.684), ('loss_ce_0', 0.268), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.224), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.684), ('loss_caption_0', 1.903), ('loss_caption', 1.905), ('total_loss', 10.519)]), +time/iter = 0.160, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 12635 (epoch 9), +loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.111), ('loss_bbox', 0.015), ('loss_giou', 0.218), ('loss_self_iou', 0.005), ('cardinality_error', 7.947), ('loss_ce_0', 0.269), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.232), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.947), ('loss_caption_0', 1.822), ('loss_caption', 1.826), ('total_loss', 10.284)]), +time/iter = 0.158, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 12768 (epoch 9), +loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.111), ('loss_bbox', 0.017), ('loss_giou', 0.219), ('loss_self_iou', 0.008), ('cardinality_error', 7.669), ('loss_ce_0', 0.276), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.235), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.669), ('loss_caption_0', 1.905), ('loss_caption', 1.909), ('total_loss', 10.662)]), +time/iter = 0.160, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 12901 (epoch 9), +loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.106), ('loss_bbox', 0.015), ('loss_giou', 0.208), ('loss_self_iou', 0.005), ('cardinality_error', 7.639), ('loss_ce_0', 0.267), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.224), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.639), ('loss_caption_0', 1.856), ('loss_caption', 1.863), ('total_loss', 10.344)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 13034 (epoch 9), +loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.112), ('loss_bbox', 0.015), ('loss_giou', 0.216), ('loss_self_iou', 0.005), ('cardinality_error', 7.85), ('loss_ce_0', 0.274), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.231), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.85), ('loss_caption_0', 1.841), ('loss_caption', 1.841), ('total_loss', 10.356)]), +time/iter = 0.149, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 13167 (epoch 9), +loss = OrderedDict([('loss_ce', 0.275), ('loss_counter', 0.109), ('loss_bbox', 0.018), ('loss_giou', 0.21), ('loss_self_iou', 0.005), ('cardinality_error', 7.406), ('loss_ce_0', 0.273), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.226), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.406), ('loss_caption_0', 1.931), ('loss_caption', 1.927), ('total_loss', 10.663)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 13300 (epoch 9), +loss = OrderedDict([('loss_ce', 0.274), ('loss_counter', 0.113), ('loss_bbox', 0.017), ('loss_giou', 0.212), ('loss_self_iou', 0.005), ('cardinality_error', 7.737), ('loss_ce_0', 0.272), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.019), ('loss_giou_0', 0.23), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.737), ('loss_caption_0', 1.853), ('loss_caption', 1.849), ('total_loss', 10.379)]), +time/iter = 0.154, bad_vid = 0.000 + +Validation results of iter 13330: +Bleu_1:0.20446290018298774 +Bleu_2:0.12418412895577716 +Bleu_3:0.06899010124646034 +Bleu_4:0.03428116460131532 +METEOR:0.09595521703655657 +ROUGE_L:0.1876517650928566 +CIDEr:0.5887832993219201 +Recall:0.3017153873964599 +Precision:0.4588439095550697 +soda_c:0.07875391677883807 +para_Bleu_1:0.3953706124668704 +para_Bleu_2:0.24043007714841402 +para_Bleu_3:0.14833197751929023 +para_Bleu_4:0.09386644902900565 +para_METEOR:0.16476396966168239 +para_ROUGE_L:0.33760319454244797 +para_CIDEr:0.31194480042956774 + +overall score of iter 13330: 0.5705752191202558 + +Save model at iter 13330 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. +Save Best-model at iter 13330 to checkpoint file. Save info to info.json -ID seq2-ft(mix)-gt_percent-1.0 iter 111000 (epoch 11), -loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.114), ('loss_bbox', 0.066), ('loss_giou', 0.173), ('loss_self_iou', 0.095), ('cardinality_error', 3.718), ('loss_ce_0', 0.287), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.718), ('loss_caption_0', 2.867), ('loss_caption', 2.869), ('total_loss', 14.14)]), -time/iter = 0.727, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 112000 (epoch 11), -loss = OrderedDict([('loss_ce', 0.287), ('loss_counter', 0.111), ('loss_bbox', 0.064), ('loss_giou', 0.169), ('loss_self_iou', 0.098), ('cardinality_error', 3.725), ('loss_ce_0', 0.289), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.725), ('loss_caption_0', 2.844), ('loss_caption', 2.842), ('total_loss', 14.015)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 113000 (epoch 11), -loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.111), ('loss_bbox', 0.064), ('loss_giou', 0.172), ('loss_self_iou', 0.097), ('cardinality_error', 3.734), ('loss_ce_0', 0.286), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.734), ('loss_caption_0', 2.837), ('loss_caption', 2.834), ('total_loss', 13.981)]), -time/iter = 0.188, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 114000 (epoch 11), -loss = OrderedDict([('loss_ce', 0.283), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.096), ('cardinality_error', 3.739), ('loss_ce_0', 0.285), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.18), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.739), ('loss_caption_0', 2.855), ('loss_caption', 2.857), ('total_loss', 14.084)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 115000 (epoch 11), -loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.111), ('loss_bbox', 0.064), ('loss_giou', 0.175), ('loss_self_iou', 0.092), ('cardinality_error', 3.74), ('loss_ce_0', 0.284), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.18), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.74), ('loss_caption_0', 2.823), ('loss_caption', 2.824), ('total_loss', 13.959)]), -time/iter = 0.190, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 116000 (epoch 11), -loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.113), ('loss_bbox', 0.065), ('loss_giou', 0.177), ('loss_self_iou', 0.088), ('cardinality_error', 3.753), ('loss_ce_0', 0.288), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.753), ('loss_caption_0', 2.846), ('loss_caption', 2.843), ('total_loss', 14.073)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 117000 (epoch 11), -loss = OrderedDict([('loss_ce', 0.285), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.096), ('cardinality_error', 3.755), ('loss_ce_0', 0.287), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.755), ('loss_caption_0', 2.804), ('loss_caption', 2.81), ('total_loss', 13.896)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 118000 (epoch 11), -loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.109), ('loss_bbox', 0.066), ('loss_giou', 0.175), ('loss_self_iou', 0.093), ('cardinality_error', 3.715), ('loss_ce_0', 0.285), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.715), ('loss_caption_0', 2.863), ('loss_caption', 2.866), ('total_loss', 14.129)]), -time/iter = 0.193, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 119000 (epoch 11), -loss = OrderedDict([('loss_ce', 0.286), ('loss_counter', 0.114), ('loss_bbox', 0.064), ('loss_giou', 0.176), ('loss_self_iou', 0.098), ('cardinality_error', 3.735), ('loss_ce_0', 0.287), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.735), ('loss_caption_0', 2.844), ('loss_caption', 2.843), ('total_loss', 14.061)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 120000 (epoch 11), -loss = OrderedDict([('loss_ce', 0.284), ('loss_counter', 0.113), ('loss_bbox', 0.065), ('loss_giou', 0.175), ('loss_self_iou', 0.101), ('cardinality_error', 3.755), ('loss_ce_0', 0.285), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.755), ('loss_caption_0', 2.868), ('loss_caption', 2.878), ('total_loss', 14.168)]), -time/iter = 0.190, bad_vid = 0.000 - -Validation results of iter 120108: -Bleu_1:0.16560019346009094 -Bleu_2:0.08934946581658681 -Bleu_3:0.04692472826903507 -Bleu_4:0.023331060597699706 -METEOR:0.08861943572471001 -ROUGE_L:0.16392659155605854 -CIDEr:0.31177527957257306 -Recall:0.5248955646301546 -Precision:0.5713061826316813 -soda_c:0.056694173808073595 -para_Bleu_1:0.45551540477127933 -para_Bleu_2:0.2725270289009415 -para_Bleu_3:0.16731081427102573 -para_Bleu_4:0.10555679460767188 -para_METEOR:0.1665724805603667 -para_ROUGE_L:0.31619749898051375 -para_CIDEr:0.19719071969736374 - -overall score of iter 120108: 0.4693199948654023 - -Save model at iter 120108 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +ID seq2-ft(mix)-gt_percent-1.0 iter 13433 (epoch 10), +loss = OrderedDict([('loss_ce', 0.271), ('loss_counter', 0.112), ('loss_bbox', 0.017), ('loss_giou', 0.217), ('loss_self_iou', 0.006), ('cardinality_error', 7.835), ('loss_ce_0', 0.267), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.235), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.835), ('loss_caption_0', 1.804), ('loss_caption', 1.811), ('total_loss', 10.223)]), +time/iter = 0.700, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 13566 (epoch 10), +loss = OrderedDict([('loss_ce', 0.266), ('loss_counter', 0.116), ('loss_bbox', 0.015), ('loss_giou', 0.204), ('loss_self_iou', 0.005), ('cardinality_error', 7.774), ('loss_ce_0', 0.266), ('loss_counter_0', 0.116), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.221), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.774), ('loss_caption_0', 1.884), ('loss_caption', 1.887), ('total_loss', 10.42)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 13699 (epoch 10), +loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.201), ('loss_self_iou', 0.006), ('cardinality_error', 7.729), ('loss_ce_0', 0.259), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.218), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.729), ('loss_caption_0', 1.823), ('loss_caption', 1.806), ('total_loss', 10.083)]), +time/iter = 0.158, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 13832 (epoch 10), +loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.111), ('loss_bbox', 0.016), ('loss_giou', 0.211), ('loss_self_iou', 0.005), ('cardinality_error', 7.699), ('loss_ce_0', 0.271), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.228), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.699), ('loss_caption_0', 1.855), ('loss_caption', 1.857), ('total_loss', 10.374)]), +time/iter = 0.164, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 13965 (epoch 10), +loss = OrderedDict([('loss_ce', 0.275), ('loss_counter', 0.105), ('loss_bbox', 0.016), ('loss_giou', 0.196), ('loss_self_iou', 0.006), ('cardinality_error', 7.128), ('loss_ce_0', 0.271), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.128), ('loss_caption_0', 1.809), ('loss_caption', 1.8), ('total_loss', 10.055)]), +time/iter = 0.151, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 14098 (epoch 10), +loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.112), ('loss_bbox', 0.016), ('loss_giou', 0.213), ('loss_self_iou', 0.007), ('cardinality_error', 7.925), ('loss_ce_0', 0.273), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.23), ('loss_self_iou_0', 0.008), ('cardinality_error_0', 7.925), ('loss_caption_0', 1.863), ('loss_caption', 1.863), ('total_loss', 10.433)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 14231 (epoch 10), +loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.113), ('loss_bbox', 0.017), ('loss_giou', 0.212), ('loss_self_iou', 0.007), ('cardinality_error', 7.82), ('loss_ce_0', 0.262), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.018), ('loss_giou_0', 0.222), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.936), ('loss_caption', 1.929), ('total_loss', 10.624)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 14364 (epoch 10), +loss = OrderedDict([('loss_ce', 0.263), ('loss_counter', 0.104), ('loss_bbox', 0.015), ('loss_giou', 0.216), ('loss_self_iou', 0.005), ('cardinality_error', 7.744), ('loss_ce_0', 0.263), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.227), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.744), ('loss_caption_0', 1.757), ('loss_caption', 1.754), ('total_loss', 9.948)]), +time/iter = 0.185, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 14497 (epoch 10), +loss = OrderedDict([('loss_ce', 0.266), ('loss_counter', 0.11), ('loss_bbox', 0.015), ('loss_giou', 0.2), ('loss_self_iou', 0.005), ('cardinality_error', 7.827), ('loss_ce_0', 0.265), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.827), ('loss_caption_0', 1.896), ('loss_caption', 1.894), ('total_loss', 10.407)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 14630 (epoch 10), +loss = OrderedDict([('loss_ce', 0.263), ('loss_counter', 0.113), ('loss_bbox', 0.015), ('loss_giou', 0.208), ('loss_self_iou', 0.005), ('cardinality_error', 7.925), ('loss_ce_0', 0.261), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.224), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.925), ('loss_caption_0', 1.84), ('loss_caption', 1.842), ('total_loss', 10.253)]), +time/iter = 0.158, bad_vid = 0.000 + +Validation results of iter 14663: +Bleu_1:0.19267153393038786 +Bleu_2:0.11732781330402656 +Bleu_3:0.06746115616325608 +Bleu_4:0.03425583839334337 +METEOR:0.08963300348041837 +ROUGE_L:0.17480207136309905 +CIDEr:0.575137603362526 +Recall:0.30432682743951917 +Precision:0.4353044354138446 +soda_c:0.07762847290423684 +para_Bleu_1:0.393384019586376 +para_Bleu_2:0.23835405770332685 +para_Bleu_3:0.14545808678454117 +para_Bleu_4:0.09085202435904723 +para_METEOR:0.16354570345255123 +para_ROUGE_L:0.3343729651839732 +para_CIDEr:0.27098453497923136 + +overall score of iter 14663: 0.5253822627908299 + +Save model at iter 14663 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. +Save info to info.json +ID seq2-ft(mix)-gt_percent-1.0 iter 14763 (epoch 11), +loss = OrderedDict([('loss_ce', 0.266), ('loss_counter', 0.111), ('loss_bbox', 0.015), ('loss_giou', 0.208), ('loss_self_iou', 0.006), ('cardinality_error', 7.85), ('loss_ce_0', 0.264), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.225), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.85), ('loss_caption_0', 1.87), ('loss_caption', 1.877), ('total_loss', 10.398)]), +time/iter = 0.690, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 14896 (epoch 11), +loss = OrderedDict([('loss_ce', 0.26), ('loss_counter', 0.112), ('loss_bbox', 0.015), ('loss_giou', 0.2), ('loss_self_iou', 0.005), ('cardinality_error', 7.692), ('loss_ce_0', 0.259), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.217), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.796), ('loss_caption', 1.784), ('total_loss', 9.979)]), +time/iter = 0.160, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 15029 (epoch 11), +loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.103), ('loss_bbox', 0.015), ('loss_giou', 0.195), ('loss_self_iou', 0.006), ('cardinality_error', 7.414), ('loss_ce_0', 0.264), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.414), ('loss_caption_0', 1.763), ('loss_caption', 1.767), ('total_loss', 9.842)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 15162 (epoch 11), +loss = OrderedDict([('loss_ce', 0.263), ('loss_counter', 0.113), ('loss_bbox', 0.015), ('loss_giou', 0.196), ('loss_self_iou', 0.004), ('cardinality_error', 7.767), ('loss_ce_0', 0.262), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.767), ('loss_caption_0', 1.781), ('loss_caption', 1.781), ('total_loss', 9.916)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 15295 (epoch 11), +loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.106), ('loss_bbox', 0.015), ('loss_giou', 0.2), ('loss_self_iou', 0.005), ('cardinality_error', 7.662), ('loss_ce_0', 0.255), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.662), ('loss_caption_0', 1.735), ('loss_caption', 1.75), ('total_loss', 9.755)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 15428 (epoch 11), +loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.114), ('loss_bbox', 0.015), ('loss_giou', 0.21), ('loss_self_iou', 0.005), ('cardinality_error', 7.992), ('loss_ce_0', 0.261), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.22), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.852), ('loss_caption', 1.86), ('total_loss', 10.298)]), +time/iter = 0.156, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 15561 (epoch 11), +loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.112), ('loss_bbox', 0.015), ('loss_giou', 0.204), ('loss_self_iou', 0.006), ('cardinality_error', 8.068), ('loss_ce_0', 0.257), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.218), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 8.068), ('loss_caption_0', 1.878), ('loss_caption', 1.866), ('total_loss', 10.314)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 15694 (epoch 11), +loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.106), ('loss_bbox', 0.015), ('loss_giou', 0.202), ('loss_self_iou', 0.004), ('cardinality_error', 7.647), ('loss_ce_0', 0.257), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.215), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.647), ('loss_caption_0', 1.7), ('loss_caption', 1.684), ('total_loss', 9.569)]), +time/iter = 0.152, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 15827 (epoch 11), +loss = OrderedDict([('loss_ce', 0.259), ('loss_counter', 0.104), ('loss_bbox', 0.016), ('loss_giou', 0.194), ('loss_self_iou', 0.005), ('cardinality_error', 7.722), ('loss_ce_0', 0.257), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.209), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.722), ('loss_caption_0', 1.848), ('loss_caption', 1.839), ('total_loss', 10.119)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 15960 (epoch 11), +loss = OrderedDict([('loss_ce', 0.26), ('loss_counter', 0.107), ('loss_bbox', 0.015), ('loss_giou', 0.197), ('loss_self_iou', 0.004), ('cardinality_error', 7.609), ('loss_ce_0', 0.257), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.609), ('loss_caption_0', 1.847), ('loss_caption', 1.858), ('total_loss', 10.198)]), +time/iter = 0.161, bad_vid = 0.000 + +Validation results of iter 15996: +Bleu_1:0.1989422607268001 +Bleu_2:0.12223038556953512 +Bleu_3:0.06835990671747892 +Bleu_4:0.03486159828438583 +METEOR:0.09408978838449876 +ROUGE_L:0.18200142867223945 +CIDEr:0.593480700759431 +Recall:0.30795469953703025 +Precision:0.4513424333993264 +soda_c:0.0796861065455984 +para_Bleu_1:0.39594509057043764 +para_Bleu_2:0.24087109399513515 +para_Bleu_3:0.14790262814870953 +para_Bleu_4:0.09321042711819619 +para_METEOR:0.1655617051143519 +para_ROUGE_L:0.3391051008488012 +para_CIDEr:0.32807196750555834 + +overall score of iter 15996: 0.5868440997381064 + +Save model at iter 15996 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. +Save Best-model at iter 15996 to checkpoint file. Save info to info.json -ID seq2-ft(mix)-gt_percent-1.0 iter 121000 (epoch 12), -loss = OrderedDict([('loss_ce', 0.283), ('loss_counter', 0.108), ('loss_bbox', 0.063), ('loss_giou', 0.166), ('loss_self_iou', 0.095), ('cardinality_error', 3.691), ('loss_ce_0', 0.284), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.691), ('loss_caption_0', 2.809), ('loss_caption', 2.808), ('total_loss', 13.835)]), +ID seq2-ft(mix)-gt_percent-1.0 iter 16093 (epoch 12), +loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.19), ('loss_self_iou', 0.004), ('cardinality_error', 7.992), ('loss_ce_0', 0.258), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.773), ('loss_caption', 1.769), ('total_loss', 9.789)]), time/iter = 0.727, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 122000 (epoch 12), -loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.109), ('loss_bbox', 0.064), ('loss_giou', 0.17), ('loss_self_iou', 0.093), ('cardinality_error', 3.706), ('loss_ce_0', 0.281), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.706), ('loss_caption_0', 2.811), ('loss_caption', 2.814), ('total_loss', 13.867)]), -time/iter = 0.190, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 123000 (epoch 12), -loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.109), ('loss_bbox', 0.066), ('loss_giou', 0.172), ('loss_self_iou', 0.097), ('cardinality_error', 3.691), ('loss_ce_0', 0.281), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.691), ('loss_caption_0', 2.789), ('loss_caption', 2.797), ('total_loss', 13.808)]), -time/iter = 0.190, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 124000 (epoch 12), -loss = OrderedDict([('loss_ce', 0.282), ('loss_counter', 0.112), ('loss_bbox', 0.063), ('loss_giou', 0.17), ('loss_self_iou', 0.092), ('cardinality_error', 3.76), ('loss_ce_0', 0.281), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.76), ('loss_caption_0', 2.839), ('loss_caption', 2.842), ('total_loss', 13.984)]), -time/iter = 0.194, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 125000 (epoch 12), -loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.174), ('loss_self_iou', 0.097), ('cardinality_error', 3.763), ('loss_ce_0', 0.282), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.763), ('loss_caption_0', 2.81), ('loss_caption', 2.815), ('total_loss', 13.898)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 126000 (epoch 12), -loss = OrderedDict([('loss_ce', 0.282), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.177), ('loss_self_iou', 0.095), ('cardinality_error', 3.717), ('loss_ce_0', 0.283), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.717), ('loss_caption_0', 2.789), ('loss_caption', 2.787), ('total_loss', 13.835)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 127000 (epoch 12), -loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.112), ('loss_bbox', 0.064), ('loss_giou', 0.172), ('loss_self_iou', 0.097), ('cardinality_error', 3.764), ('loss_ce_0', 0.277), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.764), ('loss_caption_0', 2.867), ('loss_caption', 2.871), ('total_loss', 14.097)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 128000 (epoch 12), -loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.113), ('loss_bbox', 0.063), ('loss_giou', 0.173), ('loss_self_iou', 0.092), ('cardinality_error', 3.793), ('loss_ce_0', 0.283), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.793), ('loss_caption_0', 2.868), ('loss_caption', 2.863), ('total_loss', 14.111)]), -time/iter = 0.190, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 129000 (epoch 12), -loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.106), ('loss_bbox', 0.066), ('loss_giou', 0.175), ('loss_self_iou', 0.1), ('cardinality_error', 3.686), ('loss_ce_0', 0.283), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.686), ('loss_caption_0', 2.812), ('loss_caption', 2.813), ('total_loss', 13.903)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 130000 (epoch 12), -loss = OrderedDict([('loss_ce', 0.283), ('loss_counter', 0.111), ('loss_bbox', 0.065), ('loss_giou', 0.174), ('loss_self_iou', 0.097), ('cardinality_error', 3.772), ('loss_ce_0', 0.286), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.772), ('loss_caption_0', 2.86), ('loss_caption', 2.861), ('total_loss', 14.105)]), -time/iter = 0.190, bad_vid = 0.000 - -Validation results of iter 130117: -Bleu_1:0.16778675341331784 -Bleu_2:0.09082555766488616 -Bleu_3:0.047445681271689716 -Bleu_4:0.02375280793420285 -METEOR:0.08883520478698428 -ROUGE_L:0.16531435721130755 -CIDEr:0.31778343902267087 -Recall:0.5273619026669621 -Precision:0.5698181479221706 -soda_c:0.05753856798988932 -para_Bleu_1:0.4610381779339771 -para_Bleu_2:0.2761144617772928 -para_Bleu_3:0.16915034097081671 -para_Bleu_4:0.10654029953240575 -para_METEOR:0.16638305166981465 -para_ROUGE_L:0.31710573495570465 -para_CIDEr:0.19601570682645908 - -overall score of iter 130117: 0.46893905802867947 - -Save model at iter 130117 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +ID seq2-ft(mix)-gt_percent-1.0 iter 16226 (epoch 12), +loss = OrderedDict([('loss_ce', 0.26), ('loss_counter', 0.11), ('loss_bbox', 0.014), ('loss_giou', 0.198), ('loss_self_iou', 0.004), ('cardinality_error', 7.805), ('loss_ce_0', 0.259), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.215), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.805), ('loss_caption_0', 1.743), ('loss_caption', 1.749), ('total_loss', 9.786)]), +time/iter = 0.156, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 16359 (epoch 12), +loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.116), ('loss_bbox', 0.014), ('loss_giou', 0.198), ('loss_self_iou', 0.005), ('cardinality_error', 7.85), ('loss_ce_0', 0.264), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.85), ('loss_caption_0', 1.797), ('loss_caption', 1.778), ('total_loss', 9.972)]), +time/iter = 0.156, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 16492 (epoch 12), +loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.105), ('loss_bbox', 0.015), ('loss_giou', 0.189), ('loss_self_iou', 0.004), ('cardinality_error', 7.383), ('loss_ce_0', 0.257), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.383), ('loss_caption_0', 1.796), ('loss_caption', 1.808), ('total_loss', 9.899)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 16625 (epoch 12), +loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.109), ('loss_bbox', 0.014), ('loss_giou', 0.186), ('loss_self_iou', 0.005), ('cardinality_error', 7.782), ('loss_ce_0', 0.256), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.203), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.782), ('loss_caption_0', 1.78), ('loss_caption', 1.779), ('total_loss', 9.812)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 16758 (epoch 12), +loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.106), ('loss_bbox', 0.014), ('loss_giou', 0.196), ('loss_self_iou', 0.005), ('cardinality_error', 7.962), ('loss_ce_0', 0.252), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.962), ('loss_caption_0', 1.795), ('loss_caption', 1.806), ('total_loss', 9.948)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 16891 (epoch 12), +loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.109), ('loss_bbox', 0.016), ('loss_giou', 0.199), ('loss_self_iou', 0.005), ('cardinality_error', 7.797), ('loss_ce_0', 0.255), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.797), ('loss_caption_0', 1.788), ('loss_caption', 1.782), ('total_loss', 9.914)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 17024 (epoch 12), +loss = OrderedDict([('loss_ce', 0.262), ('loss_counter', 0.11), ('loss_bbox', 0.014), ('loss_giou', 0.198), ('loss_self_iou', 0.005), ('cardinality_error', 7.511), ('loss_ce_0', 0.26), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.511), ('loss_caption_0', 1.717), ('loss_caption', 1.72), ('total_loss', 9.666)]), +time/iter = 0.170, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 17157 (epoch 12), +loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.104), ('loss_bbox', 0.015), ('loss_giou', 0.189), ('loss_self_iou', 0.004), ('cardinality_error', 7.692), ('loss_ce_0', 0.252), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.2), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.738), ('loss_caption', 1.749), ('total_loss', 9.638)]), +time/iter = 0.182, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 17290 (epoch 12), +loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.198), ('loss_self_iou', 0.005), ('cardinality_error', 7.932), ('loss_ce_0', 0.254), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.214), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.932), ('loss_caption_0', 1.815), ('loss_caption', 1.83), ('total_loss', 10.067)]), +time/iter = 0.163, bad_vid = 0.000 + +Validation results of iter 17329: +Bleu_1:0.19294534256446427 +Bleu_2:0.11789730285267924 +Bleu_3:0.06601509377472357 +Bleu_4:0.03274421971508606 +METEOR:0.0906445074413136 +ROUGE_L:0.17678145420382357 +CIDEr:0.5750907875125135 +Recall:0.3073352674556176 +Precision:0.4434536834427428 +soda_c:0.07896521325127955 +para_Bleu_1:0.39483511792471604 +para_Bleu_2:0.23988438429479647 +para_Bleu_3:0.1464330354033768 +para_Bleu_4:0.09122283851671699 +para_METEOR:0.16480200992253577 +para_ROUGE_L:0.33317486176302236 +para_CIDEr:0.29080350784714515 + +overall score of iter 17329: 0.5468283562863979 + +Save model at iter 17329 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save info to info.json -ID seq2-ft(mix)-gt_percent-1.0 iter 131000 (epoch 13), -loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.107), ('loss_bbox', 0.062), ('loss_giou', 0.17), ('loss_self_iou', 0.092), ('cardinality_error', 3.75), ('loss_ce_0', 0.279), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.75), ('loss_caption_0', 2.817), ('loss_caption', 2.826), ('total_loss', 13.897)]), -time/iter = 0.734, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 132000 (epoch 13), -loss = OrderedDict([('loss_ce', 0.271), ('loss_counter', 0.109), ('loss_bbox', 0.065), ('loss_giou', 0.174), ('loss_self_iou', 0.089), ('cardinality_error', 3.814), ('loss_ce_0', 0.274), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.814), ('loss_caption_0', 2.778), ('loss_caption', 2.776), ('total_loss', 13.726)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 133000 (epoch 13), -loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.113), ('loss_bbox', 0.064), ('loss_giou', 0.172), ('loss_self_iou', 0.095), ('cardinality_error', 3.773), ('loss_ce_0', 0.277), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.773), ('loss_caption_0', 2.843), ('loss_caption', 2.843), ('total_loss', 13.999)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 134000 (epoch 13), -loss = OrderedDict([('loss_ce', 0.273), ('loss_counter', 0.108), ('loss_bbox', 0.065), ('loss_giou', 0.171), ('loss_self_iou', 0.101), ('cardinality_error', 3.743), ('loss_ce_0', 0.276), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.743), ('loss_caption_0', 2.786), ('loss_caption', 2.787), ('total_loss', 13.756)]), -time/iter = 0.188, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 135000 (epoch 13), -loss = OrderedDict([('loss_ce', 0.28), ('loss_counter', 0.115), ('loss_bbox', 0.061), ('loss_giou', 0.168), ('loss_self_iou', 0.096), ('cardinality_error', 3.794), ('loss_ce_0', 0.281), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.794), ('loss_caption_0', 2.785), ('loss_caption', 2.784), ('total_loss', 13.759)]), -time/iter = 0.190, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 136000 (epoch 13), -loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.106), ('loss_bbox', 0.065), ('loss_giou', 0.168), ('loss_self_iou', 0.092), ('cardinality_error', 3.653), ('loss_ce_0', 0.279), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.653), ('loss_caption_0', 2.828), ('loss_caption', 2.834), ('total_loss', 13.919)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 137000 (epoch 13), -loss = OrderedDict([('loss_ce', 0.279), ('loss_counter', 0.105), ('loss_bbox', 0.065), ('loss_giou', 0.173), ('loss_self_iou', 0.099), ('cardinality_error', 3.654), ('loss_ce_0', 0.281), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.654), ('loss_caption_0', 2.79), ('loss_caption', 2.799), ('total_loss', 13.806)]), -time/iter = 0.190, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 138000 (epoch 13), -loss = OrderedDict([('loss_ce', 0.278), ('loss_counter', 0.109), ('loss_bbox', 0.064), ('loss_giou', 0.171), ('loss_self_iou', 0.095), ('cardinality_error', 3.714), ('loss_ce_0', 0.28), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.714), ('loss_caption_0', 2.835), ('loss_caption', 2.828), ('total_loss', 13.945)]), -time/iter = 0.188, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 139000 (epoch 13), -loss = OrderedDict([('loss_ce', 0.281), ('loss_counter', 0.115), ('loss_bbox', 0.062), ('loss_giou', 0.167), ('loss_self_iou', 0.098), ('cardinality_error', 3.813), ('loss_ce_0', 0.283), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.813), ('loss_caption_0', 2.83), ('loss_caption', 2.828), ('total_loss', 13.924)]), -time/iter = 0.186, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 140000 (epoch 13), -loss = OrderedDict([('loss_ce', 0.277), ('loss_counter', 0.107), ('loss_bbox', 0.063), ('loss_giou', 0.171), ('loss_self_iou', 0.09), ('cardinality_error', 3.664), ('loss_ce_0', 0.28), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.821), ('loss_caption', 2.823), ('total_loss', 13.905)]), -time/iter = 0.191, bad_vid = 0.000 - -Validation results of iter 140126: -Bleu_1:0.16683698969676453 -Bleu_2:0.09036855967772307 -Bleu_3:0.047484441130632896 -Bleu_4:0.023876859658376735 -METEOR:0.08814626862844692 -ROUGE_L:0.16473003568483396 -CIDEr:0.3189568758512915 -Recall:0.5281546209817979 -Precision:0.5704333604501349 -soda_c:0.057417105431783064 -para_Bleu_1:0.4580706340663244 -para_Bleu_2:0.27372623489326064 -para_Bleu_3:0.16745128920972313 -para_Bleu_4:0.10550306643408856 -para_METEOR:0.16656454278617736 -para_ROUGE_L:0.31631873012989425 -para_CIDEr:0.19724321819057877 - -overall score of iter 140126: 0.46931082741084473 - -Save model at iter 140126 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +ID seq2-ft(mix)-gt_percent-1.0 iter 17423 (epoch 13), +loss = OrderedDict([('loss_ce', 0.262), ('loss_counter', 0.111), ('loss_bbox', 0.015), ('loss_giou', 0.195), ('loss_self_iou', 0.007), ('cardinality_error', 7.692), ('loss_ce_0', 0.259), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.208), ('loss_self_iou_0', 0.007), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.865), ('loss_caption', 1.881), ('total_loss', 10.261)]), +time/iter = 0.713, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 17556 (epoch 13), +loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.111), ('loss_bbox', 0.014), ('loss_giou', 0.193), ('loss_self_iou', 0.004), ('cardinality_error', 7.737), ('loss_ce_0', 0.253), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.209), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.737), ('loss_caption_0', 1.744), ('loss_caption', 1.743), ('total_loss', 9.707)]), +time/iter = 0.168, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 17689 (epoch 13), +loss = OrderedDict([('loss_ce', 0.259), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.199), ('loss_self_iou', 0.006), ('cardinality_error', 7.602), ('loss_ce_0', 0.262), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.602), ('loss_caption_0', 1.835), ('loss_caption', 1.819), ('total_loss', 10.1)]), +time/iter = 0.160, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 17822 (epoch 13), +loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.108), ('loss_bbox', 0.014), ('loss_giou', 0.191), ('loss_self_iou', 0.005), ('cardinality_error', 7.526), ('loss_ce_0', 0.249), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.206), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.526), ('loss_caption_0', 1.681), ('loss_caption', 1.67), ('total_loss', 9.397)]), +time/iter = 0.152, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 17955 (epoch 13), +loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.102), ('loss_bbox', 0.014), ('loss_giou', 0.184), ('loss_self_iou', 0.005), ('cardinality_error', 7.526), ('loss_ce_0', 0.252), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.2), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.526), ('loss_caption_0', 1.757), ('loss_caption', 1.745), ('total_loss', 9.658)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 18088 (epoch 13), +loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.106), ('loss_bbox', 0.014), ('loss_giou', 0.177), ('loss_self_iou', 0.004), ('cardinality_error', 7.534), ('loss_ce_0', 0.251), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.191), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.534), ('loss_caption_0', 1.703), ('loss_caption', 1.701), ('total_loss', 9.39)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 18221 (epoch 13), +loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.111), ('loss_bbox', 0.014), ('loss_giou', 0.201), ('loss_self_iou', 0.005), ('cardinality_error', 8.211), ('loss_ce_0', 0.252), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.213), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 8.211), ('loss_caption_0', 1.824), ('loss_caption', 1.816), ('total_loss', 10.053)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 18354 (epoch 13), +loss = OrderedDict([('loss_ce', 0.253), ('loss_counter', 0.104), ('loss_bbox', 0.015), ('loss_giou', 0.195), ('loss_self_iou', 0.004), ('cardinality_error', 7.789), ('loss_ce_0', 0.249), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.21), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.789), ('loss_caption_0', 1.792), ('loss_caption', 1.779), ('total_loss', 9.874)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 18487 (epoch 13), +loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.111), ('loss_bbox', 0.013), ('loss_giou', 0.19), ('loss_self_iou', 0.004), ('cardinality_error', 7.992), ('loss_ce_0', 0.251), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.205), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.826), ('loss_caption', 1.81), ('total_loss', 9.979)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 18620 (epoch 13), +loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.193), ('loss_self_iou', 0.003), ('cardinality_error', 7.737), ('loss_ce_0', 0.251), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.206), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.737), ('loss_caption_0', 1.767), ('loss_caption', 1.771), ('total_loss', 9.784)]), +time/iter = 0.153, bad_vid = 0.000 + +Validation results of iter 18662: +Bleu_1:0.1916652028982354 +Bleu_2:0.11864819375256218 +Bleu_3:0.06801290454817709 +Bleu_4:0.03421778123301331 +METEOR:0.08890100804282676 +ROUGE_L:0.17229926562968575 +CIDEr:0.5719694906113042 +Recall:0.3115151404333572 +Precision:0.42734448265082836 +soda_c:0.07979305036983636 +para_Bleu_1:0.3972508455506424 +para_Bleu_2:0.24317507500304622 +para_Bleu_3:0.1497047997976745 +para_Bleu_4:0.09437727320664267 +para_METEOR:0.16651343432042678 +para_ROUGE_L:0.33875534436877147 +para_CIDEr:0.29220356232363026 + +overall score of iter 18662: 0.5530942698506998 + +Save model at iter 18662 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save info to info.json -ID seq2-ft(mix)-gt_percent-1.0 iter 141000 (epoch 14), -loss = OrderedDict([('loss_ce', 0.268), ('loss_counter', 0.108), ('loss_bbox', 0.066), ('loss_giou', 0.171), ('loss_self_iou', 0.106), ('cardinality_error', 3.774), ('loss_ce_0', 0.27), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.108), ('cardinality_error_0', 3.774), ('loss_caption_0', 2.75), ('loss_caption', 2.748), ('total_loss', 13.572)]), -time/iter = 0.739, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 142000 (epoch 14), -loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.109), ('loss_bbox', 0.062), ('loss_giou', 0.173), ('loss_self_iou', 0.091), ('cardinality_error', 3.797), ('loss_ce_0', 0.272), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.797), ('loss_caption_0', 2.72), ('loss_caption', 2.722), ('total_loss', 13.492)]), -time/iter = 0.186, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 143000 (epoch 14), -loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.1), ('loss_bbox', 0.063), ('loss_giou', 0.162), ('loss_self_iou', 0.095), ('cardinality_error', 3.637), ('loss_ce_0', 0.268), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.637), ('loss_caption_0', 2.782), ('loss_caption', 2.782), ('total_loss', 13.626)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 144000 (epoch 14), -loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.112), ('loss_bbox', 0.062), ('loss_giou', 0.172), ('loss_self_iou', 0.094), ('cardinality_error', 3.831), ('loss_ce_0', 0.273), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.18), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.831), ('loss_caption_0', 2.793), ('loss_caption', 2.79), ('total_loss', 13.773)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 145000 (epoch 14), -loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.101), ('loss_bbox', 0.061), ('loss_giou', 0.16), ('loss_self_iou', 0.093), ('cardinality_error', 3.665), ('loss_ce_0', 0.273), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.168), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.665), ('loss_caption_0', 2.762), ('loss_caption', 2.767), ('total_loss', 13.554)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 146000 (epoch 14), -loss = OrderedDict([('loss_ce', 0.275), ('loss_counter', 0.109), ('loss_bbox', 0.061), ('loss_giou', 0.164), ('loss_self_iou', 0.091), ('cardinality_error', 3.725), ('loss_ce_0', 0.276), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.725), ('loss_caption_0', 2.813), ('loss_caption', 2.813), ('total_loss', 13.811)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 147000 (epoch 14), -loss = OrderedDict([('loss_ce', 0.272), ('loss_counter', 0.104), ('loss_bbox', 0.063), ('loss_giou', 0.171), ('loss_self_iou', 0.097), ('cardinality_error', 3.714), ('loss_ce_0', 0.273), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.714), ('loss_caption_0', 2.747), ('loss_caption', 2.745), ('total_loss', 13.578)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 148000 (epoch 14), -loss = OrderedDict([('loss_ce', 0.271), ('loss_counter', 0.108), ('loss_bbox', 0.063), ('loss_giou', 0.168), ('loss_self_iou', 0.096), ('cardinality_error', 3.728), ('loss_ce_0', 0.274), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.728), ('loss_caption_0', 2.843), ('loss_caption', 2.84), ('total_loss', 13.944)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 149000 (epoch 14), -loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.108), ('loss_bbox', 0.066), ('loss_giou', 0.169), ('loss_self_iou', 0.098), ('cardinality_error', 3.799), ('loss_ce_0', 0.273), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.799), ('loss_caption_0', 2.836), ('loss_caption', 2.836), ('total_loss', 13.926)]), -time/iter = 0.196, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 150000 (epoch 14), -loss = OrderedDict([('loss_ce', 0.27), ('loss_counter', 0.107), ('loss_bbox', 0.063), ('loss_giou', 0.169), ('loss_self_iou', 0.087), ('cardinality_error', 3.703), ('loss_ce_0', 0.272), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.703), ('loss_caption_0', 2.806), ('loss_caption', 2.806), ('total_loss', 13.795)]), -time/iter = 0.193, bad_vid = 0.000 - -Validation results of iter 150135: -Bleu_1:0.16662144072598145 -Bleu_2:0.08988753231411394 -Bleu_3:0.04690847145308288 -Bleu_4:0.023224274927987735 -METEOR:0.08725158341768323 -ROUGE_L:0.16364893754496343 -CIDEr:0.32028824475030926 -Recall:0.5260420675803493 -Precision:0.5630584367161506 -soda_c:0.057565785652999135 -para_Bleu_1:0.46764194087144684 -para_Bleu_2:0.2801629240374498 -para_Bleu_3:0.1713033186995987 -para_Bleu_4:0.10750827268624512 -para_METEOR:0.16742715934059368 -para_ROUGE_L:0.31858424377772926 -para_CIDEr:0.2089956210595351 - -overall score of iter 150135: 0.4839310530863739 - -Save model at iter 150135 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. -Save Best-model at iter 150135 to checkpoint file. +ID seq2-ft(mix)-gt_percent-1.0 iter 18753 (epoch 14), +loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.115), ('loss_bbox', 0.013), ('loss_giou', 0.195), ('loss_self_iou', 0.004), ('cardinality_error', 8.241), ('loss_ce_0', 0.251), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.207), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.241), ('loss_caption_0', 1.758), ('loss_caption', 1.759), ('total_loss', 9.756)]), +time/iter = 0.731, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 18886 (epoch 14), +loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.103), ('loss_bbox', 0.015), ('loss_giou', 0.182), ('loss_self_iou', 0.004), ('cardinality_error', 7.436), ('loss_ce_0', 0.245), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.436), ('loss_caption_0', 1.696), ('loss_caption', 1.692), ('total_loss', 9.366)]), +time/iter = 0.163, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19019 (epoch 14), +loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.104), ('loss_bbox', 0.013), ('loss_giou', 0.181), ('loss_self_iou', 0.003), ('cardinality_error', 7.692), ('loss_ce_0', 0.242), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.73), ('loss_caption', 1.729), ('total_loss', 9.496)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19152 (epoch 14), +loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.112), ('loss_bbox', 0.014), ('loss_giou', 0.181), ('loss_self_iou', 0.006), ('cardinality_error', 7.82), ('loss_ce_0', 0.251), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.779), ('loss_caption', 1.771), ('total_loss', 9.714)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19285 (epoch 14), +loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.105), ('loss_bbox', 0.014), ('loss_giou', 0.194), ('loss_self_iou', 0.004), ('cardinality_error', 7.669), ('loss_ce_0', 0.25), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.204), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.669), ('loss_caption_0', 1.76), ('loss_caption', 1.772), ('total_loss', 9.759)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19418 (epoch 14), +loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.115), ('loss_bbox', 0.013), ('loss_giou', 0.197), ('loss_self_iou', 0.004), ('cardinality_error', 8.256), ('loss_ce_0', 0.245), ('loss_counter_0', 0.117), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.211), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.256), ('loss_caption_0', 1.754), ('loss_caption', 1.758), ('total_loss', 9.747)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19551 (epoch 14), +loss = OrderedDict([('loss_ce', 0.249), ('loss_counter', 0.109), ('loss_bbox', 0.013), ('loss_giou', 0.175), ('loss_self_iou', 0.004), ('cardinality_error', 7.865), ('loss_ce_0', 0.253), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.865), ('loss_caption_0', 1.68), ('loss_caption', 1.689), ('total_loss', 9.3)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19684 (epoch 14), +loss = OrderedDict([('loss_ce', 0.263), ('loss_counter', 0.104), ('loss_bbox', 0.015), ('loss_giou', 0.187), ('loss_self_iou', 0.005), ('cardinality_error', 7.474), ('loss_ce_0', 0.262), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.199), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.474), ('loss_caption_0', 1.81), ('loss_caption', 1.803), ('total_loss', 9.923)]), +time/iter = 0.165, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19817 (epoch 14), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.106), ('loss_bbox', 0.014), ('loss_giou', 0.183), ('loss_self_iou', 0.005), ('cardinality_error', 7.526), ('loss_ce_0', 0.247), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.526), ('loss_caption_0', 1.769), ('loss_caption', 1.765), ('total_loss', 9.677)]), +time/iter = 0.164, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 19950 (epoch 14), +loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.108), ('loss_bbox', 0.013), ('loss_giou', 0.19), ('loss_self_iou', 0.005), ('cardinality_error', 7.797), ('loss_ce_0', 0.253), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.797), ('loss_caption_0', 1.736), ('loss_caption', 1.748), ('total_loss', 9.654)]), +time/iter = 0.153, bad_vid = 0.000 + +Validation results of iter 19995: +Bleu_1:0.19012877786294885 +Bleu_2:0.11743680046097797 +Bleu_3:0.06623934110461578 +Bleu_4:0.03314975306654321 +METEOR:0.08857227272587216 +ROUGE_L:0.17208518718096077 +CIDEr:0.5689998070546577 +Recall:0.3090681299310951 +Precision:0.43095498593310433 +soda_c:0.08081534748318767 +para_Bleu_1:0.3949292262433903 +para_Bleu_2:0.24183495416706074 +para_Bleu_3:0.1493168425692173 +para_Bleu_4:0.0941904023418332 +para_METEOR:0.16661877157717606 +para_ROUGE_L:0.3391544295873436 +para_CIDEr:0.3057631644012313 + +overall score of iter 19995: 0.5665723383202406 + +Save model at iter 19995 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save info to info.json -ID seq2-ft(mix)-gt_percent-1.0 iter 151000 (epoch 15), -loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.101), ('loss_bbox', 0.063), ('loss_giou', 0.163), ('loss_self_iou', 0.097), ('cardinality_error', 3.645), ('loss_ce_0', 0.266), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.645), ('loss_caption_0', 2.762), ('loss_caption', 2.759), ('total_loss', 13.537)]), -time/iter = 0.737, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 152000 (epoch 15), -loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.103), ('loss_bbox', 0.06), ('loss_giou', 0.166), ('loss_self_iou', 0.087), ('cardinality_error', 3.722), ('loss_ce_0', 0.269), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.087), ('cardinality_error_0', 3.722), ('loss_caption_0', 2.762), ('loss_caption', 2.766), ('total_loss', 13.59)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 153000 (epoch 15), -loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.111), ('loss_bbox', 0.062), ('loss_giou', 0.168), ('loss_self_iou', 0.083), ('cardinality_error', 3.813), ('loss_ce_0', 0.267), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.085), ('cardinality_error_0', 3.813), ('loss_caption_0', 2.777), ('loss_caption', 2.778), ('total_loss', 13.663)]), -time/iter = 0.193, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 154000 (epoch 15), -loss = OrderedDict([('loss_ce', 0.268), ('loss_counter', 0.106), ('loss_bbox', 0.061), ('loss_giou', 0.168), ('loss_self_iou', 0.092), ('cardinality_error', 3.769), ('loss_ce_0', 0.272), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.178), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.769), ('loss_caption_0', 2.787), ('loss_caption', 2.787), ('total_loss', 13.717)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 155000 (epoch 15), -loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.104), ('loss_bbox', 0.063), ('loss_giou', 0.169), ('loss_self_iou', 0.09), ('cardinality_error', 3.714), ('loss_ce_0', 0.267), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.714), ('loss_caption_0', 2.758), ('loss_caption', 2.76), ('total_loss', 13.593)]), -time/iter = 0.190, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 156000 (epoch 15), -loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.106), ('loss_bbox', 0.064), ('loss_giou', 0.167), ('loss_self_iou', 0.102), ('cardinality_error', 3.675), ('loss_ce_0', 0.269), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.675), ('loss_caption_0', 2.741), ('loss_caption', 2.742), ('total_loss', 13.504)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 157000 (epoch 15), -loss = OrderedDict([('loss_ce', 0.267), ('loss_counter', 0.104), ('loss_bbox', 0.065), ('loss_giou', 0.167), ('loss_self_iou', 0.103), ('cardinality_error', 3.722), ('loss_ce_0', 0.268), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.068), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.105), ('cardinality_error_0', 3.722), ('loss_caption_0', 2.777), ('loss_caption', 2.783), ('total_loss', 13.668)]), -time/iter = 0.188, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 158000 (epoch 15), -loss = OrderedDict([('loss_ce', 0.266), ('loss_counter', 0.106), ('loss_bbox', 0.062), ('loss_giou', 0.164), ('loss_self_iou', 0.099), ('cardinality_error', 3.758), ('loss_ce_0', 0.27), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.758), ('loss_caption_0', 2.815), ('loss_caption', 2.817), ('total_loss', 13.789)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 159000 (epoch 15), -loss = OrderedDict([('loss_ce', 0.272), ('loss_counter', 0.108), ('loss_bbox', 0.062), ('loss_giou', 0.169), ('loss_self_iou', 0.098), ('cardinality_error', 3.729), ('loss_ce_0', 0.275), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.729), ('loss_caption_0', 2.783), ('loss_caption', 2.785), ('total_loss', 13.721)]), -time/iter = 0.194, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 160000 (epoch 15), -loss = OrderedDict([('loss_ce', 0.269), ('loss_counter', 0.109), ('loss_bbox', 0.063), ('loss_giou', 0.166), ('loss_self_iou', 0.098), ('cardinality_error', 3.816), ('loss_ce_0', 0.271), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.816), ('loss_caption_0', 2.78), ('loss_caption', 2.784), ('total_loss', 13.686)]), -time/iter = 0.196, bad_vid = 0.000 - -Validation results of iter 160144: -Bleu_1:0.16754398447821903 -Bleu_2:0.08978801866243748 -Bleu_3:0.046077601805781236 -Bleu_4:0.02215727819941335 -METEOR:0.08650894641812401 -ROUGE_L:0.16425299709373153 -CIDEr:0.3192637628790779 -Recall:0.5308598805776927 -Precision:0.5705477594739302 -soda_c:0.059035206979637336 -para_Bleu_1:0.4722129873397206 -para_Bleu_2:0.2843271953295457 -para_Bleu_3:0.17433620623201318 -para_Bleu_4:0.10943737200004257 -para_METEOR:0.16524483023272712 -para_ROUGE_L:0.3180351825656492 -para_CIDEr:0.2139382514781602 - -overall score of iter 160144: 0.4886204537109299 - -Save model at iter 160144 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. -Save Best-model at iter 160144 to checkpoint file. +ID seq2-ft(mix)-gt_percent-1.0 iter 20083 (epoch 15), +loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.102), ('loss_bbox', 0.014), ('loss_giou', 0.186), ('loss_self_iou', 0.004), ('cardinality_error', 7.519), ('loss_ce_0', 0.257), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.519), ('loss_caption_0', 1.743), ('loss_caption', 1.756), ('total_loss', 9.655)]), +time/iter = 0.703, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20216 (epoch 15), +loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.105), ('loss_bbox', 0.013), ('loss_giou', 0.179), ('loss_self_iou', 0.003), ('cardinality_error', 7.759), ('loss_ce_0', 0.244), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.193), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.759), ('loss_caption_0', 1.79), ('loss_caption', 1.781), ('total_loss', 9.713)]), +time/iter = 0.168, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20349 (epoch 15), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.11), ('loss_bbox', 0.013), ('loss_giou', 0.19), ('loss_self_iou', 0.004), ('cardinality_error', 7.992), ('loss_ce_0', 0.245), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.749), ('loss_caption', 1.759), ('total_loss', 9.675)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20482 (epoch 15), +loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.109), ('loss_bbox', 0.015), ('loss_giou', 0.193), ('loss_self_iou', 0.005), ('cardinality_error', 7.94), ('loss_ce_0', 0.244), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.207), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.94), ('loss_caption_0', 1.694), ('loss_caption', 1.715), ('total_loss', 9.502)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20615 (epoch 15), +loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.107), ('loss_bbox', 0.014), ('loss_giou', 0.188), ('loss_self_iou', 0.005), ('cardinality_error', 7.368), ('loss_ce_0', 0.257), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.368), ('loss_caption_0', 1.77), ('loss_caption', 1.771), ('total_loss', 9.775)]), +time/iter = 0.156, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20748 (epoch 15), +loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.107), ('loss_bbox', 0.013), ('loss_giou', 0.178), ('loss_self_iou', 0.004), ('cardinality_error', 7.857), ('loss_ce_0', 0.247), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.857), ('loss_caption_0', 1.786), ('loss_caption', 1.773), ('total_loss', 9.695)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 20881 (epoch 15), +loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.103), ('loss_bbox', 0.013), ('loss_giou', 0.178), ('loss_self_iou', 0.003), ('cardinality_error', 7.594), ('loss_ce_0', 0.242), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.594), ('loss_caption_0', 1.746), ('loss_caption', 1.748), ('total_loss', 9.541)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21014 (epoch 15), +loss = OrderedDict([('loss_ce', 0.249), ('loss_counter', 0.108), ('loss_bbox', 0.015), ('loss_giou', 0.19), ('loss_self_iou', 0.005), ('cardinality_error', 8.09), ('loss_ce_0', 0.249), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 8.09), ('loss_caption_0', 1.709), ('loss_caption', 1.698), ('total_loss', 9.49)]), +time/iter = 0.149, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21147 (epoch 15), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.115), ('loss_bbox', 0.014), ('loss_giou', 0.186), ('loss_self_iou', 0.004), ('cardinality_error', 7.812), ('loss_ce_0', 0.248), ('loss_counter_0', 0.114), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.198), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.812), ('loss_caption_0', 1.733), ('loss_caption', 1.732), ('total_loss', 9.57)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21280 (epoch 15), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.104), ('loss_bbox', 0.014), ('loss_giou', 0.187), ('loss_self_iou', 0.004), ('cardinality_error', 7.632), ('loss_ce_0', 0.245), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.632), ('loss_caption_0', 1.646), ('loss_caption', 1.658), ('total_loss', 9.233)]), +time/iter = 0.152, bad_vid = 0.000 + +Validation results of iter 21328: +Bleu_1:0.1927355202990476 +Bleu_2:0.11755729236198051 +Bleu_3:0.06532950485231373 +Bleu_4:0.0318670348131602 +METEOR:0.08966953019840175 +ROUGE_L:0.17549405824640266 +CIDEr:0.5708533801009449 +Recall:0.31055728552993345 +Precision:0.4412863394810881 +soda_c:0.08079399116249976 +para_Bleu_1:0.3847850395827542 +para_Bleu_2:0.23591168028694995 +para_Bleu_3:0.14500000021146267 +para_Bleu_4:0.09097906463153684 +para_METEOR:0.1633729521776342 +para_ROUGE_L:0.33764324525807 +para_CIDEr:0.3225522700715415 + +overall score of iter 21328: 0.5769042868807126 + +Save model at iter 21328 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save info to info.json -ID seq2-ft(mix)-gt_percent-1.0 iter 161000 (epoch 16), -loss = OrderedDict([('loss_ce', 0.26), ('loss_counter', 0.103), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.097), ('cardinality_error', 3.695), ('loss_ce_0', 0.263), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.695), ('loss_caption_0', 2.766), ('loss_caption', 2.768), ('total_loss', 13.553)]), -time/iter = 0.749, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 162000 (epoch 16), -loss = OrderedDict([('loss_ce', 0.262), ('loss_counter', 0.103), ('loss_bbox', 0.063), ('loss_giou', 0.164), ('loss_self_iou', 0.091), ('cardinality_error', 3.694), ('loss_ce_0', 0.266), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.694), ('loss_caption_0', 2.768), ('loss_caption', 2.764), ('total_loss', 13.573)]), -time/iter = 0.188, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 163000 (epoch 16), -loss = OrderedDict([('loss_ce', 0.262), ('loss_counter', 0.105), ('loss_bbox', 0.064), ('loss_giou', 0.173), ('loss_self_iou', 0.097), ('cardinality_error', 3.769), ('loss_ce_0', 0.266), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.769), ('loss_caption_0', 2.765), ('loss_caption', 2.766), ('total_loss', 13.63)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 164000 (epoch 16), -loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.11), ('loss_bbox', 0.061), ('loss_giou', 0.164), ('loss_self_iou', 0.092), ('cardinality_error', 3.774), ('loss_ce_0', 0.269), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.774), ('loss_caption_0', 2.772), ('loss_caption', 2.776), ('total_loss', 13.625)]), -time/iter = 0.190, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 165000 (epoch 16), -loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.102), ('loss_bbox', 0.063), ('loss_giou', 0.164), ('loss_self_iou', 0.092), ('cardinality_error', 3.699), ('loss_ce_0', 0.267), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.699), ('loss_caption_0', 2.711), ('loss_caption', 2.716), ('total_loss', 13.368)]), -time/iter = 0.187, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 166000 (epoch 16), -loss = OrderedDict([('loss_ce', 0.264), ('loss_counter', 0.105), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.094), ('cardinality_error', 3.72), ('loss_ce_0', 0.268), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.72), ('loss_caption_0', 2.754), ('loss_caption', 2.755), ('total_loss', 13.534)]), -time/iter = 0.193, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 167000 (epoch 16), -loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.168), ('loss_self_iou', 0.095), ('cardinality_error', 3.712), ('loss_ce_0', 0.266), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.712), ('loss_caption_0', 2.771), ('loss_caption', 2.772), ('total_loss', 13.617)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 168000 (epoch 16), -loss = OrderedDict([('loss_ce', 0.265), ('loss_counter', 0.108), ('loss_bbox', 0.062), ('loss_giou', 0.168), ('loss_self_iou', 0.09), ('cardinality_error', 3.816), ('loss_ce_0', 0.269), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.816), ('loss_caption_0', 2.814), ('loss_caption', 2.82), ('total_loss', 13.826)]), -time/iter = 0.193, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 169000 (epoch 16), -loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.106), ('loss_bbox', 0.064), ('loss_giou', 0.166), ('loss_self_iou', 0.106), ('cardinality_error', 3.697), ('loss_ce_0', 0.261), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.107), ('cardinality_error_0', 3.697), ('loss_caption_0', 2.769), ('loss_caption', 2.775), ('total_loss', 13.598)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 170000 (epoch 16), -loss = OrderedDict([('loss_ce', 0.268), ('loss_counter', 0.105), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.093), ('cardinality_error', 3.799), ('loss_ce_0', 0.272), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.799), ('loss_caption_0', 2.794), ('loss_caption', 2.798), ('total_loss', 13.727)]), -time/iter = 0.191, bad_vid = 0.000 - -Validation results of iter 170153: -Bleu_1:0.16584280243722227 -Bleu_2:0.08889969905794425 -Bleu_3:0.04569298286173284 -Bleu_4:0.021992960199339176 -METEOR:0.08570833880397384 -ROUGE_L:0.16234979503724006 -CIDEr:0.3170462149966731 -Recall:0.5273397281824633 -Precision:0.5648989898989865 -soda_c:0.058539462474976364 -para_Bleu_1:0.4735378044184376 -para_Bleu_2:0.2855599966961999 -para_Bleu_3:0.17485842077678387 -para_Bleu_4:0.10998333079246524 -para_METEOR:0.16580782598840993 -para_ROUGE_L:0.3184105968751349 -para_CIDEr:0.2144083270960459 - -overall score of iter 170153: 0.4901994838769211 - -Save model at iter 170153 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. -Save Best-model at iter 170153 to checkpoint file. +ID seq2-ft(mix)-gt_percent-1.0 iter 21413 (epoch 16), +loss = OrderedDict([('loss_ce', 0.24), ('loss_counter', 0.107), ('loss_bbox', 0.014), ('loss_giou', 0.175), ('loss_self_iou', 0.004), ('cardinality_error', 7.541), ('loss_ce_0', 0.239), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.541), ('loss_caption_0', 1.637), ('loss_caption', 1.633), ('total_loss', 9.069)]), +time/iter = 0.698, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21546 (epoch 16), +loss = OrderedDict([('loss_ce', 0.245), ('loss_counter', 0.102), ('loss_bbox', 0.013), ('loss_giou', 0.172), ('loss_self_iou', 0.004), ('cardinality_error', 7.624), ('loss_ce_0', 0.243), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.185), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.624), ('loss_caption_0', 1.773), ('loss_caption', 1.784), ('total_loss', 9.621)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21679 (epoch 16), +loss = OrderedDict([('loss_ce', 0.239), ('loss_counter', 0.107), ('loss_bbox', 0.014), ('loss_giou', 0.181), ('loss_self_iou', 0.004), ('cardinality_error', 7.992), ('loss_ce_0', 0.238), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.194), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.992), ('loss_caption_0', 1.809), ('loss_caption', 1.805), ('total_loss', 9.791)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21812 (epoch 16), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.107), ('loss_bbox', 0.013), ('loss_giou', 0.179), ('loss_self_iou', 0.003), ('cardinality_error', 7.677), ('loss_ce_0', 0.25), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.013), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.677), ('loss_caption_0', 1.674), ('loss_caption', 1.676), ('total_loss', 9.277)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 21945 (epoch 16), +loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.108), ('loss_bbox', 0.014), ('loss_giou', 0.192), ('loss_self_iou', 0.004), ('cardinality_error', 7.865), ('loss_ce_0', 0.244), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.206), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.865), ('loss_caption_0', 1.713), ('loss_caption', 1.714), ('total_loss', 9.531)]), +time/iter = 0.155, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22078 (epoch 16), +loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.11), ('loss_bbox', 0.014), ('loss_giou', 0.19), ('loss_self_iou', 0.005), ('cardinality_error', 7.707), ('loss_ce_0', 0.247), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.707), ('loss_caption_0', 1.772), ('loss_caption', 1.758), ('total_loss', 9.738)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22211 (epoch 16), +loss = OrderedDict([('loss_ce', 0.249), ('loss_counter', 0.101), ('loss_bbox', 0.013), ('loss_giou', 0.18), ('loss_self_iou', 0.005), ('cardinality_error', 7.541), ('loss_ce_0', 0.249), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.193), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.541), ('loss_caption_0', 1.665), ('loss_caption', 1.66), ('total_loss', 9.243)]), +time/iter = 0.160, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22344 (epoch 16), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.113), ('loss_bbox', 0.015), ('loss_giou', 0.187), ('loss_self_iou', 0.004), ('cardinality_error', 8.008), ('loss_ce_0', 0.248), ('loss_counter_0', 0.115), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.202), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.008), ('loss_caption_0', 1.799), ('loss_caption', 1.784), ('total_loss', 9.823)]), +time/iter = 0.163, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22477 (epoch 16), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.102), ('loss_bbox', 0.013), ('loss_giou', 0.184), ('loss_self_iou', 0.004), ('cardinality_error', 7.699), ('loss_ce_0', 0.247), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.699), ('loss_caption_0', 1.722), ('loss_caption', 1.733), ('total_loss', 9.525)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22610 (epoch 16), +loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.106), ('loss_bbox', 0.014), ('loss_giou', 0.188), ('loss_self_iou', 0.004), ('cardinality_error', 7.729), ('loss_ce_0', 0.245), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.2), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.729), ('loss_caption_0', 1.664), ('loss_caption', 1.667), ('total_loss', 9.297)]), +time/iter = 0.154, bad_vid = 0.000 + +Validation results of iter 22661: +Bleu_1:0.1905629005997804 +Bleu_2:0.11689699082903934 +Bleu_3:0.06544029555928756 +Bleu_4:0.03330988693345351 +METEOR:0.08938496175202132 +ROUGE_L:0.17298359351524648 +CIDEr:0.5732307929342625 +Recall:0.309604513071417 +Precision:0.43046524955715343 +soda_c:0.08056479007503722 +para_Bleu_1:0.3975304274857351 +para_Bleu_2:0.24253918136446623 +para_Bleu_3:0.14848895422464012 +para_Bleu_4:0.09337330751749118 +para_METEOR:0.16677196164785574 +para_ROUGE_L:0.33750187221117683 +para_CIDEr:0.31278894258081524 + +overall score of iter 22661: 0.5729342117461622 + +Save model at iter 22661 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save info to info.json -ID seq2-ft(mix)-gt_percent-1.0 iter 171000 (epoch 17), -loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.161), ('loss_self_iou', 0.094), ('cardinality_error', 3.694), ('loss_ce_0', 0.261), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.169), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.694), ('loss_caption_0', 2.772), ('loss_caption', 2.77), ('total_loss', 13.544)]), -time/iter = 0.745, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 172000 (epoch 17), -loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.1), ('loss_bbox', 0.063), ('loss_giou', 0.165), ('loss_self_iou', 0.096), ('cardinality_error', 3.667), ('loss_ce_0', 0.262), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.667), ('loss_caption_0', 2.741), ('loss_caption', 2.743), ('total_loss', 13.47)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 173000 (epoch 17), -loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.104), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.09), ('cardinality_error', 3.753), ('loss_ce_0', 0.261), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.753), ('loss_caption_0', 2.786), ('loss_caption', 2.785), ('total_loss', 13.646)]), -time/iter = 0.193, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 174000 (epoch 17), -loss = OrderedDict([('loss_ce', 0.259), ('loss_counter', 0.107), ('loss_bbox', 0.06), ('loss_giou', 0.166), ('loss_self_iou', 0.094), ('cardinality_error', 3.832), ('loss_ce_0', 0.261), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.177), ('loss_self_iou_0', 0.096), ('cardinality_error_0', 3.832), ('loss_caption_0', 2.733), ('loss_caption', 2.738), ('total_loss', 13.457)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 175000 (epoch 17), -loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.103), ('loss_bbox', 0.06), ('loss_giou', 0.163), ('loss_self_iou', 0.098), ('cardinality_error', 3.731), ('loss_ce_0', 0.259), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.062), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.731), ('loss_caption_0', 2.745), ('loss_caption', 2.744), ('total_loss', 13.454)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 176000 (epoch 17), -loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.103), ('loss_bbox', 0.06), ('loss_giou', 0.164), ('loss_self_iou', 0.095), ('cardinality_error', 3.795), ('loss_ce_0', 0.264), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.095), ('cardinality_error_0', 3.795), ('loss_caption_0', 2.761), ('loss_caption', 2.77), ('total_loss', 13.575)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 177000 (epoch 17), -loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.1), ('loss_bbox', 0.063), ('loss_giou', 0.161), ('loss_self_iou', 0.096), ('cardinality_error', 3.652), ('loss_ce_0', 0.261), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.169), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.652), ('loss_caption_0', 2.743), ('loss_caption', 2.745), ('total_loss', 13.43)]), -time/iter = 0.190, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 178000 (epoch 17), -loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.103), ('loss_bbox', 0.063), ('loss_giou', 0.164), ('loss_self_iou', 0.103), ('cardinality_error', 3.664), ('loss_ce_0', 0.26), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.104), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.682), ('loss_caption', 2.68), ('total_loss', 13.211)]), -time/iter = 0.188, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 179000 (epoch 17), -loss = OrderedDict([('loss_ce', 0.261), ('loss_counter', 0.105), ('loss_bbox', 0.06), ('loss_giou', 0.164), ('loss_self_iou', 0.09), ('cardinality_error', 3.825), ('loss_ce_0', 0.266), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.825), ('loss_caption_0', 2.788), ('loss_caption', 2.796), ('total_loss', 13.671)]), -time/iter = 0.193, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 180000 (epoch 17), -loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.102), ('loss_bbox', 0.064), ('loss_giou', 0.166), ('loss_self_iou', 0.093), ('cardinality_error', 3.729), ('loss_ce_0', 0.261), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.066), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.729), ('loss_caption_0', 2.781), ('loss_caption', 2.775), ('total_loss', 13.608)]), -time/iter = 0.192, bad_vid = 0.000 - -Validation results of iter 180162: -Bleu_1:0.16720622564646215 -Bleu_2:0.08946643461131876 -Bleu_3:0.04568137095423273 -Bleu_4:0.022039722503534608 -METEOR:0.08588931176535387 -ROUGE_L:0.16315869782389542 -CIDEr:0.32099741016990446 -Recall:0.5265047853249455 -Precision:0.5647345942647923 -soda_c:0.05847424883094643 -para_Bleu_1:0.47508155945278135 -para_Bleu_2:0.2858233856765029 -para_Bleu_3:0.17499503512152859 -para_Bleu_4:0.11002968407978216 -para_METEOR:0.16541373751181562 -para_ROUGE_L:0.3190110890037882 -para_CIDEr:0.21421557986951392 - -overall score of iter 180162: 0.4896590014611117 - -Save model at iter 180162 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +ID seq2-ft(mix)-gt_percent-1.0 iter 22743 (epoch 17), +loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.108), ('loss_bbox', 0.015), ('loss_giou', 0.196), ('loss_self_iou', 0.005), ('cardinality_error', 7.714), ('loss_ce_0', 0.244), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.017), ('loss_giou_0', 0.21), ('loss_self_iou_0', 0.006), ('cardinality_error_0', 7.714), ('loss_caption_0', 1.773), ('loss_caption', 1.775), ('total_loss', 9.803)]), +time/iter = 0.714, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 22876 (epoch 17), +loss = OrderedDict([('loss_ce', 0.245), ('loss_counter', 0.11), ('loss_bbox', 0.013), ('loss_giou', 0.181), ('loss_self_iou', 0.004), ('cardinality_error', 7.774), ('loss_ce_0', 0.249), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.774), ('loss_caption_0', 1.76), ('loss_caption', 1.759), ('total_loss', 9.631)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23009 (epoch 17), +loss = OrderedDict([('loss_ce', 0.237), ('loss_counter', 0.105), ('loss_bbox', 0.012), ('loss_giou', 0.171), ('loss_self_iou', 0.003), ('cardinality_error', 7.872), ('loss_ce_0', 0.237), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.183), ('loss_self_iou_0', 0.003), ('cardinality_error_0', 7.872), ('loss_caption_0', 1.69), ('loss_caption', 1.688), ('total_loss', 9.229)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23142 (epoch 17), +loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.098), ('loss_bbox', 0.013), ('loss_giou', 0.177), ('loss_self_iou', 0.004), ('cardinality_error', 7.744), ('loss_ce_0', 0.239), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.744), ('loss_caption_0', 1.66), ('loss_caption', 1.663), ('total_loss', 9.173)]), +time/iter = 0.157, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23275 (epoch 17), +loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.108), ('loss_bbox', 0.014), ('loss_giou', 0.183), ('loss_self_iou', 0.004), ('cardinality_error', 7.82), ('loss_ce_0', 0.242), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.727), ('loss_caption', 1.741), ('total_loss', 9.535)]), +time/iter = 0.160, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23408 (epoch 17), +loss = OrderedDict([('loss_ce', 0.235), ('loss_counter', 0.104), ('loss_bbox', 0.014), ('loss_giou', 0.173), ('loss_self_iou', 0.004), ('cardinality_error', 7.083), ('loss_ce_0', 0.235), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.182), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.083), ('loss_caption_0', 1.678), ('loss_caption', 1.68), ('total_loss', 9.181)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23541 (epoch 17), +loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.112), ('loss_bbox', 0.013), ('loss_giou', 0.185), ('loss_self_iou', 0.003), ('cardinality_error', 7.782), ('loss_ce_0', 0.253), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.782), ('loss_caption_0', 1.686), ('loss_caption', 1.674), ('total_loss', 9.361)]), +time/iter = 0.158, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23674 (epoch 17), +loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.104), ('loss_bbox', 0.013), ('loss_giou', 0.175), ('loss_self_iou', 0.004), ('cardinality_error', 7.699), ('loss_ce_0', 0.242), ('loss_counter_0', 0.106), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.188), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.699), ('loss_caption_0', 1.734), ('loss_caption', 1.755), ('total_loss', 9.502)]), +time/iter = 0.169, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23807 (epoch 17), +loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.109), ('loss_bbox', 0.013), ('loss_giou', 0.188), ('loss_self_iou', 0.004), ('cardinality_error', 8.023), ('loss_ce_0', 0.248), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.199), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.023), ('loss_caption_0', 1.838), ('loss_caption', 1.842), ('total_loss', 10.01)]), +time/iter = 0.176, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 23940 (epoch 17), +loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.107), ('loss_bbox', 0.013), ('loss_giou', 0.178), ('loss_self_iou', 0.004), ('cardinality_error', 7.789), ('loss_ce_0', 0.246), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.789), ('loss_caption_0', 1.661), ('loss_caption', 1.655), ('total_loss', 9.188)]), +time/iter = 0.168, bad_vid = 0.000 + +Validation results of iter 23994: +Bleu_1:0.19099469488969467 +Bleu_2:0.11646897839764006 +Bleu_3:0.06451308365995856 +Bleu_4:0.032200079484133 +METEOR:0.08912416771202449 +ROUGE_L:0.1730757893125124 +CIDEr:0.5693051160396969 +Recall:0.3097042977992106 +Precision:0.43274547601681085 +soda_c:0.08084297498321232 +para_Bleu_1:0.3924031546442418 +para_Bleu_2:0.23911474626028398 +para_Bleu_3:0.14600811918196227 +para_Bleu_4:0.09107950853175292 +para_METEOR:0.16594454181978452 +para_ROUGE_L:0.33729101832099057 +para_CIDEr:0.30892642009784 + +overall score of iter 23994: 0.5659504704493774 + +Save model at iter 23994 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save info to info.json -ID seq2-ft(mix)-gt_percent-1.0 iter 181000 (epoch 18), -loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.094), ('cardinality_error', 3.781), ('loss_ce_0', 0.261), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.781), ('loss_caption_0', 2.743), ('loss_caption', 2.746), ('total_loss', 13.452)]), -time/iter = 0.750, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 182000 (epoch 18), -loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.164), ('loss_self_iou', 0.1), ('cardinality_error', 3.726), ('loss_ce_0', 0.26), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.1), ('cardinality_error_0', 3.726), ('loss_caption_0', 2.748), ('loss_caption', 2.746), ('total_loss', 13.472)]), -time/iter = 0.189, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 183000 (epoch 18), -loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.163), ('loss_self_iou', 0.097), ('cardinality_error', 3.722), ('loss_ce_0', 0.26), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.722), ('loss_caption_0', 2.729), ('loss_caption', 2.734), ('total_loss', 13.405)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 184000 (epoch 18), -loss = OrderedDict([('loss_ce', 0.253), ('loss_counter', 0.104), ('loss_bbox', 0.061), ('loss_giou', 0.161), ('loss_self_iou', 0.098), ('cardinality_error', 3.726), ('loss_ce_0', 0.257), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.17), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.726), ('loss_caption_0', 2.783), ('loss_caption', 2.787), ('total_loss', 13.591)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 185000 (epoch 18), -loss = OrderedDict([('loss_ce', 0.255), ('loss_counter', 0.098), ('loss_bbox', 0.063), ('loss_giou', 0.165), ('loss_self_iou', 0.087), ('cardinality_error', 3.667), ('loss_ce_0', 0.26), ('loss_counter_0', 0.098), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.088), ('cardinality_error_0', 3.667), ('loss_caption_0', 2.718), ('loss_caption', 2.716), ('total_loss', 13.354)]), -time/iter = 0.188, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 186000 (epoch 18), -loss = OrderedDict([('loss_ce', 0.254), ('loss_counter', 0.099), ('loss_bbox', 0.062), ('loss_giou', 0.166), ('loss_self_iou', 0.093), ('cardinality_error', 3.776), ('loss_ce_0', 0.259), ('loss_counter_0', 0.099), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.094), ('cardinality_error_0', 3.776), ('loss_caption_0', 2.75), ('loss_caption', 2.75), ('total_loss', 13.494)]), -time/iter = 0.194, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 187000 (epoch 18), -loss = OrderedDict([('loss_ce', 0.258), ('loss_counter', 0.109), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.089), ('cardinality_error', 3.803), ('loss_ce_0', 0.264), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.09), ('cardinality_error_0', 3.803), ('loss_caption_0', 2.788), ('loss_caption', 2.791), ('total_loss', 13.678)]), -time/iter = 0.198, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 188000 (epoch 18), -loss = OrderedDict([('loss_ce', 0.253), ('loss_counter', 0.1), ('loss_bbox', 0.062), ('loss_giou', 0.163), ('loss_self_iou', 0.091), ('cardinality_error', 3.71), ('loss_ce_0', 0.259), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.71), ('loss_caption_0', 2.745), ('loss_caption', 2.743), ('total_loss', 13.444)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 189000 (epoch 18), -loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.105), ('loss_bbox', 0.064), ('loss_giou', 0.165), ('loss_self_iou', 0.1), ('cardinality_error', 3.748), ('loss_ce_0', 0.256), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.067), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.748), ('loss_caption_0', 2.751), ('loss_caption', 2.753), ('total_loss', 13.484)]), -time/iter = 0.188, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 190000 (epoch 18), -loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.104), ('loss_bbox', 0.06), ('loss_giou', 0.161), ('loss_self_iou', 0.098), ('cardinality_error', 3.742), ('loss_ce_0', 0.264), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.099), ('cardinality_error_0', 3.742), ('loss_caption_0', 2.729), ('loss_caption', 2.73), ('total_loss', 13.395)]), -time/iter = 0.189, bad_vid = 0.000 - -Validation results of iter 190171: -Bleu_1:0.1662475028889873 -Bleu_2:0.08895418147726737 -Bleu_3:0.04559170272578064 -Bleu_4:0.021869443641790748 -METEOR:0.0853620749347768 -ROUGE_L:0.16226693807975517 -CIDEr:0.3203697867996399 -Recall:0.5243080966273422 -Precision:0.5592002237136435 -soda_c:0.058066485957305666 -para_Bleu_1:0.47302383939773723 -para_Bleu_2:0.2848420020452884 -para_Bleu_3:0.17477626094199183 -para_Bleu_4:0.11005159892431456 -para_METEOR:0.16474042555391544 -para_ROUGE_L:0.31754161420686944 -para_CIDEr:0.2082818020277855 - -overall score of iter 190171: 0.4830738265060155 - -Save model at iter 190171 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. +ID seq2-ft(mix)-gt_percent-1.0 iter 24073 (epoch 18), +loss = OrderedDict([('loss_ce', 0.244), ('loss_counter', 0.11), ('loss_bbox', 0.012), ('loss_giou', 0.178), ('loss_self_iou', 0.003), ('cardinality_error', 7.97), ('loss_ce_0', 0.246), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.013), ('loss_giou_0', 0.191), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.97), ('loss_caption_0', 1.689), ('loss_caption', 1.683), ('total_loss', 9.309)]), +time/iter = 0.720, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24206 (epoch 18), +loss = OrderedDict([('loss_ce', 0.237), ('loss_counter', 0.118), ('loss_bbox', 0.013), ('loss_giou', 0.183), ('loss_self_iou', 0.005), ('cardinality_error', 8.286), ('loss_ce_0', 0.236), ('loss_counter_0', 0.118), ('loss_bbox_0', 0.013), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 8.286), ('loss_caption_0', 1.712), ('loss_caption', 1.715), ('total_loss', 9.432)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24339 (epoch 18), +loss = OrderedDict([('loss_ce', 0.245), ('loss_counter', 0.098), ('loss_bbox', 0.012), ('loss_giou', 0.167), ('loss_self_iou', 0.003), ('cardinality_error', 7.316), ('loss_ce_0', 0.247), ('loss_counter_0', 0.099), ('loss_bbox_0', 0.013), ('loss_giou_0', 0.179), ('loss_self_iou_0', 0.003), ('cardinality_error_0', 7.316), ('loss_caption_0', 1.695), ('loss_caption', 1.701), ('total_loss', 9.257)]), +time/iter = 0.159, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24472 (epoch 18), +loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.108), ('loss_bbox', 0.013), ('loss_giou', 0.176), ('loss_self_iou', 0.003), ('cardinality_error', 7.459), ('loss_ce_0', 0.248), ('loss_counter_0', 0.109), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.187), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.459), ('loss_caption_0', 1.699), ('loss_caption', 1.699), ('total_loss', 9.337)]), +time/iter = 0.158, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24605 (epoch 18), +loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.103), ('loss_bbox', 0.014), ('loss_giou', 0.18), ('loss_self_iou', 0.004), ('cardinality_error', 7.812), ('loss_ce_0', 0.243), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.189), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.812), ('loss_caption_0', 1.775), ('loss_caption', 1.773), ('total_loss', 9.644)]), +time/iter = 0.163, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24738 (epoch 18), +loss = OrderedDict([('loss_ce', 0.243), ('loss_counter', 0.101), ('loss_bbox', 0.016), ('loss_giou', 0.187), ('loss_self_iou', 0.004), ('cardinality_error', 7.556), ('loss_ce_0', 0.246), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.016), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.556), ('loss_caption_0', 1.727), ('loss_caption', 1.73), ('total_loss', 9.525)]), +time/iter = 0.166, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 24871 (epoch 18), +loss = OrderedDict([('loss_ce', 0.239), ('loss_counter', 0.104), ('loss_bbox', 0.013), ('loss_giou', 0.181), ('loss_self_iou', 0.004), ('cardinality_error', 7.692), ('loss_ce_0', 0.241), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.692), ('loss_caption_0', 1.77), ('loss_caption', 1.773), ('total_loss', 9.641)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25004 (epoch 18), +loss = OrderedDict([('loss_ce', 0.246), ('loss_counter', 0.109), ('loss_bbox', 0.013), ('loss_giou', 0.186), ('loss_self_iou', 0.004), ('cardinality_error', 8.143), ('loss_ce_0', 0.247), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.197), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.143), ('loss_caption_0', 1.692), ('loss_caption', 1.684), ('total_loss', 9.379)]), +time/iter = 0.151, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25137 (epoch 18), +loss = OrderedDict([('loss_ce', 0.245), ('loss_counter', 0.111), ('loss_bbox', 0.014), ('loss_giou', 0.179), ('loss_self_iou', 0.004), ('cardinality_error', 7.88), ('loss_ce_0', 0.245), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.88), ('loss_caption_0', 1.691), ('loss_caption', 1.696), ('total_loss', 9.347)]), +time/iter = 0.154, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25270 (epoch 18), +loss = OrderedDict([('loss_ce', 0.237), ('loss_counter', 0.103), ('loss_bbox', 0.014), ('loss_giou', 0.185), ('loss_self_iou', 0.004), ('cardinality_error', 7.767), ('loss_ce_0', 0.238), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.767), ('loss_caption_0', 1.687), ('loss_caption', 1.694), ('total_loss', 9.34)]), +time/iter = 0.146, bad_vid = 0.000 + +Validation results of iter 25327: +Bleu_1:0.19191750615066444 +Bleu_2:0.11783589874301872 +Bleu_3:0.06597231596326529 +Bleu_4:0.03167603834812624 +METEOR:0.08996609888818348 +ROUGE_L:0.1746391859525846 +CIDEr:0.5689023016363987 +Recall:0.31503357525649683 +Precision:0.4376628112951966 +soda_c:0.08097707611185051 +para_Bleu_1:0.3977375551078834 +para_Bleu_2:0.24323062675170298 +para_Bleu_3:0.1488548587270082 +para_Bleu_4:0.09292110149283073 +para_METEOR:0.16716298804356167 +para_ROUGE_L:0.33781551083855066 +para_CIDEr:0.31014493696748857 + +overall score of iter 25327: 0.570229026503881 + +Save model at iter 25327 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save info to info.json -ID seq2-ft(mix)-gt_percent-1.0 iter 191000 (epoch 19), -loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.099), ('loss_bbox', 0.062), ('loss_giou', 0.167), ('loss_self_iou', 0.086), ('cardinality_error', 3.653), ('loss_ce_0', 0.257), ('loss_counter_0', 0.099), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.087), ('cardinality_error_0', 3.653), ('loss_caption_0', 2.754), ('loss_caption', 2.752), ('total_loss', 13.501)]), -time/iter = 0.755, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 192000 (epoch 19), -loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.1), ('loss_bbox', 0.061), ('loss_giou', 0.164), ('loss_self_iou', 0.094), ('cardinality_error', 3.767), ('loss_ce_0', 0.258), ('loss_counter_0', 0.1), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.097), ('cardinality_error_0', 3.767), ('loss_caption_0', 2.717), ('loss_caption', 2.72), ('total_loss', 13.343)]), -time/iter = 0.188, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 193000 (epoch 19), -loss = OrderedDict([('loss_ce', 0.25), ('loss_counter', 0.106), ('loss_bbox', 0.06), ('loss_giou', 0.164), ('loss_self_iou', 0.093), ('cardinality_error', 3.847), ('loss_ce_0', 0.256), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.174), ('loss_self_iou_0', 0.093), ('cardinality_error_0', 3.847), ('loss_caption_0', 2.754), ('loss_caption', 2.759), ('total_loss', 13.499)]), -time/iter = 0.190, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 194000 (epoch 19), -loss = OrderedDict([('loss_ce', 0.256), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.165), ('loss_self_iou', 0.097), ('cardinality_error', 3.775), ('loss_ce_0', 0.262), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.063), ('loss_giou_0', 0.176), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.775), ('loss_caption_0', 2.769), ('loss_caption', 2.772), ('total_loss', 13.587)]), -time/iter = 0.192, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 195000 (epoch 19), -loss = OrderedDict([('loss_ce', 0.257), ('loss_counter', 0.106), ('loss_bbox', 0.062), ('loss_giou', 0.165), ('loss_self_iou', 0.089), ('cardinality_error', 3.794), ('loss_ce_0', 0.261), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.175), ('loss_self_iou_0', 0.089), ('cardinality_error_0', 3.794), ('loss_caption_0', 2.751), ('loss_caption', 2.751), ('total_loss', 13.506)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 196000 (epoch 19), -loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.095), ('loss_bbox', 0.061), ('loss_giou', 0.162), ('loss_self_iou', 0.1), ('cardinality_error', 3.652), ('loss_ce_0', 0.258), ('loss_counter_0', 0.095), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.172), ('loss_self_iou_0', 0.101), ('cardinality_error_0', 3.652), ('loss_caption_0', 2.743), ('loss_caption', 2.735), ('total_loss', 13.403)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 197000 (epoch 19), -loss = OrderedDict([('loss_ce', 0.251), ('loss_counter', 0.104), ('loss_bbox', 0.061), ('loss_giou', 0.162), ('loss_self_iou', 0.091), ('cardinality_error', 3.759), ('loss_ce_0', 0.258), ('loss_counter_0', 0.104), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.091), ('cardinality_error_0', 3.759), ('loss_caption_0', 2.74), ('loss_caption', 2.743), ('total_loss', 13.418)]), -time/iter = 0.191, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 198000 (epoch 19), -loss = OrderedDict([('loss_ce', 0.249), ('loss_counter', 0.098), ('loss_bbox', 0.062), ('loss_giou', 0.162), ('loss_self_iou', 0.092), ('cardinality_error', 3.664), ('loss_ce_0', 0.255), ('loss_counter_0', 0.098), ('loss_bbox_0', 0.064), ('loss_giou_0', 0.171), ('loss_self_iou_0', 0.092), ('cardinality_error_0', 3.664), ('loss_caption_0', 2.718), ('loss_caption', 2.72), ('total_loss', 13.31)]), -time/iter = 0.190, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 199000 (epoch 19), -loss = OrderedDict([('loss_ce', 0.252), ('loss_counter', 0.101), ('loss_bbox', 0.062), ('loss_giou', 0.162), ('loss_self_iou', 0.101), ('cardinality_error', 3.736), ('loss_ce_0', 0.257), ('loss_counter_0', 0.101), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.173), ('loss_self_iou_0', 0.102), ('cardinality_error_0', 3.736), ('loss_caption_0', 2.759), ('loss_caption', 2.76), ('total_loss', 13.502)]), -time/iter = 0.193, bad_vid = 0.000 -ID seq2-ft(mix)-gt_percent-1.0 iter 200000 (epoch 19), -loss = OrderedDict([('loss_ce', 0.253), ('loss_counter', 0.102), ('loss_bbox', 0.061), ('loss_giou', 0.159), ('loss_self_iou', 0.098), ('cardinality_error', 3.701), ('loss_ce_0', 0.259), ('loss_counter_0', 0.102), ('loss_bbox_0', 0.065), ('loss_giou_0', 0.17), ('loss_self_iou_0', 0.098), ('cardinality_error_0', 3.701), ('loss_caption_0', 2.766), ('loss_caption', 2.771), ('total_loss', 13.518)]), -time/iter = 0.190, bad_vid = 0.000 - -Validation results of iter 200180: -Bleu_1:0.16600244771432068 -Bleu_2:0.08859363359362551 -Bleu_3:0.045174799285766926 -Bleu_4:0.021453706973694267 -METEOR:0.08469975853590762 -ROUGE_L:0.1615333099598977 -CIDEr:0.3178372173219055 -Recall:0.5270524681293403 -Precision:0.5612365263371945 -soda_c:0.05852570981425518 -para_Bleu_1:0.47641872729084495 -para_Bleu_2:0.28679556025023933 -para_Bleu_3:0.1757988669447671 -para_Bleu_4:0.11061748158923715 -para_METEOR:0.1647238014039032 -para_ROUGE_L:0.3182336912910021 -para_CIDEr:0.21852415031403352 - -overall score of iter 200180: 0.4938654333071738 - -Save model at iter 200180 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-anet_anet_ori_pbox(similarity_op_order_v2)_CLIP/similarity_op_order_v2_topf30_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_2/model-last.pth. -Save Best-model at iter 200180 to checkpoint file. +ID seq2-ft(mix)-gt_percent-1.0 iter 25403 (epoch 19), +loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.102), ('loss_bbox', 0.013), ('loss_giou', 0.176), ('loss_self_iou', 0.005), ('cardinality_error', 7.429), ('loss_ce_0', 0.248), ('loss_counter_0', 0.105), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.186), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.429), ('loss_caption_0', 1.705), ('loss_caption', 1.695), ('total_loss', 9.343)]), +time/iter = 0.723, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25536 (epoch 19), +loss = OrderedDict([('loss_ce', 0.241), ('loss_counter', 0.107), ('loss_bbox', 0.013), ('loss_giou', 0.189), ('loss_self_iou', 0.003), ('cardinality_error', 7.887), ('loss_ce_0', 0.246), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.887), ('loss_caption_0', 1.717), ('loss_caption', 1.729), ('total_loss', 9.517)]), +time/iter = 0.163, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25669 (epoch 19), +loss = OrderedDict([('loss_ce', 0.239), ('loss_counter', 0.111), ('loss_bbox', 0.014), ('loss_giou', 0.177), ('loss_self_iou', 0.004), ('cardinality_error', 7.707), ('loss_ce_0', 0.243), ('loss_counter_0', 0.111), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.186), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.707), ('loss_caption_0', 1.718), ('loss_caption', 1.711), ('total_loss', 9.385)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25802 (epoch 19), +loss = OrderedDict([('loss_ce', 0.24), ('loss_counter', 0.111), ('loss_bbox', 0.013), ('loss_giou', 0.183), ('loss_self_iou', 0.004), ('cardinality_error', 8.173), ('loss_ce_0', 0.242), ('loss_counter_0', 0.113), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.193), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.173), ('loss_caption_0', 1.732), ('loss_caption', 1.735), ('total_loss', 9.515)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 25935 (epoch 19), +loss = OrderedDict([('loss_ce', 0.241), ('loss_counter', 0.105), ('loss_bbox', 0.013), ('loss_giou', 0.179), ('loss_self_iou', 0.005), ('cardinality_error', 7.82), ('loss_ce_0', 0.241), ('loss_counter_0', 0.107), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.192), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.82), ('loss_caption_0', 1.626), ('loss_caption', 1.628), ('total_loss', 9.063)]), +time/iter = 0.153, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 26068 (epoch 19), +loss = OrderedDict([('loss_ce', 0.24), ('loss_counter', 0.102), ('loss_bbox', 0.014), ('loss_giou', 0.182), ('loss_self_iou', 0.005), ('cardinality_error', 7.444), ('loss_ce_0', 0.243), ('loss_counter_0', 0.103), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.19), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.444), ('loss_caption_0', 1.697), ('loss_caption', 1.701), ('total_loss', 9.35)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 26201 (epoch 19), +loss = OrderedDict([('loss_ce', 0.239), ('loss_counter', 0.097), ('loss_bbox', 0.014), ('loss_giou', 0.168), ('loss_self_iou', 0.005), ('cardinality_error', 7.301), ('loss_ce_0', 0.237), ('loss_counter_0', 0.099), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.181), ('loss_self_iou_0', 0.005), ('cardinality_error_0', 7.301), ('loss_caption_0', 1.702), ('loss_caption', 1.703), ('total_loss', 9.254)]), +time/iter = 0.161, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 26334 (epoch 19), +loss = OrderedDict([('loss_ce', 0.238), ('loss_counter', 0.112), ('loss_bbox', 0.013), ('loss_giou', 0.174), ('loss_self_iou', 0.003), ('cardinality_error', 7.827), ('loss_ce_0', 0.242), ('loss_counter_0', 0.112), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.188), ('loss_self_iou_0', 0.003), ('cardinality_error_0', 7.827), ('loss_caption_0', 1.729), ('loss_caption', 1.725), ('total_loss', 9.424)]), +time/iter = 0.164, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 26467 (epoch 19), +loss = OrderedDict([('loss_ce', 0.247), ('loss_counter', 0.109), ('loss_bbox', 0.014), ('loss_giou', 0.181), ('loss_self_iou', 0.003), ('cardinality_error', 8.023), ('loss_ce_0', 0.245), ('loss_counter_0', 0.11), ('loss_bbox_0', 0.015), ('loss_giou_0', 0.195), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 8.023), ('loss_caption_0', 1.751), ('loss_caption', 1.746), ('total_loss', 9.586)]), +time/iter = 0.162, bad_vid = 0.000 +ID seq2-ft(mix)-gt_percent-1.0 iter 26600 (epoch 19), +loss = OrderedDict([('loss_ce', 0.242), ('loss_counter', 0.108), ('loss_bbox', 0.014), ('loss_giou', 0.186), ('loss_self_iou', 0.004), ('cardinality_error', 7.902), ('loss_ce_0', 0.242), ('loss_counter_0', 0.108), ('loss_bbox_0', 0.014), ('loss_giou_0', 0.196), ('loss_self_iou_0', 0.004), ('cardinality_error_0', 7.902), ('loss_caption_0', 1.727), ('loss_caption', 1.737), ('total_loss', 9.533)]), +time/iter = 0.156, bad_vid = 0.000 + +Validation results of iter 26660: +Bleu_1:0.1908811984292725 +Bleu_2:0.11664270449592412 +Bleu_3:0.06546844271584715 +Bleu_4:0.03266470081303028 +METEOR:0.08981101020496235 +ROUGE_L:0.17382953846907112 +CIDEr:0.5716745559959934 +Recall:0.31292035599338697 +Precision:0.4345220728699943 +soda_c:0.08127095018359767 +para_Bleu_1:0.40170065588267356 +para_Bleu_2:0.2447870245859959 +para_Bleu_3:0.14990588787772124 +para_Bleu_4:0.09419227635900729 +para_METEOR:0.16780671784283924 +para_ROUGE_L:0.33845945539662686 +para_CIDEr:0.3198675630646056 + +overall score of iter 26660: 0.5818665572664521 + +Save model at iter 26660 to /mnt/data/pjlab-3090-sport/wuhao/logs/dibs/howto-yc2_yc2_ori_pbox(similarity_op_order_v2)_Uni/similarity_op_order_v2_topf25_iter3_r1_th1_refine_aug(8,0.02)_top3_2stage_ins_cap_topk_mil_coef0_noFocal_seq2-ft(mix)-gt_percent-1.0_1/model-last.pth. Save info to info.json -Best epoch: 10 +Best epoch: 11 Best Model Performance: -Bleu_1:0.1671778590456048 -Bleu_2:0.09077014613023152 -Bleu_3:0.0476684747303012 -Bleu_4:0.02445564298599047 -METEOR:0.08933235383587503 -ROUGE_L:0.1654660162888944 -CIDEr:0.31886265111118334 -Recall:0.5314017615268335 -Precision:0.5831469052945512 -soda_c:0.05853263249839839 -para_Bleu_1:0.46544090189732323 -para_Bleu_2:0.2789325258737778 -para_Bleu_3:0.17172911957785325 -para_Bleu_4:0.10903514181091935 -para_METEOR:0.16550159188298816 -para_ROUGE_L:0.3181118223429575 -para_CIDEr:0.2056618808195008 +Bleu_1:0.1989422607268001 +Bleu_2:0.12223038556953512 +Bleu_3:0.06835990671747892 +Bleu_4:0.03486159828438583 +METEOR:0.09408978838449876 +ROUGE_L:0.18200142867223945 +CIDEr:0.593480700759431 +Recall:0.30795469953703025 +Precision:0.4513424333993264 +soda_c:0.0796861065455984 +para_Bleu_1:0.39594509057043764 +para_Bleu_2:0.24087109399513515 +para_Bleu_3:0.14790262814870953 +para_Bleu_4:0.09321042711819619 +para_METEOR:0.1655617051143519 +para_ROUGE_L:0.3391051008488012 +para_CIDEr:0.32807196750555834 avg_proposal_number:-1 -Best Overall Score epoch10: 1.5812763042668414 +Best Overall Score epoch11: 1.5265537286258848 diff --git a/val.log b/val.log index 2937f5d88e3790d388f53f3845a2179514931da2..76f83c2963d4b440439af55ee7506b115beba8c3 100644 --- a/val.log +++ b/val.log @@ -1,21 +1,21 @@ Best Model Performance: -Bleu_1:0.1671778590456048 -Bleu_2:0.09077014613023152 -Bleu_3:0.0476684747303012 -Bleu_4:0.02445564298599047 -METEOR:0.08933235383587503 -ROUGE_L:0.1654660162888944 -CIDEr:0.31886265111118334 -Recall:0.5314017615268335 -Precision:0.5831469052945512 -soda_c:0.05853263249839839 -para_Bleu_1:0.46544090189732323 -para_Bleu_2:0.2789325258737778 -para_Bleu_3:0.17172911957785325 -para_Bleu_4:0.10903514181091935 -para_METEOR:0.16550159188298816 -para_ROUGE_L:0.3181118223429575 -para_CIDEr:0.2056618808195008 +Bleu_1:0.1989422607268001 +Bleu_2:0.12223038556953512 +Bleu_3:0.06835990671747892 +Bleu_4:0.03486159828438583 +METEOR:0.09408978838449876 +ROUGE_L:0.18200142867223945 +CIDEr:0.593480700759431 +Recall:0.30795469953703025 +Precision:0.4513424333993264 +soda_c:0.0796861065455984 +para_Bleu_1:0.39594509057043764 +para_Bleu_2:0.24087109399513515 +para_Bleu_3:0.14790262814870953 +para_Bleu_4:0.09321042711819619 +para_METEOR:0.1655617051143519 +para_ROUGE_L:0.3391051008488012 +para_CIDEr:0.32807196750555834 avg_proposal_number:-1 -Best Overall Score epoch10: 1.5812763042668414 +Best Overall Score epoch11: 1.5265537286258848